*-----------------------------------------------------------------------
* Teaching lab: How are Populations Distributed?
*
* Programs defined in this file:
*   dist      builds the dialog box and hands control to the dialog manager
*   distdr    action stored for the Run button; draws the selected display
*   normplot, expplot, betaplot, weibplot, gammplot, lognplot,
*   lapplot, logsplot, unifplot, parplot
*             draw one labelled curve of the named family
*
* Commands used here but defined elsewhere (behaviour assumed from how
* they are called; confirm against their own sources):
*   wdctl wdlg sstopbox whelp gphconv histgm lines drtext drline
*   pdf_f pdf_chi2 pdf_t pdf_beta pdf_weib pdf_gam pdf_logn pdf_lap
*   pdf_logs pdf_par
*-----------------------------------------------------------------------

program define dist
    * Entry point.  Preserves the user data, defines the dialog text and
    * controls through D_* globals, runs the dialog, then restores the data.
    version 4.0
    preserve                /* Save all the current stata stuff */
    clear                   /* Clear everything so no conflicts */

    * Explanatory text shown at the top of the dialog.
    global D_sm1 " A crucial idea in statistics is the distribution of a population, that is, a graph of the values in the population and how likely each is to occur. "
    global D_sm2 " This lab shows: 1) Distributions come in all shapes and sizes, 2) They come in families such as the normal family, 3) Different populations (people's height and lifetimes, for example) will have distributions in different families (e.g. heights in the normal family, lifetimes in the exponential or Weibull families), 4) Each curve in a family is determined by the value(s) of parameter(s) (such as the mean and variance of a normal curve)."
    global D_sm3 " If you choose curves and select a distribution, the lab will draw several curves from that family. If you choose real-data (simulated-data), and select a distribution, you will see the histogram of a real (simulated) data set having that distribution with a curve from the chosen family superimposed. If you choose data-set and select one of the data sets, you will see the histogram of that data set and the lab will select the best of all possible curves from any distribution family superimposed."
    * D_sm4 is defined but not displayed: its wdctl line below is disabled.
    global D_sm4 " 2) If you choose real-data (simulated-data), and select a distribution, you will see the histogram of a real (simulated) data set from having that distribution with a curve from the chosen family superimposed."
*   global D_sm4 "NOTE: We suggest you read the help file while you run the lab. You can switch among the help window, graphics window, and dialog box by clicking anywhere in the window you want to see."

    wdctl static D_sm1 5 5 290 18
    wdctl static D_sm2 5 22 290 40
    wdctl static D_sm3 5 65 290 48
*   wdctl static D_sm4 5 110 125 50

    * Option list; the selection is kept in D_var2 (initially Curves).
    global D_sm8 "Option"
    global D_sm9 "Curves Real-Data Simulated-Data Data-Set"
    wdctl static D_sm8 90 115 60 10
    global D_var2 "Curves"
    wdctl ssimple D_var2 D_sm9 90 125 60 50

    * Distribution family list; the selection is kept in D_var1.
    * The list is built in three steps by appending to D_sm7.
    global D_sm6 "Distribution Family"
    global D_sm7 "Normal t Chi-square F Beta Cauchy Exponential Gamma"
    global D_sm7 "$D_sm7 Laplace Logistic Lognormal Pareto"
    global D_sm7 "$D_sm7 Uniform Weibull"
    wdctl static D_sm6 160 115 70 10
    global D_var1 "Normal"
    wdctl ssimple D_var1 D_sm7 160 125 70 60

    * Data set list; the selection is kept in D_var3.
    global D_sm11 "Data Set"
    global D_sm12 "Data-1 Data-2 Data-3 Data-4 Data-5 Data-6 Data-7"
    wdctl static D_sm11 240 115 50 10
    global D_var3 "Data-1"
    wdctl ssimple D_var3 D_sm12 240 125 50 60

    * Buttons.  Each names a global (D_b1..D_b3) that holds a command
    * string; presumably the dialog manager runs it on a click (confirm
    * against wdctl and wdlg).
    wdctl button "Run" 5 165 25 14 D_b1
    wdctl button "Close" 35 165 25 14 D_b2
    wdctl button "Help" 65 165 25 14 D_b3
*   distdr 0
    global D_b1 "distdr"
    global D_b2 "exit 1234"
    global D_b3 "whelp dist"

    * D_dlgx and D_dlgy are not set in this file; they are expected to be
    * defined by the caller or the environment.
    cap noi wdlg "How are Populations Distributed?" $D_dlgx $D_dlgy 300 200
    restore
end

program define distdr
    * Draws the display selected in the dialog.
    * Reads globals D_var2 (option) and D_var1 (distribution family).
    * Each finished display leaves the program with an explicit exit.
    version 4.0

    *-------------------------------------------------------------------
    * Real-Data: histogram of a stored data set plus the family curve.
    * Only Normal is implemented; other families show a stop box.
    *-------------------------------------------------------------------
    if "$D_var2"=="Real-Data" {
        clear
        gph open
        gph pen 1
        local ioptf=0
        local nbins=20
        if "$D_var1"=="Normal" {
            use norm
            local ioptf=1
            * Clamp the data to the plotted range 55..145.
            replace y=max(y,55)
            replace y=min(y,145)
            gen x=55+90*(_n-1)/(_N-1)
            gen fx=exp(-((x-100)^2)/(2*225))/sqrt(2*_pi*225)
            local xmin=55
            local xmax=145
            global D_var5 "Sample of 500 IQs with N(100,225) Curve"
            global yl "0,.01,.02,.03"
            global xl "55,70,85,100,115,130,145"
        }
        if `ioptf'==0 {
            sstopbox stop "The Real-Data Examples Are Not Finished Yet, Sorry"
            exit
        }
        graph fx x, c(l) s(i) xlab($xl) ylab($yl) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0) pen(3)
        gphconv
        gph pen 2
        histgm y `xmin' `xmax' `nbins'
        gph text 1000 16000 0 0 $D_var5
        exit
    }

    *-------------------------------------------------------------------
    * Simulated-Data: 500 simulated values, histogram plus family curve.
    * Normal, Exponential and Uniform are implemented.
    *-------------------------------------------------------------------
    if "$D_var2"=="Simulated-Data" {
        clear
        gph open
        gph pen 1
        set obs 500
        gen y=.
        gen y1=.
        gen x=.
        gen fx=.
        local ioptf=0
        local nbins=20
        if "$D_var1"=="Normal" {
            local ioptf=1
            replace y1=100+15*invnorm(uniform())
            * Clamp to 55..145.  The second step must read y, not y1;
            * reading y1 again would discard the lower clamp from the
            * line before.  This now matches the Real-Data branch.
            replace y=max(y1,55)
            replace y=min(y,145)
            replace x=55+90*(_n-1)/499
            replace fx=exp(-((x-100)^2)/(2*225))/sqrt(2*_pi*225)
            local xmin=55
            local xmax=145
            global D_var5 "Normal(mean=100,variance=225)"
            global yl "0,.01,.02,.03"
            global xl "55,70,85,100,115,130,145"
        }
        if "$D_var1"=="Exponential" {
            local ioptf=1
            replace y1=-log(uniform())
            * Only an upper clamp is applied here (at 6).
            replace y=min(y1,6)
            replace x=0+6*(_n-1)/499
            replace fx=exp(-x)
            local xmin=0
            local xmax=6
            global D_var5 "Exponential(mean=1)"
            global yl "0,.25,.50,.75,1"
            global xl "0,1,2,3,4,5,6"
        }
        if "$D_var1"=="Uniform" {
            * Uniform draws its own display and exits without using the
            * shared drawing code below.
            replace y=uniform()
            * Symbols are invisible, so this graph call draws axes only.
            graph y y, s(i) xlab(0,.25,.50,.75,1) ylab(0,.25,.50,.75,1,1.25,1.50) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0)
            gphconv
            replace x=(_n-1)/(_N-1)
            replace fx=1
            gph pen 3
            lines x fx
            gph pen 2
            histgm y 0 1 20
            gph text 1000 16000 0 0 Simulated Data from Uniform(0,1)
            exit
        }
        if `ioptf'==0 {
            sstopbox stop "The Simulated-Data Examples Are Not Finished Yet, Sorry"
            exit
        }
        graph fx x, c(l) s(i) xlab($xl) ylab($yl) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0) pen(3)
        gphconv
        gph pen 2
        histgm y `xmin' `xmax' `nbins'
        gph text 1000 16000 0 0 Simulated Data from $D_var5
        exit
    }

    *-------------------------------------------------------------------
    * Data-Set: not implemented.
    *-------------------------------------------------------------------
    if "$D_var2"=="Data-Set" {
        sstopbox stop "The Data-Set Examples Are Not Finished Yet, Sorry"
        exit
    }

    *-------------------------------------------------------------------
    * Curves: several members of the selected family on one graph.
    *-------------------------------------------------------------------
    if "$D_var2"=="Curves" {
        gph open

        *----------------------------
        if "$D_var1"=="Normal" {
            clear
            set obs 501
            gen y=.
            * x runs over -5..5.
            gen x=5*((_n-251)/250)
            normplot 0 1 x y 1 0 0.0 .42
            normplot 1 1 x y 2 0 1.5 .42
            normplot -1 1 x y 3 0 -1.5 .42
            normplot 0 4 x y 4 0 -4.0 .05
            normplot 0 .25 x y 6 0 1.0 .60
            gph pen 2
            gph text 500 16000 0 0 Five Members of the Normal Family
            exit
        }

        *----------------------------
        if "$D_var1"=="Beta" {
            clear
            set obs 200
            gen y=.
            gen x=(_n-.5)/200.
            betaplot 3 3 x y 1 0 .5 2.1
            betaplot .5 .5 x y 2 -1 0. 6.
            betaplot 9 2 x y 3 1 .9 4.
            betaplot 2 9 x y 6 -1 .1 4.
            gph pen 1
            gph text 500 16000 0 0 Four Members of the Beta Family
            exit
        }

        *---------------------------------
        if "$D_var1"=="Exponential" {
            clear
            set obs 500
            gen y=.
            gen x=15*(_n-.5)/500.
            expplot 2 x y 1 0 .4 .4
            expplot 3 x y 2 0 .4 .3
            expplot 4 x y 3 0 .4 .23
            expplot 5 x y 6 0 .4 .17
            gph pen 1
            gph text 500 16000 0 0 Four Members of the Exponential Family
            exit
        }

        *-----------------------
        if "$D_var1"=="F" {
            clear
            set obs 201
            gen y=.
            gen x=3*(_n-1)/200.
            * First curve (10,10) sets up the axes; the loop adds
            * (20,20) through (200,200) in steps of 10.
            pdf_f x y 10 10
            graph y x, xlab(0,1,2,3) ylab(0,1,2,3) c(l) s(.) pen(3)
            gphconv
            local df=10
*           drtext 2 2 -1 df=`df'
            local df=20
            while `df' <= 200 {
                pdf_f x y `df' `df'
                lines x y
                local df=`df'+10
            }
            gph text 1000 16000 0 0 F curves for degrees of freedom (10,10),...,(200,200)
            exit
        }

        *---------------------------------
        if "$D_var1"=="Chi-square" {
            clear
            set obs 201
            gen y=.
            * x starts on 0..60; once df exceeds 30 the grid is reset
            * to 0..250 so the later curves span the labelled axis.
            gen x=60*(_n-1)/200.
            pdf_chi2 x y 10
            graph y x, xlab(0,50,100,150,200,250) ylab(0,.025,.050,.075,.1) c(l) s(.) pen(3)
            gphconv
            local df=20
            while `df' <= 200 {
                if `df' > 30 {
                    replace x=250*(_n-1)/200
                }
                pdf_chi2 x y `df'
                lines x y
                local df=`df'+10
            }
            gph text 1000 16000 0 0 Chi-square curves for degrees of freedom 10,20,...,200
            exit
        }

        *-----------------------
        if "$D_var1"=="t" {
            clear
            set obs 201
            gen x=3*(_n-101)/100.
            * Standard normal density is drawn first as the reference.
            gen y=exp(-(x^2)/2.)/sqrt(2.*_pi)
            graph y x, xlab(-3,-2,-1,0,1,2,3) ylab(0,.1,.2,.3,.4) c(l) s(.) pen(3)
            gphconv
            gph pen 1
            local df=1
            while `df' <= 30 {
                pdf_t x y `df'
                lines x y
                local df=`df'+1
            }
            gph text 1000 16000 0 0 Z curve and t curves for degrees of freedom 1,2,...,30
            exit
        }

        *----------------------------
        if "$D_var1"=="Cauchy" {
            clear
            set obs 201
            gen x=5*(_n-101)/100.
            gen y=exp(-(x^2)/2.)/sqrt(2.*_pi)
            graph y x, xlab(-5,-4,-3,-2,-1,0,1,2,3,4,5) ylab(0,.1,.2,.3,.4) c(l) s(.) pen(3)
            gphconv
            drtext 1 .35 -1 N(0,1)
            gph pen 1
            * Standard Cauchy density.
            replace y=1/(_pi*(1+x^2))
            lines x y
            drtext 0 .2 0 Cauchy
            gph text 1000 16000 0 0 Comparing Standard Normal and Standard Cauchy
            exit
        }

        * In the families below, y is first filled with uniform draws
        * and graphed with invisible symbols, which sets up the axes;
        * the helper programs then overwrite y and draw the curves.

        *-----------------------------
        if "$D_var1"=="Weibull" {
            clear
            set obs 200
            gen x=3*(_n-.5)/200.
            gen y=1.6*uniform()
            graph y x, xlab(0,1,2,3) ylab(0,.4,.8,1.2,1.6) s(i) pen(3)
            gphconv
            weibplot 1. x y 1 -1 .2 .80
            weibplot 2. x y 2 -1 .85 .80
            weibplot 3. x y 3 -1 .8 1.25
            weibplot 4. x y 6 -1 1.1 1.5
            gph pen 3
            gph text 1000 16000 0 0 Four Members of the Weibull Family
            exit
        }

        *---------------------------
        if "$D_var1"=="Gamma" {
            clear
            set obs 200
            gen x=10*(_n-.5)/200.
            gen y=uniform()
            graph y x, xlab(0,2,4,6,8,10) ylab(0,.2,.4,.6,.8,1) s(i) pen(3)
            gphconv
            gammplot 1. x y 1 -1 1 .8
            gammplot 2. x y 2 -1 1.5 .4
            gammplot 3. x y 3 -1 2.5 .3
            gammplot 4. x y 6 -1 6 .15
            gph pen 3
            gph text 1000 16000 0 0 Four Members of the Gamma Family
            exit
        }

        *-------------------------------
        if "$D_var1"=="Lognormal" {
            clear
            set obs 200
            gen x=6*(_n-.5)/200.
            gen y=uniform()
            graph y x, xlab(0,2,4,6) ylab(0,.2,.4,.6,.8,1) s(i) pen(3)
            gphconv
            lognplot 0 1 x y 1 -1 .5 .6
            lognplot 1 1 x y 2 -1 2 .2
            lognplot 0 .25 x y 3 -1 1.2 .8
            lognplot 1 .25 x y 4 -1 3 .3
            lognplot 0 4 x y 6 -1 .2 .95
            gph pen 3
            gph text 1000 16000 0 0 Five Members of the Lognormal Family
            exit
        }

        *-----------------------------
        if "$D_var1"=="Laplace" {
            clear
            set obs 401
            gen x=6*(_n-201)/200.
            gen y=1.5*uniform()
            graph y x, xlab(-6,-4,-2,0,2,4,6) ylab(0,.25,.5,.75,1,1.25,1.5) s(i) pen(3)
            gphconv
            lapplot 0 1.00 x y 1 -1 -.3 .8
            lapplot -2 1.00 x y 2 1 -2 .8
            lapplot 2 1.00 x y 3 -1 2 .8
            lapplot 0 4.00 x y 4 -1 -.3 .4
            lapplot 0 .25 x y 6 -1 0 1.25
            gph pen 3
            gph text 1000 16000 0 0 Five Members of the Laplace Family
            exit
        }

        *-------------------------------
        if "$D_var1"=="Logistic" {
            clear
            set obs 401
            gen x=10*(_n-201)/200.
            gen y=.5*uniform()
            graph y x, xlab(-10,-5,0,5,10) ylab(0,.1,.2,.3,.4,.5) s(i) pen(3)
            gphconv
            logsplot -2 1.0 x y 1 1 -3.0 .2
            logsplot 0 1.0 x y 2 0 0 .25
            logsplot 2 1.0 x y 3 -1 3 .2
            logsplot 0 2.0 x y 4 -1 3 .06
            logsplot 0 .5 x y 6 -1 1 .4
            gph pen 3
            gph text 1000 16000 0 0 Five Members of the Logistic Family
            exit
        }

        *-----------------------------
        if "$D_var1"=="Uniform" {
            clear
            set obs 201
            gen x=4*(uniform()-.5)
            gen y=2*uniform()
            graph y x, xlab(-2,-1,0,1,2) ylab(0,.5,1,1.5,2,2.5) s(i) pen(3)
            gphconv
            unifplot 0 1 x y 1 -1 1 1
            unifplot -.75 .75 x y 2 1 -1 .7
            unifplot -.25 .25 x y 3 -1 .3 2
            unifplot -2 2 x y 4 1 2 .3
            gph pen 3
            gph text 1000 16000 0 0 Four Members of the Uniform Family
            exit
        }

        *----------------------------
        if "$D_var1"=="Pareto" {
            clear
            set obs 200
            gen x=1+(_n-.5)/200.
            gen y=4*uniform()
            graph y x, xlab(1,1.25,1.50,1.75,2) ylab(0,1,2,3,4) s(i) pen(3)
            gphconv
            parplot 1. x y 1 -1 1 1
            parplot 2. x y 2 -1 1 2
            parplot 3. x y 3 -1 1 3
            parplot 4. x y 6 -1 1 4
            gph pen 3
            gph text 1000 16000 0 0 Four Members of the Pareto Family
            exit
        }
    }
    else {
        * Option value not handled by any branch above.
        sstopbox stop "This lab still needs work"
        exit
    }

    * Reached only when Curves is selected with a family that none of the
    * branches above handles; every handled case exits before this line.
    gph close
end

program define normplot
    * Draws one normal density curve with its axes and a text label.
    * Arguments:
    *   1 mu       mean
    *   2 var      variance
    *   3 x        name of the x variable (read)
    *   4 y        name of the y variable (overwritten with the density)
    *   5 penno    pen number for the curve
    *   6 centopt  passed through to drtext (presumably text alignment)
    *   7 xl       label position passed to drtext
    *   8 yl       label position passed to drtext
    version 4.0
    local mu = `1'
    local var = `2'
    local x = "`3'"
    local y = "`4'"
    local penno = `5'
    local centopt= `6'
    local xl = `7'
    local yl = `8'
    replace `y' = exp(-((`x'-`mu')^2)/(2*`var'))/sqrt(2*_pi*`var')
    graph `y' `x', c(l) s(i) xlabel(-5,-4,-3,-2,-1,0,1,2,3,4,5) ylabel yscale(0,.8) xscale(-5,5) pen(`penno')
    gphconv
    drtext `xl' `yl' `centopt' N(`mu',`var')
end

program define expplot
    * Draws one exponential density curve (mean mu) with axes and label.
    * Arguments:
    *   1 mu       mean
    *   2 x        name of the x variable (read)
    *   3 y        name of the y variable (overwritten with the density)
    *   4 penno    pen number for the curve
    *   5 centopt  passed through to drtext
    *   6 xl       label position passed to drtext
    *   7 yl       label position passed to drtext
    version 4.0
    local mu = `1'
    local x = "`2'"
    local y = "`3'"
    local penno = `4'
    local centopt= `5'
    local xl = `6'
    local yl = `7'
    replace `y'=exp(-`x'/`mu')/`mu'
    graph `y' `x', c(l) s(i) xlabel(0,5,10,15) ylabel(0,.1,.2,.3,.4,.5) yscale(0,.5) xscale(0,15) pen(`penno')
    gphconv
    drtext `xl' `yl' `centopt' mu=`mu'
end

program define betaplot
    * Draws one Beta(p,q) curve with axes and label.  The density is
    * obtained from pdf_beta, which presumably fills y from x.
    * Arguments:
    *   1 p, 2 q   shape parameters handed to pdf_beta
    *   3 x        name of the x variable
    *   4 y        name of the y variable
    *   5 penno    pen number for the curve
    *   6 centopt  passed through to drtext
    *   7 xl, 8 yl label position passed to drtext
    version 4.0
    local p = `1'
    local q = `2'
    local x = "`3'"
    local y = "`4'"
    local penno = `5'
    local centopt= `6'
    local xl = `7'
    local yl = `8'
    pdf_beta `x' `y' `p' `q'
    graph `y' `x', c(l) s(i) xlabel(0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1) ylabel yscale(0,7) xscale(0,1) pen(`penno')
    gphconv
    drtext `xl' `yl' `centopt' Beta(p=`p',q=`q')
end

program define weibplot
    * Adds one Weibull curve and its label to a graph that the caller
    * has already set up (no graph command is issued here).
    * Arguments:
    *   1 c        parameter handed to pdf_weib
    *   2 x, 3 y   variable names handed to pdf_weib and lines
    *   4 penno    pen number for the curve
    *   5 centopt  passed through to drtext
    *   6 xl, 7 yl label position passed to drtext
    version 4.0
    local c = `1'
    local x = "`2'"
    local y = "`3'"
    local penno = `4'
    local centopt= `5'
    local xl = `6'
    local yl = `7'
    pdf_weib `x' `y' `c'
    gph pen `penno'
    lines `x' `y'
    drtext `xl' `yl' `centopt' c=`c'
end

program define gammplot
    * Adds one gamma curve and its label to an existing graph.
    * Arguments:
    *   1 alpha    parameter handed to pdf_gam
    *   2 x, 3 y   variable names handed to pdf_gam and lines
    *   4 penno    pen number for the curve
    *   5 centopt  passed through to drtext
    *   6 xl, 7 yl label position passed to drtext
    version 4.0
    local alpha = `1'
    local x = "`2'"
    local y = "`3'"
    local penno = `4'
    local centopt= `5'
    local xl = `6'
    local yl = `7'
    pdf_gam `x' `y' `alpha'
    gph pen `penno'
    lines `x' `y'
    drtext `xl' `yl' `centopt' alpha=`alpha'
end

program define lognplot
    * Adds one lognormal curve and its label to an existing graph.
    * Arguments:
    *   1 a, 2 b2  parameters handed to pdf_logn
    *   3 x, 4 y   variable names handed to pdf_logn and lines
    *   5 penno    pen number for the curve
    *   6 centopt  passed through to drtext
    *   7 xl, 8 yl label position passed to drtext
    version 4.0
    local a = `1'
    local b2= `2'
    local x = "`3'"
    local y = "`4'"
    local penno = `5'
    local centopt= `6'
    local xl = `7'
    local yl = `8'
    pdf_logn `x' `y' `a' `b2'
    gph pen `penno'
    lines `x' `y'
    drtext `xl' `yl' `centopt' a=`a',b=`b2'
end

program define lapplot
    * Adds one Laplace curve and its label to an existing graph.
    * Arguments:
    *   1 mu, 2 sig2  parameters handed to pdf_lap
    *   3 x, 4 y      variable names handed to pdf_lap and lines
    *   5 penno       pen number for the curve
    *   6 centopt     passed through to drtext
    *   7 xl, 8 yl    label position passed to drtext
    version 4.0
    local mu = `1'
    local sig2 = `2'
    local x = "`3'"
    local y = "`4'"
    local penno = `5'
    local centopt= `6'
    local xl = `7'
    local yl = `8'
    pdf_lap `x' `y' `mu' `sig2'
    gph pen `penno'
    lines `x' `y'
    drtext `xl' `yl' `centopt' mu=`mu',sig2=`sig2'
end

program define logsplot
    * Adds one logistic curve and its label to an existing graph.
    * Arguments:
    *   1 alpha, 2 beta  parameters handed to pdf_logs
    *   3 x, 4 y         variable names handed to pdf_logs and lines
    *   5 penno          pen number for the curve
    *   6 centopt        passed through to drtext
    *   7 xl, 8 yl       label position passed to drtext
    version 4.0
    local alpha = `1'
    local beta = `2'
    local x = "`3'"
    local y = "`4'"
    local penno = `5'
    local centopt= `6'
    local xl = `7'
    local yl = `8'
    pdf_logs `x' `y' `alpha' `beta'
    gph pen `penno'
    lines `x' `y'
    drtext `xl' `yl' `centopt' a=`alpha',b=`beta'
end

program define unifplot
    * Adds one Uniform(alpha,beta) density to an existing graph: a flat
    * segment at height 1/(beta-alpha) plus a vertical line down to 0 at
    * each end, then the label.  Both x and y are overwritten.
    * Arguments:
    *   1 alpha, 2 beta  lower and upper end of the support
    *   3 x, 4 y         variable names (both overwritten)
    *   5 penno          pen number for the curve
    *   6 centopt        passed through to drtext
    *   7 xl, 8 yl       label position passed to drtext
    version 4.0
    local alpha = `1'
    local beta = `2'
    local x = "`3'"
    local y = "`4'"
    local penno = `5'
    local centopt= `6'
    local xl = `7'
    local yl = `8'
    * Even grid from alpha to beta across all observations.
    replace `x'=`alpha'+(`beta'-`alpha')*(_n-1)/(_N-1)
    local yl2=1/(`beta'-`alpha')
    replace `y'=`yl2'
    gph pen `penno'
    lines `x' `y'
    drline `alpha' `yl2' `alpha' 0
    drline `beta' `yl2' `beta' 0
    drtext `xl' `yl' `centopt' U(`alpha',`beta')
end

program define parplot
    * Adds one Pareto curve and its label to an existing graph.
    * Arguments:
    *   1 a        parameter handed to pdf_par
    *   2 x, 3 y   variable names handed to pdf_par and lines
    *   4 penno    pen number for the curve
    *   5 centopt  passed through to drtext
    *   6 xl, 7 yl label position passed to drtext
    version 4.0
    local a = `1'
    local x = "`2'"
    local y = "`3'"
    local penno = `4'
    local centopt= `5'
    local xl = `6'
    local yl = `7'
    pdf_par `x' `y' `a'
    gph pen `penno'
    lines `x' `y'
    drtext `xl' `yl' `centopt' a=`a'
end
exit