program define dist version 4.0 preserve /* Save all the current stata stuff */ clear /* Clear everything so no conflicts */ global D_sm1 " A crucial idea in statistics is the distribution of a continuous population, that is, a graph of the values in the population and how likely each is to occur. " global D_sm2 " This lab shows: 1) Distributions come in all shapes and sizes, 2) They come in families such as the normal family, 3) Different populations (people's height and lifetimes, for example) will have distributions in different families (e.g. heights in the normal family, lifetimes in the exponential or Weibull families), 4) Each curve in a family is determined by the value(s) of parameter(s) (such as the mean and variance of a normal curve)." * global D_sm3 " If you choose curves and select a distribution, the lab will draw several curves from that family. If you choose real-data (simulated-data), and select a distribution, you will see the histogram of a real (simulated) data set having that distribution with a curve from the chosen family superimposed. If you choose data-set and select one of the data sets, you will see the histogram of that data set and the lab will select the best of all possible curves from any distribution family superimposed." global D_sm3 " If you choose curves and select a distribution, the lab will draw several curves from that family. If you choose simulated-data, and select a distribution, you will see the histogram of a simulated data set (with n=500) having that distribution with a curve from the chosen family superimposed." * global D_sm4 "NOTE: We suggest you read the help file while you run the lab. You can switch among the help window, graphics window, and dialog box by clicking anywhere in the window you want to see." wdctl static D_sm1 5 5 200 24 wdctl static D_sm2 5 35 200 64 wdctl static D_sm3 5 105 200 60 * wdctl static D_sm4 5 110 125 50 global D_sm8 "Option" * global D_sm9 "Curves Real-Data Simulated-Data Data-Set" global D_sm9 "Curves Simulated-Data" wdctl static D_sm8 220 145 60 10 global D_var2 "Curves" * wdctl ssimple D_var2 D_sm9 100 125 60 50 wdctl ssimple D_var2 D_sm9 220 155 60 30 global D_sm6 "Distribution Family" global D_sm7 "Normal t Chi-square F Beta Cauchy Exponential Gamma" global D_sm7 "$D_sm7 Laplace Logistic Lognormal Pareto" global D_sm7 "$D_sm7 Uniform Weibull" wdctl static D_sm6 220 5 70 10 global D_var1 "Normal" wdctl ssimple D_var1 D_sm7 220 18 50 125 * global D_sm11 "Data Set" * global D_sm12 "Data-1 Data-2 Data-3 Data-4 Data-5 Data-6 Data-7" * wdctl static D_sm11 240 115 50 10 global D_var3 "Data-1" * wdctl ssimple D_var3 D_sm12 240 125 50 60 wdctl button "Run" 5 165 25 14 D_b1 wdctl button "Close" 35 165 25 14 D_b2 wdctl button "Help" 65 165 25 14 D_b3 * distdr 0 global D_b1 "distdr" global D_b2 "exit 1234" global D_b3 "whelp dist" cap noi wdlg "How are Populations Distributed?" $D_dlgx $D_dlgy 300 200 restore end program define distdr version 4.0 *----------------------------------------------------------------------- if "$D_var2"=="Simulated-Data" { *----------------------------------------------------------------------- clear gph open gph pen 1 set obs 500 gen y=. gen y1=. gen x=. gen fx=. local ioptf=0 local nbins=20 if "$D_var1"=="Normal" { replace y1=100+15*invnorm(uniform()) replace y=max(y1,55) replace y=min(y1,145) replace x=55+90*(_n-1)/499 replace fx=exp(-((x-100)^2)/(2*225))/sqrt(2*_pi*225) local xmin=55 local xmax=145 global D_var5 "Normal (mean=100,variance=225)" global yl "0,.01,.02,.03" global xl "55,70,85,100,115,130,145"} if "$D_var1"=="Lognormal" { replace y1=exp(invnorm(uniform())) replace y=min(y1,7) replace x=7*(_n-1)/499 pdf_logn x fx 0 1 local xmin=0 local xmax=7 global D_var5 "Lognormal (a=0, b=1)" global yl "0,.1,.2,.3,.4,.5,.6,.7" global xl "0,1,2,3,4,5,6,7"} if "$D_var1"=="Cauchy" { replace y1=invnorm(uniform())/invnorm(uniform()) replace y=max(y1,-10) replace y=min(y1,10) replace x=-10+20*(_n-1)/499 replace fx=1/(_pi*(1+x^2)) local xmin=-10 local xmax=10 global D_var5 "Cauchy" global yl "0,.05,.10,.15,.20,.25,.30,.35" global xl "-10,-5,0,5,10"} if "$D_var1"=="Weibull" { local c=2 replace y1=(-log(1-uniform()))^(1/`c') replace y=min(y1,3) replace x=3*(_n-1)/499 pdf_weib x fx `c' local xmin=0 local xmax=3 global D_var5 "Weibull (c=2)" global yl "0,.25,.50,.75,1" global xl "0,1,2,3"} if "$D_var1"=="Laplace" { replace x=uniform() replace y1=-sign(x-.5)*log(uniform())/sqrt(2) replace y=min(y1,3) replace y=max(y1,-3) replace x=-3+6*(_n-1)/499 pdf_lap x fx 0 1 local xmin=-3 local xmax=3 global D_var5 "Laplace (mu=0, sig2=1)" global yl "0,.25,.50,.75" global xl "-3,-2,-1,0,1,2,3"} if "$D_var1"=="Logistic" { local a=0 local b=1 replace x=uniform() replace y1=`a'+`b'*log(x/(1-x)) replace y=min(y1,5) replace y=max(y1,-5) replace x=-5+10*(_n-1)/499 pdf_logs x fx 0 1 local xmin=-5 local xmax=5 global D_var5 "Logistic (a=`a', b=`b')" global yl "0,.1,.2,.3" global xl "-5,-4,-3,-2,-1,0,1,2,3,4,5"} if "$D_var1"=="Pareto" { local a=2 replace x=uniform() replace y1=(1-x)^(-1/`a') replace y=min(y1,5) replace y=max(y1,1) replace x=1+4*(_n-1)/500 pdf_par x fx `a' local xmin=1 local xmax=5 global D_var5 "Pareto (a=`a')" global yl "0,.5,1,1.5,2" global xl "1,2,3,4,5"} if "$D_var1"=="Gamma" { replace y1=-log(uniform())-log(uniform()) replace y=min(y1,6) replace x=6*(_n-1)/499 pdf_gam x fx 2 local xmin=0 local xmax=6 global D_var5 "Gamma (alpha=2)" global yl "0,.1,.2,.3,.4" global xl "0,2,4,6"} if "$D_var1"=="Beta" { replace x=-log(uniform())-log(uniform()) replace y1=0 replace y1=y1-log(uniform())-log(uniform())-log(uniform()) replace y1=y1-log(uniform())-log(uniform())-log(uniform()) replace y1=y1-log(uniform())-log(uniform())-log(uniform()) replace y1=y1/(y1+x) replace y=y1 replace x=(_n-1)/499 pdf_beta x fx 9 2 local xmin=0 local xmax=1 global D_var5 "Beta (p=9, q=2)" global yl "0,1,2,3,4" global xl "0,.2,.4,.6,.8,1"} if "$D_var1"=="t" { local df=5 replace y1=uniform() replace y1=sign(y1-.5)*invt(`df',1-2*min(y1,1-y1)) replace y=max(y1,-4) replace y=min(y1,4) replace x=-4+8*(_n-1)/499 pdf_t x fx `df' local xmin=-4 local xmax=4 global D_var5 "t with `df' degrees of freedom" global yl "0,.1,.2,.3,.4" global xl "-4,-3,-2,-1,0,1,2,3,4"} if "$D_var1"=="Chi-square" { local df=5 replace y1=invnorm(uniform())^2 local i=2 while `i' <= `df' { replace y1=y1+invnorm(uniform())^2 local i=`i'+1} replace y=min(y1,20) replace x=20*(_n-1)/499 pdf_chi2 x fx `df' local xmin=0 local xmax=20 global D_var5 "Chi-square with `df' degrees of freedom" global yl "0,.025,.05,.075,.1,.125,.15,.175" global xl "0,5,10,15,20"} if "$D_var1"=="F" { local df1=10 local df2=15 replace y1=invfprob(`df1',`df2',uniform()) replace y=min(y1,6) replace x=6*(_n-1)/499 pdf_f x fx 10 15 local xmin=0 local xmax=6 global D_var5 "F with `df1' and `df2' degrees of freedom" global yl "0,.2,.4,.6,.8" global xl "0,2,4,6"} if "$D_var1"=="Exponential" { replace y1=-log(uniform()) replace y=min(y1,6) replace x=0+6*(_n-1)/499 replace fx=exp(-x) local xmin=0 local xmax=6 global D_var5 "Exponential(mean=1)" global yl "0,.25,.50,.75,1" global xl "0,1,2,3,4,5,6"} if "$D_var1"=="Uniform" { replace y1=uniform() replace y=y1 graph y y, s(i) xlab(0,.25,.50,.75,1) ylab(0,.25,.50,.75,1,1.25,1.50) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0) gphconv replace x=(_n-1)/(_N-1) replace fx=1 gph pen 3 lines x fx gph pen 2 histgm y 0 1 20 } if "$D_var1"!="Uniform" { graph fx x, c(l) s(i) xlab($xl) ylab($yl) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0) pen(3) gphconv gph pen 2 histgm y `xmin' `xmax' `nbins'} gph text 1000 16000 0 0 Simulated Data from $D_var5 gph text 2000 16000 0 0 (Any data outside x range are in the end boxes) showmom y1 y exit} *----------------------------------------------------------------------- if "$D_var2"=="Curves" { *----------------------------------------------------------------------- gph open clear *---------------------------- if "$D_var1"=="Normal" { *---------------------------- set obs 501 gen y=. gen x=5*((_n-251)/250) normplot 0 1 x y 1 0 0.0 .42 normplot 1 1 x y 2 0 1.5 .42 normplot -1 1 x y 3 0 -1.5 .42 normplot 0 4 x y 4 0 -4.0 .05 normplot 0 .25 x y 6 0 1.0 .60 gph pen 2 gph text 500 16000 0 0 Five Members of the Normal Family exit} *---------------------------- if "$D_var1"=="Beta" { *---------------------------- set obs 200 gen y=. gen x=(_n-.5)/200. betaplot 3 3 x y 1 0 .5 2.1 betaplot .5 .5 x y 2 -1 0. 6. betaplot 9 2 x y 3 1 .9 4. betaplot 2 9 x y 6 -1 .1 4. gph pen 2 gph text 500 16000 0 0 Four Members of the Beta Family exit} *--------------------------------- if "$D_var1"=="Exponential" { *--------------------------------- set obs 500 gen y=. gen x=15*(_n-.5)/500. expplot 2 x y 1 0 .4 .4 expplot 3 x y 2 0 .4 .3 expplot 4 x y 3 0 .4 .23 expplot 5 x y 6 0 .4 .17 gph pen 2 gph text 500 16000 0 0 Four Members of the Exponential Family exit} *----------------------- if "$D_var1"=="F" { *----------------------- set obs 201 gen y=. gen x=3*(_n-1)/200. pdf_f x y 10 10 graph y x, xlab(0,1,2,3) ylab(0,1,2,3) c(l) s(.) pen(3) gphconv local df=10 * drtext 2 2 -1 df=`df' local df=20 while `df' <= 200 { pdf_f x y `df' `df' lines x y local df=`df'+10 } gph pen 2 gph text 1000 16000 0 0 F curves for degrees of freedom (10,10),...,(200,200) exit} *--------------------------------- if "$D_var1"=="Chi-square" { *--------------------------------- set obs 201 gen y=. gen x=60*(_n-1)/200. pdf_chi2 x y 10 graph y x, xlab(0,50,100,150,200,250) ylab(0,.025,.050,.075,.1) c(l) s(.) pen(3) gphconv local df=20 while `df' <= 200 { if `df' > 30 {replace x=250*(_n-1)/200} pdf_chi2 x y `df' lines x y local df=`df'+10 } gph pen 2 gph text 1000 16000 0 0 Chi-square curves for degrees of freedom 10,20,...,200 exit} *----------------------- if "$D_var1"=="t" { *----------------------- set obs 201 gen x=3*(_n-101)/100. gen y=exp(-(x^2)/2.)/sqrt(2.*_pi) graph y x, xlab(-3,-2,-1,0,1,2,3) ylab(0,.1,.2,.3,.4) c(l) s(.) pen(3) gphconv gph pen 1 local df=1 while `df' <= 30 { pdf_t x y `df' lines x y local df=`df'+1 } gph pen 2 gph text 1000 16000 0 0 Z curve and t curves for degrees of freedom 1,2,...,30 exit} *---------------------------- if "$D_var1"=="Cauchy" { *---------------------------- set obs 201 gen x=5*(_n-101)/100. gen y=exp(-(x^2)/2.)/sqrt(2.*_pi) graph y x, xlab(-5,-4,-3,-2,-1,0,1,2,3,4,5) ylab(0,.1,.2,.3,.4) c(l) s(.) pen(3) gphconv drtext 1 .35 -1 N(0,1) gph pen 1 replace y=1/(_pi*(1+x^2)) lines x y drtext 0 .2 0 Cauchy gph pen 2 gph text 1000 16000 0 0 Comparing Standard Normal and Standard Cauchy exit} *----------------------------- if "$D_var1"=="Weibull" { *----------------------------- set obs 200 gen x=3*(_n-.5)/200. gen y=1.6*uniform() graph y x, xlab(0,1,2,3) ylab(0,.4,.8,1.2,1.6) s(i) pen(3) gphconv weibplot 1. x y 1 -1 .2 .80 weibplot 2. x y 2 -1 .85 .80 weibplot 3. x y 3 -1 .8 1.25 weibplot 4. x y 6 -1 1.1 1.5 gph pen 2 gph text 1000 16000 0 0 Four Members of the Weibull Family exit} *--------------------------- if "$D_var1"=="Gamma" { *--------------------------- set obs 200 gen x=10*(_n-.5)/200. gen y=uniform() graph y x, xlab(0,2,4,6,8,10) ylab(0,.2,.4,.6,.8,1) s(i) pen(3) gphconv gammplot 1. x y 1 -1 1 .8 gammplot 2. x y 2 -1 1.5 .4 gammplot 3. x y 3 -1 2.5 .3 gammplot 4. x y 6 -1 6 .15 gph pen 2 gph text 1000 16000 0 0 Four Members of the Gamma Family exit} *------------------------------- if "$D_var1"=="Lognormal" { *------------------------------- set obs 200 gen x=6*(_n-.5)/200. gen y=uniform() graph y x, xlab(0,2,4,6) ylab(0,.2,.4,.6,.8,1) s(i) pen(3) gphconv lognplot 0 1 x y 1 -1 .5 .6 lognplot 1 1 x y 2 -1 2 .2 lognplot 0 .25 x y 3 -1 1.2 .8 lognplot 1 .25 x y 4 -1 3 .3 lognplot 0 4 x y 6 -1 .2 .95 gph pen 2 gph text 1000 16000 0 0 Five Members of the Lognormal Family exit} *----------------------------- if "$D_var1"=="Laplace" { *----------------------------- set obs 401 gen x=6*(_n-201)/200. gen y=1.5*uniform() graph y x, xlab(-6,-4,-2,0,2,4,6) ylab(0,.25,.5,.75,1,1.25,1.5) s(i) pen(3) gphconv lapplot 0 1.00 x y 1 -1 -.3 .8 lapplot -2 1.00 x y 2 1 -2 .8 lapplot 2 1.00 x y 3 -1 2 .8 lapplot 0 4.00 x y 4 -1 -.3 .4 lapplot 0 .25 x y 6 -1 0 1.25 gph pen 2 gph text 1000 16000 0 0 Five Members of the Laplace Family exit} *------------------------------- if "$D_var1"=="Logistic" { *------------------------------- set obs 401 gen x=10*(_n-201)/200. gen y=.5*uniform() graph y x, xlab(-10,-5,0,5,10) ylab(0,.1,.2,.3,.4,.5) s(i) pen(3) gphconv logsplot -2 1.0 x y 1 1 -3.0 .2 logsplot 0 1.0 x y 2 0 0 .25 logsplot 2 1.0 x y 3 -1 3 .2 logsplot 0 2.0 x y 4 -1 3 .06 logsplot 0 .5 x y 6 -1 1 .4 gph pen 2 gph text 1000 16000 0 0 Five Members of the Logistic Family exit} *----------------------------- if "$D_var1"=="Uniform" { *----------------------------- set obs 201 gen x=4*(uniform()-.5) gen y=2*uniform() graph y x, xlab(-2,-1,0,1,2) ylab(0,.5,1,1.5,2,2.5) s(i) pen(3) gphconv unifplot 0 1 x y 1 -1 1 1 unifplot -.75 .75 x y 2 1 -1 .7 unifplot -.25 .25 x y 3 -1 .3 2 unifplot -2 2 x y 4 1 2 .3 gph pen 2 gph text 1000 16000 0 0 Four Members of the Uniform Family exit} *---------------------------- if "$D_var1"=="Pareto" { *---------------------------- set obs 200 gen x=1+(_n-.5)/200. gen y=4*uniform() graph y x, xlab(1,1.25,1.50,1.75,2) ylab(0,1,2,3,4) s(i) pen(3) gphconv parplot 1. x y 1 -1 1 1 parplot 2. x y 2 -1 1 2 parplot 3. x y 3 -1 1 3 parplot 4. x y 6 -1 1 4 gph pen 2 gph text 1000 16000 0 0 Four Members of the Pareto Family exit} } *----------------------------------------------------------------------- else { *----------------------------------------------------------------------- sstopbox stop "This lab still needs work" exit} gph close end program define showmom version 4.0 local y1="`1'" local y="`2'" sum `y1' local mom1=_result(3) local dmom1: display %8.4f `mom1' replace `y1'=`y1'-`mom1' replace `y'=`y1'*`y1' sum `y' local mom2=_result(3) local dmom2: display %8.4f `mom2' replace `y'=`y'*`y1' sum `y' local mom3=_result(3) local dmom3: display %8.4f `mom3' replace `y'=`y'*`y1' sum `y' local mom4=_result(3) local dmom4: display %8.4f `mom4' local alpha1=`mom3'/(`mom2'^(1.5)) local dalpha1: display %8.4f `alpha1' local alpha2=`mom4'/(`mom2'^2) local dalpha2: display %8.4f `alpha2' gph text 3000 16000 0 0 Moments: `dmom1',`dmom2',`dmom3',`dmom4',`dalpha1',`dalpha2' end program define normplot version 4.0 local mu = `1' local var = `2' local x = "`3'" local y = "`4'" local penno = `5' local centopt= `6' local xl = `7' local yl = `8' replace `y' = exp(-((`x'-`mu')^2)/(2*`var'))/sqrt(2*_pi*`var') graph `y' `x', c(l) s(i) xlabel(-5,-4,-3,-2,-1,0,1,2,3,4,5) ylabel yscale(0,.8) xscale(-5,5) pen(`penno') gphconv drtext `xl' `yl' `centopt' N(`mu',`var') end program define expplot version 4.0 local mu = `1' local x = "`2'" local y = "`3'" local penno = `4' local centopt= `5' local xl = `6' local yl = `7' replace `y'=exp(-`x'/`mu')/`mu' graph `y' `x', c(l) s(i) xlabel(0,5,10,15) ylabel(0,.1,.2,.3,.4,.5) yscale(0,.5) xscale(0,15) pen(`penno') gphconv drtext `xl' `yl' `centopt' mu=`mu' end program define betaplot version 4.0 local p = `1' local q = `2' local x = "`3'" local y = "`4'" local penno = `5' local centopt= `6' local xl = `7' local yl = `8' pdf_beta `x' `y' `p' `q' graph `y' `x', c(l) s(i) xlabel(0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1) ylabel yscale(0,7) xscale(0,1) pen(`penno') gphconv drtext `xl' `yl' `centopt' Beta(p=`p',q=`q') end program define weibplot version 4.0 local c = `1' local x = "`2'" local y = "`3'" local penno = `4' local centopt= `5' local xl = `6' local yl = `7' pdf_weib `x' `y' `c' gph pen `penno' lines `x' `y' drtext `xl' `yl' `centopt' c=`c' end program define gammplot version 4.0 local alpha = `1' local x = "`2'" local y = "`3'" local penno = `4' local centopt= `5' local xl = `6' local yl = `7' pdf_gam `x' `y' `alpha' gph pen `penno' lines `x' `y' drtext `xl' `yl' `centopt' alpha=`alpha' end program define lognplot version 4.0 local a = `1' local b2= `2' local x = "`3'" local y = "`4'" local penno = `5' local centopt= `6' local xl = `7' local yl = `8' pdf_logn `x' `y' `a' `b2' gph pen `penno' lines `x' `y' drtext `xl' `yl' `centopt' a=`a',b=`b2' end program define lapplot version 4.0 local mu = `1' local sig2 = `2' local x = "`3'" local y = "`4'" local penno = `5' local centopt= `6' local xl = `7' local yl = `8' pdf_lap `x' `y' `mu' `sig2' gph pen `penno' lines `x' `y' drtext `xl' `yl' `centopt' mu=`mu',sig2=`sig2' end program define logsplot version 4.0 local alpha = `1' local beta = `2' local x = "`3'" local y = "`4'" local penno = `5' local centopt= `6' local xl = `7' local yl = `8' pdf_logs `x' `y' `alpha' `beta' gph pen `penno' lines `x' `y' drtext `xl' `yl' `centopt' a=`alpha',b=`beta' end program define unifplot version 4.0 local alpha = `1' local beta = `2' local x = "`3'" local y = "`4'" local penno = `5' local centopt= `6' local xl = `7' local yl = `8' replace `x'=`alpha'+(`beta'-`alpha')*(_n-1)/(_N-1) local yl2=1/(`beta'-`alpha') replace `y'=`yl2' gph pen `penno' lines `x' `y' drline `alpha' `yl2' `alpha' 0 drline `beta' `yl2' `beta' 0 drtext `xl' `yl' `centopt' U(`alpha',`beta') end program define parplot version 4.0 local a = `1' local x = "`2'" local y = "`3'" local penno = `4' local centopt= `5' local xl = `6' local yl = `7' pdf_par `x' `y' `a' gph pen `penno' lines `x' `y' drtext `xl' `yl' `centopt' a=`a' end exit