* clt.ado -- Central Limit Theorem teaching lab.
*
* Two programs are defined here:
*   clt    builds the dialog (explanatory text, three selection lists and
*          the Run / Close / Help buttons) and shows it.
*   cltdr  is the command stored in global D_b1, i.e. the action attached
*          to the Run button.  It reads the current selections from the
*          globals D_var1 (parent distribution), D_var2 (mode) and D_var3
*          (sample size n) and draws the requested picture.
*
* NOTE(review): wdctl, wdlg, gphconv, histgm, lines, drline, drbox and whelp
* are not defined in this file.  Comments about what they do are inferred
* from how they are called here and should be confirmed against their own
* definitions.

program define clt
    version 4.0

    * Work on a scratch data set of 500 observations.  The user's data are
    * put back by the restore at the end.
    preserve
    clear
    quietly {
        set obs 500
        gen x=.
        gen y=.
        gen xbar=.
    }

    * Explanatory text shown in the dialog.
    global D_sm11 " You saw in the 'Random Sampling Lab' that sample means from a discrete uniform 'parent' distribution tend to fall near the middle of the range of the parent. Further, as n increases, the variability in the sample means decreases."
    wdctl static D_sm11 5 5 295 24
    global D_sm12 " The 'Central Limit Theorem' says that this is true for any parent population no matter what the shape of its distribution curve. It also says that the population of all possible sample means (the 'child' population) is always bell shaped."
    wdctl static D_sm12 5 32 295 24
    global D_sm13 " This lab lets you experiment with this by sampling from one of four parent populations. By choosing 'Parent-Curves' and clicking on Run you get graphs of the four parent distributions. When you choose a value of n and a parent curve, you can either 1) generate and plot sample means one at a time or 2) generate 500 sample means all at once and then plot their histogram with the theoretical normal curve superimposed."
    wdctl static D_sm13 5 60 295 48
    global D_sm15 "Suggestions: 1) First look at the four parent curves, 2) For each parent, run the lab several times, each time increasing n and see what happens."
    wdctl static D_sm15 5 110 140 32

    * Sample size list.  Choices come from D_sm14, the selection is kept in
    * D_var3 and defaults to 10.
    global D_sm1 "n"
    * global HJN_V1=5
    * wdctl static D_sm1 60 130 10 10
    * wdctl edit HJN_V1 75 130 16 10
    global D_sm14 "5 10 15 20 25 30 50 100"
    wdctl static D_sm1 150 110 30 10
    global D_var3 "10"
    wdctl ssimple D_var3 D_sm14 150 120 30 76
    * global D_sm2 "# of bins"
    * global HJN_V2=20
    * wdctl static D_sm2 100 130 30 10
    * wdctl edit HJN_V2 135 130 16 10

    * Parent distribution list.  Selection is kept in D_var1.
    global D_sm6 "Sample from"
    global D_sm7 "Normal(0,1) Uniform(0,1) Exponential(1) 0-1"
    wdctl static D_sm6 185 110 60 10
    global D_var1 "Normal(0,1)"
    wdctl ssimple D_var1 D_sm7 185 120 55 48

    * Mode list.  Selection is kept in D_var2.
    global D_sm8 "Choice"
    global D_sm9 "Parent-Curves One-at-a-time 500-Samples"
    wdctl static D_sm8 245 110 45 10
    global D_var2 "500-Samples"
    wdctl ssimple D_var2 D_sm9 245 120 55 40

    * Buttons.  Each one names the global that holds its action.
    wdctl button "Run" 5 160 30 14 D_b1
    wdctl button "Close" 40 160 30 14 D_b2
    wdctl button "Help" 75 160 30 14 D_b3 help
    global D_b1 "cltdr"
    global D_b2 "exit 1234"
    global D_b3 "whelp clt"

    * Open the graph window, run the dialog, then tidy up.  capture noisily
    * keeps a non-zero return from the dialog (Close issues exit 1234) from
    * skipping the gph close and restore below.
    gph open
    gph pen 1
    cap noi wdlg "Central Limit Theorem" $D_dlgx $D_dlgy 320 240
    gph close
    restore
end

program define cltdr
    version 4.0

    * nints is both the number of columns and the column height limit used
    * by the one-at-a-time display at the bottom of this program.
    local nints=20
    * local nints=$HJN_V2
    * if `nints' < 2 | `nints' > 50 {
    * sstopbox stop "number of bins must be between 2 and 50"
    * exit}
    * local n=$HJN_V1
    * if `n' < 1 | `n' > 50 {
    * sstopbox stop "sample size must be between 1 and 50"
    * exit}
    local n=$D_var3

    * Start from a fresh graph.
    gph close
    gph open
    gph pen 1

    * ------------------------------------------------------------------
    * Mode 1: 500 samples at once.  Each observation is one sample and
    * xbar accumulates the sum of its n draws.
    * ------------------------------------------------------------------
    if "$D_var2"=="500-Samples" {
        gph text 1000 16000 0 0 Generating 500 Samples of Size `n' From $D_var1
        gph box 6000 10000 8000 20000 4
        replace xbar=0
        local i=1
        while `i' <= `n' {
            * Progress bar: the right edge moves from 10000 to 20000.
            local bx2=int(10000 + 10000*(`i'/`n'))
            gph box 6000 10000 8000 `bx2' 1
            if "$D_var1"=="Uniform(0,1)" {
                replace x = uniform()
            }
            if "$D_var1"=="Exponential(1)" {
                * uniform() can return exactly 0 and log(0) is missing.
                * 1-uniform() is never 0, so the draw is always defined.
                replace x = -log(1-uniform())
            }
            if "$D_var1"=="Normal(0,1)" {
                replace x = invnorm(uniform())
            }
            if "$D_var1"=="0-1" {
                replace x = uniform() < .5
            }
            replace xbar = xbar + x
            local i = `i' +1
        }
        gph close
        gph open
        gph pen 1
        replace xbar = xbar/`n'

        * Axis ranges and labels for the plot, plus the mean mu and the
        * variance sig2 of the chosen parent distribution.
        if "$D_var1"=="Uniform(0,1)" {
            local xmin=.2
            local xmax=.8
            global xl ".2,.4,.6,.8"
            global yl "0,2,4,6,8"
            local ymax=8
            local mu=.5
            local sig2=1/12.
        }
        if "$D_var1"=="Normal(0,1)" {
            local xmin=-1.5
            local xmax=1.5
            global xl "-1.5,-1,-.5,0,.5,1,1.5"
            global yl "0,.5,1,1.5,2,2.5"
            local ymax=2.5
            local mu=0
            local sig2=1.
        }
        if "$D_var1"=="Exponential(1)" {
            local xmin=0
            local xmax=2.5
            global xl "0,.5,1,1.5,2,2.5"
            global yl "0,.5,1,1.5,2,2.5"
            local ymax=2.5
            local mu=1.
            local sig2=1.
        }
        if "$D_var1"=="0-1" {
            local xmin=0
            local xmax=1
            global xl "0,.25,.50,.75,1"
            global yl "0,1,2,3,4,5,6"
            local ymax=6
            local mu=.5
            local sig2=1/4.
        }

        * Variance of the sample mean, and a histogram range of mu plus or
        * minus three standard errors split into 20 bins.
        local sig2n=`sig2'/`n'
        local xmin1=`mu'-3*sqrt(`sig2n')
        local xmax1=`mu'+3*sqrt(`sig2n')
        local nbins=20
        if "$D_var1"=="0-1" {
            * The mean of n 0-1 draws is a multiple of 1/n, so use n+1 bins
            * of width 1/n centred on those values.
            local xmin1=-.5/`n'
            local xmax1=(`n'+.5)/`n'
            local nbins=`n'+1
        }

        * Normal density with mean mu and variance sig2n on an even grid.
        replace x=`xmin' + (`xmax'-`xmin')*(_n-1)/(_N-1)
        replace y=exp(-((x-`mu')^2)/(2*`sig2n'))/sqrt(2*_pi*`sig2n')
        * summarize y
        * local y1max=1.2*_result(6)
        graph y x, c(l) s(i) xlabel($xl) ylabel($yl) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0) pen(3)
        gphconv
        gph pen 2
        histgm xbar `xmin1' `xmax1' `nbins'
        gph text 1000 16000 0 0 Sample Means of 500 Samples of Size `n' From $D_var1
        exit
    }

    * ------------------------------------------------------------------
    * Mode 2: draw the four parent distributions, one per quadrant.
    * ------------------------------------------------------------------
    if "$D_var2"=="Parent-Curves" {
        * Uniform(0,1), first bbox.
        replace x=(_n-1)/(_N-1)
        replace y=uniform()
        graph y x, s(i) xlabel(0,.25,.5,.75,1) ylabel(0,.25,.50,.75,1) l1(" ") l2(" ") b1(" ") b2(" ") bbox(2000,0,11500,16000,850,400,0)
        gphconv
        replace y=1
        lines x y
        drline 0 0 0 1
        drline 1 0 1 1
        gph text 1000 8000 0 0 Uniform(0,1)

        * Normal(0,1) on -3 to 3.
        replace x=6*(x-0.5)
        replace y=exp(-(x^2)/2)/sqrt(2*_pi)
        graph y x, c(l) s(i) xlabel(-3,-2,-1,0,1,2,3) ylabel(0,.1,.2,.3,.4) l1(" ") l2(" ") b1(" ") b2(" ") bbox(11500,0,21000,16000,850,400,0)
        gph text 11000 8000 0 0 Normal(0,1)

        * Exponential(1) on 0 to 6.
        replace x=6*(_n-1)/(_N-1)
        replace y=exp(-x)
        graph y x, c(l) s(i) xlabel(0,1,2,3,4,5,6) ylabel(0,.25,.50,.75,1) l1(" ") l2(" ") b1(" ") b2(" ") bbox(2000,16000,11500,32000,850,400,0)
        gph text 1000 24000 0 0 Exponential(1)

        * 0-1 population: two boxes of height .5, around 0 and around 1.
        replace x = -3 + 6 * (_n-1)/(_N-1)
        replace y=uniform()/2
        graph y x, s(i) xlabel(0,1) ylabel(0,.25,.50) l1(" ") l2(" ") b1(" ") b2(" ") bbox(11500,16000,21000,32000,850,400,0)
        gphconv
        drbox -.5 0 .5 .5
        drbox .5 0 1.5 .5
        gph text 11000 24000 0 0 0-1 Population
        exit
    }

    * ------------------------------------------------------------------
    * Mode 3 (any other choice, i.e. One-at-a-time): draw one sample of
    * size n at a time and stack a box in the column its mean falls in,
    * until some column holds nints boxes.
    * ------------------------------------------------------------------
    gph close
    gph open
    gph pen 1
    replace y=uniform()
    if "$D_var1"=="Normal(0,1)" {
        local xmin=-1.5
        local xmax=1.5
    }
    if "$D_var1"=="Uniform(0,1)" {
        local xmin=.2
        local xmax=.8
    }
    if "$D_var1"=="Exponential(1)" {
        local xmin=0.
        local xmax=2.5
    }
    if "$D_var1"=="0-1" {
        local xmin=0.
        local xmax=1.
    }
    * gph text 1000 16000 0 0 `xmin' `xmax'
    replace x=`xmin'+(`xmax'-`xmin')*((_n-1)/(_N-1))

    * Column width in x units and box height in y units.
    local delx=(`xmax'-`xmin')/`nints'
    local dely=1./`nints'

    * Empty axes only: s(i) with no connect option plots nothing visible.
    if "$D_var1"=="Uniform(0,1)" {
        graph y x, s(i) xlab(.2,.4,.6,.8) ylab(0,.25,.50,.75,1) /*
        */ yscale(0,1) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0)
    }
    if "$D_var1"=="Exponential(1)" {
        graph y x, s(i) xlab(0,.5,1,1.5,2,2.5) ylab(0,.25,.50,.75,1) /*
        */ yscale(0,1) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0)
    }
    if "$D_var1"=="Normal(0,1)" {
        graph y x, s(i) xlab(-1.5,-1,-.5,0,.5,1,1.5) ylab(0,.25,.50,.75,1) /*
        */ yscale(0,1) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0)
    }
    if "$D_var1"=="0-1" {
        graph y x, s(i) xlab(0,.25,.50,.75,1) ylab(0,.25,.50,.75,1) /*
        */ yscale(0,1) l1(" ") l2(" ") b1(" ") b2(" ") bbox(3000,0,23000,32000,850,400,0)
    }
    * NOTE(review): gphconv presumably sets the globals AX BX AY BY that are
    * used below to map data units to gph coordinates.  Confirm.
    gphconv
    gph text 1000 16000 0 0 Means of Samples of Size `n' From $D_var1
    * gph text 1000 16000 0 0 AX,BX,AY,BY $AX $BX $AY $BY

    * y now serves as the per-column count: y in observation k is the
    * number of boxes stacked so far in column k.
    replace y=0
    local mnc = 0
    while `mnc' < `nints' {
        * One sample of size n goes into the first n observations of x.
        if "$D_var1"=="Uniform(0,1)" {
            replace x = uniform() in 1/`n'
        }
        if "$D_var1"=="Exponential(1)" {
            * 1-uniform() avoids log(0), which would be a missing value that
            * summarize silently drops from the mean.
            replace x = -log(1-uniform()) in 1/`n'
        }
        if "$D_var1"=="Normal(0,1)" {
            replace x = invnorm(uniform()) in 1/`n'
        }
        if "$D_var1"=="0-1" {
            replace x = uniform() < .5 in 1/`n'
        }
        summarize x in 1/`n'
        local xbar = _result(3)
        * gph text 3000 16000 0 0 `xbar'

        * Column index of the mean, clamped to 1 through nints so that means
        * outside the plotted range land in the end columns.
        local ncol = min(int((`xbar'-`xmin')/`delx')+1,`nints')
        local ncol=max(`ncol',1)
        * gph text 4000 16000 0 0 `ncol'
        replace y=y+1 in `ncol'
        local yy=y[`ncol']
        * gph text 5000 16000 0 0 `yy'

        * Corners of the new box in data units ...
        local nx1=`xmin'+(`ncol'-1)*`delx'
        local nx2=`xmin'+`ncol'*`delx'
        local ny1=(y[`ncol']-1)*`dely'
        local ny2=y[`ncol']*`dely'
        * gph text 6000 16000 0 0 `nx1' `nx2' `ny1' `ny2'

        * ... and converted to gph coordinates.
        local nx1 = int($BX + $AX * `nx1')
        local nx2 = int($BX + $AX * `nx2')
        local ny1 = int($BY + $AY * `ny1')
        local ny2 = int($BY + $AY * `ny2')
        * gph text 7000 16000 0 0 `nx1' `nx2' `ny1' `ny2'
        gph box `ny2' `nx1' `ny1' `nx2' 4

        * Stop as soon as one column is full.
        if y[`ncol'] >= `nints' {
            local mnc=`nints'
        }
    }
end
exit