program define mnmd version 4.0 preserve /* Save all the current stata stuff */ clear /* Clear everything so no conflicts */ quietly { set obs 500 gen x= . gen y=. gen mean = . gen median = .} global D_sm11 " When this lab starts, you are shown four different symmetric distributions, all centered at 0. For such distributions, the sample mean and sample median are both estimating the center of the distribution, that is, they are both estimating 0. Which estimator should we use? The principal of minimum variance says if both estimators are unbiased (which these are as you'll see), we should use the estimator with the smallest variance." wdctl static D_sm11 5 5 290 40 global D_sm12 " When you pick a population to sample from and give a sample size n, the lab generates 500 samples of that size from the chosen population and calculates the sample mean and sample median from each sample. It then draws side-by-side box plots and histograms for the 500 means and 500 medians. It also writes the standard deviations of each set on their histogram." wdctl static D_sm12 5 50 290 40 global D_sm13 "Notes: 1) The sample means and medians are both centered at the center of the original population, that is, they are both unbiased. 2) You will see that for some populations, the sample mean is better and for other populations the sample median is better." wdctl static D_sm13 5 95 290 30 global HJN_V1 = 5 global HJN_1 "n" wdctl static HJN_1 5 132 10 8 wdctl edit HJN_V1 20 132 20 8 global D_sm6 "Sample from" global D_sm7 "Uniform(-.5,.5) Normal(0,1) Laplace(0,1) t_3" wdctl static D_sm6 45 132 50 10 global D_var1 "Normal(0,1)" wdctl ssimple D_var1 D_sm7 100 132 60 50 wdctl button "Run" 175 130 25 12 D_b1 wdctl button "Close" 205 130 25 12 D_b2 wdctl button "Help" 235 130 25 12 D_b3 global D_b1 "mnmddr 1" global D_b2 "exit 1234" global D_b3 "whelp mnmd" mnmddr 0 cap noi wdlg "Minimum Variance Estimation" $D_dlgx $D_dlgy 300 200 drop x y mean median restore end program define mnmddr version 4.0 local iopt=`1' local n=$HJN_V1 gph open gph pen 2 if `iopt'==0 { gph text 1000 16000 0 0 Four Population Curves replace x=5*(uniform()-.5) replace y=uniform() graph y x, s(i) xlab(-2.5,-2,-1.5,-1,-.5,0,.5,1,1.5,2,2.5) /* */ ylab(0,.2,.4,.6,.8,1) bbox(3000,0,23000,32000,850,400,0) gphconv replace x=-.5 + (_n-1)/(_N-1) replace y=1 gph pen 1 lines x y drline -.5 0 -.5 1 drline .5 0 .5 1 drline -2.5 0 -.5 0 drline .5 0 2.5 0 drtext .5 1 -1 Uniform(-.5,.5) replace x=-2.5 + 5 * (_n-1)/(_N-1) replace y=exp(-(x^2)/2)/sqrt(2*_pi) gph pen 2 lines x y drtext 0 .42 0 N(0,1) pdf_lap x y 0 1 gph pen 3 lines x y drtext 0 .72 0 Laplace(0,1) pdf_t x y 3 gph pen 5 lines x y drtext 0 .30 0 t_3 snooze 500 exit} if `n' < 5 | `n' > 50 { sstopbox stop "n must be between 5 and 50" exit} * Start while loop: gph text 2000 16000 0 0 Generating 500 Samples of Size `n' From a $D_var1 Population and gph text 3000 16000 0 0 Calculating the Sample Mean and Median of Each Sample gph box 6000 10000 8000 20000 4 local i = 1 while `i' <= 500 { * Write out every 20th sample number: if(20*int(`i'/20)==`i') { local bx2=int(10000+10000*(`i'/500)) gph box 6000 10000 8000 `bx2' 1} * Generate sample (note how we put it only in first n places in x): if "$D_var1"=="Uniform(-.5,.5)" {replace x = -.5 + uniform() in 1/`n' } if "$D_var1"=="Normal(0,1)" {replace x = invnorm(uniform()) in 1/`n'} if "$D_var1"=="Laplace(0,1)" {replace x = -sign(2*(uniform()-.5))*log(uniform()) in 1/`n'} if "$D_var1"=="t_3" { replace x = uniform() in 1/`n' replace x = sign(x-0.5)*invt(3,1-2*min(x,1-x)) in 1/`n'} * Get mean and median: summarize x in 1/`n', detail replace mean = _result(3) in `i' replace median = _result(10) in `i' * End while loop: local i = `i' + 1} * Get scaling stuff for the histograms (turn off graphics so user doesn't * see what's happening): gph close set graphics off graph mean, ylab xlab bin(10) bbox(0,16000,11500,32000,850,400,0) pen(2) local ymax1 = _result(2) local xmin1 = _result(3) local xmax1 = _result(4) graph median, ylab xlab bin(10) bbox(11500,16000,23000,32000,850,400,0) pen(3) local ymax2 = _result(2) local xmin2 = _result(3) local xmax2 = _result(4) local ymax=max(`ymax1',`ymax2') local xmin = min(`xmin1',`xmin2') local xmax = max(`xmax1',`xmax2') * Now turn graphics back on and draw box plots (move boxplot down so * title will fit later): set graphics on gph open gph pen 1 graph mean median, box ylab bbox(4000,0,23000,16000,850,400,0) * Now draw histograms (the way xscale is used makes it so have same scale): * Also find and display standard deviations of the 500 means and medians: graph mean, xlab ylab xscale(`xmin',`xmax') yscale(0,`ymax') bin(15) l1(" ") l2(" ") bbox(0,16000,11500,32000,850,400,0) pen(2) summarize mean local std: display %6.2f sqrt(_result(4)) gph text 1000 24000 0 0 s=`std' graph median, xlab ylab xscale(`xmin',`xmax') yscale(0,`ymax') bin(15) l1(" ") l2(" ") bbox(11500,16000,23000,32000,850,400,0) pen(3) summarize median local std: display %6.2f sqrt(_result(4)) gph text 12500 24000 0 0 s=`std' * Finally, put title of graph above boxplots: gph text 1000 9000 0 0 Comparison of Sample Mean and Sample Median as gph text 2000 9000 0 0 Estimators of Population Mean of gph text 3000 9000 0 0 $D_var1 Population, Sample Size=`n' gph close end exit