*_________________.
* Steps in splitting the data for model validation.
* Splitting the data into two parts.
* 1) Create a file with only the independent variables.
* 2) Standardize the independent variables.
* 3) Run the commands below.
* 3a) Save the data file - you will need to merge it with the original data.
* 4) Do vol.
* If you want a 75 - 25 split, then do the following.
* duplex2 then vol.

* Overview of what the commands below do.
* - MATRIX program 1 reads every variable of the active dataset, saves a copy.
*   to c:\temp1.sav, orthonormalizes the columns, and writes all pairwise.
*   squared distances between cases to c:\dist.sav.
* - The distance file is sorted so the most distant pair of cases comes first.
* - MATRIX program 2 seeds two sets from the two most distant pairs and then.
*   alternately adds the remaining cases to set 1 and set 2.
* - The set membership is merged back onto c:\temp1.sav and the cases are.
*   written to c:\good1.sav (filter_$ = 0) and c:\good2.sav (filter_$ = 1).
* Each command must start on its own line and end with a period.

* Raise the loop iteration limit before running the MATRIX programs.
set mxloop = 99999.

matrix.
* Read all variables, dropping cases with missing values, and keep a copy.
get x /variables = all /file = * /missing = omit /names = nam1.
save x /outfile = 'c:\temp1.sav' /names=nam1.
compute nr = nrow(x).
compute nc = ncol(x).
* Column means and the sums of squares and cross products of the deviations.
compute j = make(nr,1,1).
compute mean = t(j)*x/nr.
print mean.
compute diff = x-j*mean.
compute bigs = t(diff)*diff.
print bigs.
* Scale each column by the inverse square root of its sum of squares.
* NOTE(review): z scales the raw x, not the centered diff - presumably this.
* relies on step 2 having standardized the variables already - confirm.
compute s1 =mdiag(diag(bigs)&**-.5).
print s1.
compute z =x*s1.
compute zpz =t(z)*z.
print zpz.
* Cholesky factor of zpz. diff1 is printed as a check that t(t1)*t1 gives zpz.
compute t1 = chol(zpz).
print t1.
compute diff1 = zpz -t(t1)*t1.
print diff1.
* Orthonormalized data. di = t(w)*w is printed as a check.
compute w = z*inv(t1).
compute di = t(w)*w.
print di.
*compute distances.
* dist gets one row per pair of cases: first case, second case, squared distance.
compute d1 = make(nc,1,0).
compute d2 = make(nc,1,0).
compute bign = nr*(nr-1)/2.
compute dist = make(bign,3,0).
compute iii=0.
loop ii = 1 to nr.
  loop jj = ii+1 to nr.
    compute iii = iii+1.
    loop kk = 1 to nc.
      compute d1(kk,1) = w(ii,kk).
      compute d2(kk,1) = w(jj,kk).
    end loop.
    compute dist(iii,1) = ii.
    compute dist(iii,2) = jj.
    compute dist(iii,3) = t(d1-d2)*(d1-d2).
  end loop.
end loop.
save w/outfile = 'c:\w.sav'.
save dist/outfile = 'c:\dist.sav'.
end matrix.

* Sort the pairs by descending distance so the most distant pair is in row 1.
GET FILE='C:\dist.sav'.
SORT CASES BY col3 (D) .
SAVE OUTFILE='C:\dist.sav' /COMPRESSED.

matrix.
get dist/variables = all /file = 'C:\dist.sav'.
get w /variables = all/ file = 'C:\w.sav'.
compute nr = nrow(w).
compute nc = ncol(w).
compute d1 = make(nc,1,0).
compute d2 = make(nc,1,0).
compute mean_x = make(nc,1,0).
compute mean_y = make(nc,1,0).
compute zero1 = make(nc,1,0).
compute j1 = make(nr,1,1).
* in1 columns: 1 = case number, 2 = 1 if the case is in set 1, 3 = 2 if in set 2.
compute in1 = make (nr,3,0).
loop ii = 1 to nr.
  compute in1(ii,1) = ii.
end loop.
* Seed set 1 with the most distant pair and set 2 with the next pair.
* NOTE(review): the second pair is not checked for overlap with the first, so.
* a case could be flagged in both sets - confirm whether that is intended.
compute in1(dist(1,1),2) = 1.
compute in1(dist(1,2),2) = 1.
compute in1(dist(2,1),3) = 2.
compute in1(dist(2,2),3) =2.
* print in1.
* Mean of the set 1 seed cases.
compute mean_x = zero1.
compute ind1 = 0.
loop ii = 1 to nr.
  do if (in1(ii,2) = 1).
    compute ind1 = ind1 +1.
    loop jj = 1 to nc.
      *print ii.
      *print jj.
      *print ind1.
      *print in1(ii,1).
      *print w(in1(ii,1),jj).
      compute mean_x(jj,1) = mean_x(jj,1)+w(in1(ii,1),jj).
    end loop.
  end if.
end loop.
compute mean_x = mean_x/ind1.
print mean_x.
compute ii = 0.
compute jj = 0.
* Mean of the set 2 seed cases. It is printed but not used further below.
compute mean_y = zero1.
compute ind1 = 0.
loop ii = 1 to nr.
  do if (in1(ii,3) = 2).
    compute ind1 = ind1 +1.
    loop jj = 1 to nc.
      *print ii.
      *print jj.
      *print ind1.
      *print in1(ii,1).
      *print w(in1(ii,1),jj).
      compute mean_y(jj,1) = mean_y(jj,1)+w(in1(ii,1),jj).
    end loop.
  end if.
end loop.
compute mean_y = mean_y/ind1.
print mean_y.
* find distance.
* dist1 columns: 1 = case number, 2 = squared distance of the case from mean_x.
compute dist1 = make(nr,2,-1).
loop ii = 1 to nr.
  loop kk = 1 to nc.
    compute d1(kk,1) = w(ii,kk).
    compute d2(kk,1) = mean_x(kk,1).
  end loop.
  compute dist1(ii,1) = ii.
  compute dist1(ii,2) = t(d1-d2)*(d1-d2).
end loop.
*print dist1.
* Zero the distance of every case that is already assigned to a set.
loop ii = 1 to nr.
  do if (in1(ii,2) > 0 or in1(ii,3) >0).
    compute dist1(ii,2) = 0.
  end if.
end loop.
*print dist1.
*print in1.
* Alternately give the most distant remaining case to set 1 and then to set 2.
loop iii = 1 to nr.
  * sas1(1,2) is the total remaining distance. ind12(1,2) is the set 1 count.
  compute sas1 = t(j1)*dist1.
  compute ind12 = t(j1)*in1.
  * NOTE(review): the two DO IF statements below were reconstructed. The text.
  * here had been reduced to the fragment do if ind12(1,2) 0). while both of.
  * the matching END IF statements survived. The guard on sas1 follows from.
  * that fragment. The nr/2 limit on ind12(1,2) is an assumption - confirm.
  * the intended set 1 size.
  do if (ind12(1,2) < nr/2).
    do if (sas1(1,2) > 0).
      compute max1 = cmax(dist1).
      * print max1.
      loop iv1 = 1 to nr.
        do if (abs(max1(1,2)-dist1(iv1,2) )< .000001).
          compute in1(iv1,2) = 1.
          compute dist1(iv1,2) = 0.
        end if.
      end loop.
      compute max1 = cmax(dist1).
      loop iv2 = 1 to nr.
        do if (abs(max1(1,2)-dist1(iv2,2) )< .000001).
          compute in1(iv2,3) = 2.
          compute dist1(iv2,2) = 0.
        end if.
      end loop.
    end if.
  end if.
end loop.
*print dist1.
*print in1.
save in1/outfile = 'c:\in1.sav'.
end matrix.

* Attach the set membership columns to the saved copy of the data.
GET FILE='c:\temp1.sav'.
MATCH FILES /FILE=*
 /FILE='C:\in1.sav'.
EXECUTE.
* filter_$ is 1 for cases flagged in set 1 and 0 for all other cases.
compute filter_$ = col2.
execute.
SAVE OUTFILE='C:\good.sav' /COMPRESSED.
* Cases with filter_$ = 0, saved without the helper columns.
FILTER OFF.
USE ALL.
SELECT IF(filter_$ =0).
EXECUTE .
SAVE OUTFILE='C:\good1.sav' /drop col1 to filter_$ /COMPRESSED.
* Cases with filter_$ = 1.
GET FILE='C:\good.sav'.
FILTER OFF.
USE ALL.
SELECT IF(filter_$=1).
EXECUTE .
SAVE OUTFILE='C:\good2.sav' /COMPRESSED.
GET FILE='C:\good.sav'.