/* d:\paper\ruzong4\soft\hotel_sibs.sas Version 0.1, 8.03.05 Aenderungen: */ %macro va_lst(anz,namen); %global va_lst; %let va_lst=""; %do i=1 %to &anz; %let help=%scan(&namen,&i,' '); %if &i.>1 %then %let va_lst=%unquote(&va_lst &help._1 &help._2); %else %let va_lst=%unquote(&help._1 &help._2); %end; %put &va_lst.; %mend; %macro hotel_sibs(infile,ma_number,ma_names,outfile,simumf); %let max_allele=100; %let max_fam=1000; %va_lst(&ma_number.,&ma_names.); data dfile; set &infile.; array ma[&ma_number.,2] &va_lst.; complete=1; i=0; do while ((i<&ma_number.) and (complete=1)); i=i+1; if (ma[i,1]*ma[i,2]=0) then complete=0; end; if (complete=1) then output; keep fam ind father mother sex aff &va_lst.; run; proc sort; by fam father sex; run; proc iml; use dfile var { fam ind father mother sex aff &va_lst.}; read all into daten; anz_obs=NROW(daten); anz_marker=(NCOL(daten)-6)/2; rdaten=J(anz_obs,6+2*anz_marker); recode_table=J(anz_marker,&max_allele.,0); anz_allele=J(anz_marker,1,0); START recode(daten,rdaten,recode_table,anz_allele); fehler=0; anz_obs=NROW(daten); rdaten[1:anz_obs,1:6]=daten[1:anz_obs,1:6]; /* Rekodierung der Markerdaten */ do j=1 to &ma_number.; do i=1 to anz_obs; do k=1 to 2; allel=daten[i,6+2*(j-1)+k]; if (allel) then do; found=0; t=0; do while ((t&max_allele.) then fehler=1; recode_table[j,t]=allel; end; rdaten[i,6+2*(j-1)+k]=t; end; /* if (allel) */ else do; rdaten[i,6+2*(j-1)+k]=0; fehler=2; end; end; end; end; return (fehler); FINISH; START calc_laenge(anz_allele); hc_laenge=0; gc_laenge=0; do i=1 to &ma_number.; anz=anz_allele[i]; hc_laenge=hc_laenge+anz-1; gc_laenge=gc_laenge+(anz*(anz+1)/2)-1; end; erg=hc_laenge||gc_laenge; return (erg); FINISH; START hc(indiv,laenge,anz_allele); erg=J(laenge,1,0); start=0; do i=1 to &ma_number.; do k=1 to 2; allel=indiv[6+2*(i-1)+k]; if (allelallel2) then do; rette=allel1; allel1=allel2; allel2=rette; end; anz=anz_allele[i]; max_ind=(((anz+1)*anz)/2)-1; index=((anz+1)*(allel1-1))-(allel1*(allel1-1)/2)+(allel2+1-allel1); if (index<=max_ind) then erg[start+index]=erg[start+index]+1; start=start+max_ind; end; return (erg); FINISH; START create_fam_ptr(daten,fam_ptr); anz_fam=0; anz_obs=NROW(daten); alt_fam=0; do i=1 to anz_obs; if (daten[i,1]-alt_fam) then do; anz_fam=anz_fam+1; fam_ptr[anz_fam]=i; alt_fam=daten[i,1]; end; end; return (anz_fam); FINISH; fam_ptr=J(&max_fam.,1,0); anz_fam=create_fam_ptr(daten,fam_ptr); error=recode(daten,rdaten,recode_table,anz_allele); if (error) then print error; laenge=calc_laenge(anz_allele); perm=0; if (&simumf.>0) then do; perm=1; alle_hc=J(anz_fam,laenge[1],0); alle_gc=J(anz_fam,laenge[2],0); end; anz_sibs=0; hc_sibs_mean=J(laenge[1],1,0); gc_sibs_mean=J(laenge[2],1,0); hc_sibs_cov=J(laenge[1],laenge[1],0); gc_sibs_cov=J(laenge[2],laenge[2],0); fatal_error=0; do fam=1 to anz_fam; start_fam=fam_ptr[fam]; if (fam=anz_fam) then end_fam=anz_obs; else end_fam=fam_ptr[fam+1]-1; anz_aff=0; anz_unaff=0; aff_hcv=J(laenge[1],1,0); aff_gcv=J(laenge[2],1,0); unaff_hcv=J(laenge[1],1,0); unaff_gcv=J(laenge[2],1,0); do ind=start_fam to end_fam; help_fam=rdaten[ind,1]; help_ind=rdaten[ind,2]; if (rdaten[ind,3]*rdaten[ind,4]) then do; indiv=rdaten[ind:ind,]; hcv=hc(indiv,laenge[1],anz_allele); gcv=gc(indiv,laenge[2],anz_allele); if (indiv[6]-2) then do; anz_unaff=anz_unaff+1; unaff_hcv=unaff_hcv+hcv; unaff_gcv=unaff_gcv+gcv; end; else do; anz_aff=anz_aff+1; aff_hcv=aff_hcv+hcv; aff_gcv=aff_gcv+gcv; end; end; end; if (anz_aff*anz_unaff) then do; anz_sibs=anz_sibs+1; aff_hcv=aff_hcv/anz_aff; aff_gcv=aff_gcv/anz_aff; unaff_hcv=unaff_hcv/anz_unaff; unaff_gcv=unaff_gcv/anz_unaff; hcv=aff_hcv-unaff_hcv; gcv=aff_gcv-unaff_gcv; hc_sibs_mean=hc_sibs_mean+hcv; gc_sibs_mean=gc_sibs_mean+gcv; hc_sibs_cov=hc_sibs_cov+hcv*hcv`; gc_sibs_cov=gc_sibs_cov+gcv*gcv`; if (perm=1) then do; alle_hc[anz_sibs,]=hcv`; alle_gc[anz_sibs,]=gcv`; end; end; end; /* do fam=1 */ T_sq_hc_sibs=0; T_sq_gc_sibs=0; if (anz_sibs>1) then do; hc_sibs_mean=hc_sibs_mean/anz_sibs; gc_sibs_mean=gc_sibs_mean/anz_sibs; hc_sibs_cov=hc_sibs_cov-anz_sibs*hc_sibs_mean*hc_sibs_mean`; gc_sibs_cov=gc_sibs_cov-anz_sibs*gc_sibs_mean*gc_sibs_mean`; T_sq_hc_sibs=anz_sibs*((hc_sibs_mean)`) *GINV(hc_sibs_cov/(anz_sibs-1))*hc_sibs_mean; T_sq_gc_sibs=anz_sibs*((gc_sibs_mean)`) *GINV(gc_sibs_cov/(anz_sibs-1))*gc_sibs_mean; end; P_hc_sibs=1-probchi(T_sq_hc_sibs,laenge[1]); P_gc_sibs=1-probchi(T_sq_gc_sibs,laenge[2]); result=(anz_sibs||T_sq_hc_sibs||laenge[1]||P_hc_sibs) //(anz_sibs||T_sq_gc_sibs||laenge[2]||P_gc_sibs); if (anz_sibs<2) then perm=0; if (perm=1) then do; anz_sig_h=0; anz_sig_g=0; quelle=1; x=0; do i=1 to &simumf; hc_sibs_mean=J(laenge[1],1,0); gc_sibs_mean=J(laenge[2],1,0); hc_sibs_cov=J(laenge[1],laenge[1],0); gc_sibs_cov=J(laenge[2],laenge[2],0); do fam=1 to anz_sibs; call ranuni(quelle,x); if (x<.5) then hcv=alle_hc[fam,]; else hcv=-alle_hc[fam,]; hc_sibs_mean=hc_sibs_mean+hcv`; hc_sibs_cov=hc_sibs_cov+hcv`*hcv; call ranuni(quelle,x); if (x<.5) then gcv=alle_gc[fam,]; else gcv=-alle_gc[fam,]; gc_sibs_mean=gc_sibs_mean+gcv`; gc_sibs_cov=gc_sibs_cov+gcv`*gcv; end; hc_sibs_mean=hc_sibs_mean/anz_sibs; hc_sibs_cov=hc_sibs_cov-anz_sibs*hc_sibs_mean*hc_sibs_mean`; PT_sq_hc_sibs=0; PT_sq_hc_sibs=anz_sibs*((hc_sibs_mean)`) *GINV(hc_sibs_cov/(anz_sibs-1))*hc_sibs_mean; if (PT_sq_hc_sibs>=T_sq_hc_sibs) then anz_sig_h=anz_sig_h+1; gc_sibs_mean=gc_sibs_mean/anz_sibs; gc_sibs_cov=gc_sibs_cov-anz_sibs*gc_sibs_mean*gc_sibs_mean`; PT_sq_gc_sibs=0; PT_sq_gc_sibs=anz_sibs*((gc_sibs_mean)`) *GINV(gc_sibs_cov/(anz_sibs-1))*gc_sibs_mean; if (PT_sq_gc_sibs>=T_sq_gc_sibs) then anz_sig_g=anz_sig_g+1; end; /* simumf */ anz_sig_h=anz_sig_h/&simumf.; anz_sig_g=anz_sig_g/&simumf.; simumf=&simumf.; all_snp=1; result=result//(simumf||all_snp||anz_sig_h||anz_sig_g); end; CREATE res_file FROM result; APPEND FROM result; run; quit; run; data a; set res_file; file "&outfile." mod; if (_N_=1) then do; put; put "Marker: &ma_names."; put; put " no of "; put " families coding statistic df p-value"; put col1 9.0 " haplotype " col2 9.2 col3 5.0 col4 E9.2; end; if (_N_=2) then put " genotype " col2 9.2 col3 5.0 col4 E9.2; if (_N_=3) then do; put; put "Permutation based (" col1 "replicates) p-values:"; put "haplotype coding: " col3 E9.2 @; if (col2=1) then put " genotype coding: " col4 E9.2 @; else put; end; run; data a; set res_file end=schluss; array pwert[4]; retain pwert; retain familien; retain replikationen; length marker $200; if (_N_=1) then do; familien=col1; pwert[1]=col4; end; if (_N_=2) then pwert[3]=col4; if (_N_=3) then do; replikationen=col1; pwert[2]=col3; pwert[4]=col4; end; if (schluss=1) then do; marker="&ma_names."; keep pwert1--pwert4 familien replikationen marker; output; end; run; proc append base=result_hotel data=a; run; %mend;