sink("NHANES.CSFII.analysis"); z<-matrix(scan("//user//ray//csfii.book.data//sftcsfii.dat"), 1819,6,byrow=TRUE); n <- 1819; bmi <- z[,1]; age <- z[,2]; pir <- z[,3]; alc <- z[,4]; sft1 <- log(10+z[,5]); sft2 <- log(10+z[,6]); #sft1 <- z[,5]; #sft2 <- z[,6]; sft2 <- sft2 * mean(sft1) / mean(sft2); # # # cat("",fill=T); cat("",fill=T); cat("#######################################################",fill=T); cat("Reading in the CSFII Data",fill=T); cat("W = log(10 + saturated Fat)",fill=T); cat("#######################################################",fill=T); cat("",fill=T); cat("",fill=T); x <- cbind(bmi,age,pir,alc,sft1,sft2); #x <- x[ sort.list(x[,2]),]; dimnames(x) <- list(NULL,c("body.mass.index","patient.age", "poverty.ratio","alcohol", "sft.csfii.w","sft.csfii.t")); xx <- as.data.frame(x); attach(xx); a1 <- sft.csfii.t + sft.csfii.w; a2 <- abs(sft.csfii.t - sft.csfii.w); a3 <- cor(a1,a2); a4 <- mean(sft.csfii.w); a5 <- sqrt(var(sft.csfii.w)); a6 <- mean(sft.csfii.t); a7 <- sqrt(var(sft.csfii.t)); cat("Correlation for transformation = ",a3,fill=T); cat("Mean of W in csfii = ",a4,fill=T); cat("s.d. of W in csfii = ",a5,fill=T); cat("Mean of T in csfii = ",a6,fill=T); cat("s.d. of T in csfii = ",a7,fill=T); cat("",fill=T); cat("",fill=T); cat("#######################################################",fill=T); cat("Fitting a gam regressing T on Z,s(W)",fill=T); cat("#######################################################",fill=T); cat("",fill=T); cat("",fill=T); out.spline.gam <- gam(sft.csfii.t ~ alcohol + patient.age + poverty.ratio + body.mass.index + s(sft.csfii.w), family=gaussian, maxit=100, data=xx); print(summary(out.spline.gam )); spline.preplot <- preplot(out.spline.gam); oa <- order(sft.csfii.w); ob <- order(poverty.ratio); plotname <- labels(spline.preplot)[5] tempx <- get(plotname,spline.preplot) sx <- matrix( get("x",tempx),n,1); sy <- matrix( get("y",tempx),n,1); ssy <- predict(out.spline.gam,type="response"); a4 <- mean(sx); a5 <- sqrt(var(sx)); a8 <- mean(ssy); a9 <- sqrt(var(ssy)); cat("Mean of W in CSFII",a4,fill=T); cat("s.d. of W in CSFII",a5,fill=T); cat("Mean of fit in CSFII",a8,fill=T); cat("s.d. of fit in CSFII",a9,fill=T); cat("",fill=T); cat("",fill=T); cat("#######################################################",fill=T); cat("Next fit an ordinary linear model to the CSFII data",fill=T); cat("#######################################################",fill=T); cat("",fill=T); cat("",fill=T); out.lm <- lm(sft.csfii.t ~ alcohol + patient.age + poverty.ratio + body.mass.index + sft.csfii.w); print(summary(out.lm )); cat("",fill=T); cat("",fill=T); cat("#######################################################",fill=T); cat("Read in the NHANES data",fill=T); cat("#######################################################",fill=T); cat("",fill=T); cat("",fill=T); z<-matrix(scan("//user//ray//hanes.book.data//hanebook.asc"), 3145,11,byrow=TRUE); n <- 3145; bc <- z[,1]; age <- z[,2] * 25; pir <- z[,3]; bmi <- z[,4] * 100; alc <- z[,5]; fam.hist <- z[,6]; age.menarche <- z[,7]; pre.menopause <- z[,8]; race <- z[,9]; satfat <- log(10 + (z[,10] * 100)); y <- cbind(bc,bmi,age,pir,alc,fam.hist,age.menarche,pre.menopause,race,satfat); dimnames(y) <- list(NULL,c("breast.cancer","body.mass.index","patient.age", "poverty.ratio","alcohol", "family.history","age.menarche", "pre.menopausal","race","sft.csfii.w")); yy <- as.data.frame(y); attach(yy); cat("",fill=T); cat("",fill=T); cat("#######################################################",fill=T); cat("Form the fitted E(T|Z,W) using the CSFII gam model",fill=T); cat("#######################################################",fill=T); cat("",fill=T); cat("",fill=T); sfat.pred.csfii <- predict.gam(out.spline.gam,xx,type="response"); sfat.pred.hanes <- predict.gam(out.spline.gam,yy,type="response"); a4 <- mean(sft.csfii.w); a5 <- sqrt(var(sft.csfii.w)); a6 <- mean(sfat.pred.hanes); a7 <- sqrt(var(sfat.pred.hanes)); a1 <- mean(sfat.pred.csfii); a2 <- sqrt(var(sfat.pred.csfii)); cat("Mean of W in NHANES",a4,fill=T); cat("s.d. of W in NHANES",a5,fill=T); cat("Mean of fit in NHANES",a6,fill=T); cat("s.d. of fit in NHANES",a7,fill=T); cat("Mean of fit in CSFII",a8,fill=T); cat("s.d. of fit in CSFII",a9,fill=T); cat("Mean of fit in CSFII",a1,fill=T); cat("s.d. of fit in CSFII",a2,fill=T); a3 <- var(ssy - sfat.pred.csfii); cat("Should be zero ",a3,fill=T); cat("",fill=T); cat("",fill=T); cat("#######################################################",fill=T); cat("Ordinary logistic regression calibration",fill=T); cat("Fitted X's are called sfat.pred.hanes ",fill=T); cat("#######################################################",fill=T); cat("",fill=T); cat("",fill=T); out.glim <- glm(breast.cancer ~ body.mass.index + patient.age + poverty.ratio + alcohol + family.history + age.menarche + pre.menopausal + race + sfat.pred.hanes,family=binomial(link=logit)); print(summary(out.glim)); cat("",fill=T); cat("",fill=T); cat("#######################################################",fill=T); cat("Ordinary logistic regression ignoring error",fill=T); cat("#######################################################",fill=T); cat("",fill=T); cat("",fill=T); out.glim <- glm(breast.cancer ~ body.mass.index + patient.age + poverty.ratio + alcohol + family.history + age.menarche + pre.menopausal + race + sft.csfii.w,family=binomial(link=logit)); print(summary(out.glim)); sink();