# First, data are generated, and then # analyzed using methods M1 or M2. In this analysis # MCMC method is used. # The following library is needed library(MCMCpack) # Data simulation # set the seed set.seed(5) # Set the sample size n=10000 # generation of error free covariates z1=rbinom(n, 1, 0.23) z2=rbinom(n, 1, 0.31) # Generation of an instrumental variable with three categories. # Note that the instrumental variable need not be a categorical variable. # Also, there could be multiple instrumental variables. x0star=t(rmultinom(n, 1, prob=c(0.74, 0.22, 0.04)))%*%c(1, 2, 3) # Two dummy variables corresponding to the three-category # nominal instrumental variable. x1star=ifelse(x0star==2, 1, 0) x2star=ifelse(x0star==3, 1, 0) # Generation of X ## If needed change the coefficients of f(X|X^*, Z) of the following two lines ## pr2= exp(0.47+0.8*x1star+0.36*x2star-0.99*z1-2*z2) pr3= exp(-0.09 - 4.12*x1star+0.87*x2star-0.45*z1+1.75*z2) deno=1+pr2+pr3 pr1=1/deno pr2=pr2/deno pr3=pr3/deno prob=cbind(pr1, pr2, pr3) f1=function(i) sum(rmultinom(1, 1, prob[i, ])*(1:3)) myx=unlist(lapply(seq(1:n), f1)) # Generation of the response ## If needed change the coefficients of f(Y|X, Z) of the following line eta=1.13+ 0*as.numeric(myx==2) +1.98*as.numeric(myx==3) - 0.5*z1-2*z2 pry= 1/(1+exp(-eta)) y=rbinom(n, 1, pry) ### data where x is being recorded originaldata=data.frame(myx, y, z1, z2) head(originaldata) ### generation of the surrogate W ### Generation of the misclassification matrix misclassmat=matrix(c(0.96, 0.025, 0.025, 0.1, 0.85, 0.05, 0.2, 0.1, 0.7), byrow=F, ncol=3) ### For a different misclassification, change the entries of the misclassification matrix. f2=function(i) sum(rmultinom(1, 1, misclassmat[,i])*(1:3)) myw=unlist(lapply(myx, f2)) # For checking you may print table(myw, myx) ### ### ## setting the standard deviation of the prior distribution ## of the parameters priorsd=2 # later change it to 5 # DO NOT MAKE ANY CHANGES AFTER THIS LINE ## ## ## data without x, rather the data contain the binary response, w, ## instrumental variable(s) and the error-free covariates mydata=data.frame(myw, x1star, x2star, y, z1, z2) ## The frequentist analysis of the data that does not include the ## true exposure, myx, rather include the surrogate exposure ## measurements. This analysis is referred to as M2 in the article. ## For obtaining results under M1, one should replace mydata by ## originaldata in the following lines of code. ## out1=glm(y~as.factor(myw)+z1+z2, family=binomial, data=mydata) ## The Bayesian analysis of the data ## Setting the mcmc iterations. nmcmc=15000 ## Setting the number of burn-in samples burnin=5000 myk=ncol(model.matrix(out1)) logpriorfun <- function(beta){ sum(dnorm(beta, mean=0, sd=priorsd, log=TRUE)) } posterior.m2 <- MCMClogit(y ~ as.factor(myw) + z1 + z2, data = mydata, user.prior.density=logpriorfun, mcmc=nmcmc, logfun=TRUE) ## Collecting MCMC samples after the burn-in samples mcmcsamplesm2=posterior.m2 [-c(1:burnin), ] qntfunm2=function(i) quantile(mcmcsamplesm2[, i], prob=c(0.025, 0.975)) ## The following line of code returns the posterior mean, median, standard ## deviation, and the 95% credible interval for each of the model parameters. myresultm2=list(posterior_mean=apply(mcmcsamplesm2, 2, mean), posterior_median=apply(mcmcsamplesm2, 2, median), posterior_sd=apply(mcmcsamplesm2, 2, sd), credible_interval=as.numeric(unlist(lapply(1:myk, qntfunm2))))