## EM for the estimate the covariance matrix ## function to calculate the parameters par.est<-function(pM, pS){ sX2<-pS[1,1] sM2<-pS[2,2] sY2<-pS[3,3] sXM<-pS[1,2] sXY<-pS[1,3] sMY<-pS[2,3] mX <-pM[1] mM <-pM[2] mY <-pM[3] pa<-sXM/sX2 pb<-(sMY*sX2-sXM*sXY)/(sX2*sM2-sXM^2) pc<-(sXY*sM2-sXM*sMY)/(sX2*sM2-sXM^2) ps1<-sX2 ps2<-sM2-sXM^2/sX2 ps3<-(sX2*sM2*sY2-sX2*sMY^2-sM2*sXY^2-sY2*sXM^2+2*sXM*sXY*sMY)/(sX2*sM2-sXM^2) pm1<-mX pm2<-mM - pa*mX pm3<-mY - pb*mM - pc*mX return(c(pm2,pm3,pa,pb,pc,ps2,ps3)) } ## the M-step M_step<-function(sM, sS, N){ ## sM: first moment vector ## sS: second moment pM <- sM/N pS <- sS/(N-1) - N/(N-1)*pM%*%t(pM) return(list(pM=pM, pS=pS)) } ## the E-step E_step<-function(dset, pM, pS){ X<-dset[,1] M<-dset[,2] Y<-dset[,3] n<-dim(dset)[1] sX2<-rep(0,n) sM2<-rep(0,n) sY2<-rep(0,n) sXM<-rep(0,n) sXY<-rep(0,n) sMY<-rep(0,n) mX <-rep(0,n) mM <-rep(0,n) mY <-rep(0,n) for (i in 1:n){ ## process missing data in X if (!is.na(X[i])){ ## X is available if (!is.na(M[i])){ ## M is available if (!is.na(Y[i])){ ## X, M, Y is available mX[i]<-X[i] mM[i]<-M[i] mY[i]<-Y[i] sX2[i]<-X[i]^2 sY2[i]<-Y[i]^2 sM2[i]<-M[i]^2 sXM[i]<-X[i]*M[i] sXY[i]<-X[i]*Y[i] sMY[i]<-M[i]*Y[i] }else{ ## X & M observed but Y is missing mX[i]<-X[i] mM[i]<-M[i] mY[i]<-pM[3] + pS[3,1:2]%*%solve(pS[1:2,1:2])%*%(c(X[i]-pM[1], M[i]-pM[2])) sX2[i]<-mX[i]*mX[i] sM2[i]<-mM[i]*mM[i] sY2[i]<-mY[i]*mY[i]+pS[3,3]-pS[3,1:2]%*%solve(pS[1:2,1:2])%*%(pS[1:2,3]) sXM[i]<-mX[i]*mM[i] sXY[i]<-mX[i]*mY[i] sMY[i]<-mM[i]*mY[i] } }else{ ## M is missing if (!is.na(Y[i])){ ## X, Y is available but M is missing mX[i]<-X[i] mM[i]<-pM[2] + pS[2,c(1,3)]%*%solve(pS[c(1,3),c(1,3)])%*%(c(X[i]-pM[1],Y[i]-pM[3])) mY[i]<-Y[i] sX2[i]<-mX[i]*mX[i] sM2[i]<-mM[i]*mM[i]+pS[2,2]-pS[2,c(1,3)]%*%solve(pS[c(1,3),c(1,3)])%*%(pS[c(1,3),2]) sY2[i]<-mY[i]*mY[i] sXM[i]<-mX[i]*mM[i] sXY[i]<-mX[i]*mY[i] sMY[i]<-mM[i]*mY[i] }else{ ## X is observed but M and Y are missing mX[i]<-X[i] mM[i]<-pM[2] + pS[1,2]/pS[1,1]*(X[i]-pM[1]) mY[i]<-pM[3] + pS[1,3]/pS[1,1]*(X[i]-pM[1]) sX2[i]<-mX[i]*mX[i] sM2[i]<-mM[i]*mM[i] + pS[2,2]-pS[1,2]^2/pS[1,1] sY2[i]<-mY[i]*mY[i] + pS[3,3]-pS[1,3]^2/pS[1,1] sXM[i]<-mX[i]*mM[i] sXY[i]<-mX[i]*mY[i] sMY[i]<-mM[i]*mY[i] + pS[2,3]-pS[1,2]*pS[1,3]/pS[1,1] } } }else{ ## X is missing if (!is.na(M[i])){ ## M is available if (!is.na(Y[i])){ ## X is missing, M & Y are available mX[i]<-pM[1] + pS[1,2:3]%*%solve(pS[2:3,2:3])%*%(c(M[i]-pM[2], Y[i]-pM[3])) mM[i]<-M[i] mY[i]<-Y[i] sX2[i]<-mX[i]*mX[i]+pS[1,1]-pS[1,2:3]%*%solve(pS[2:3,2:3])%*%pS[2:3,1] sM2[i]<-mM[i]*mM[i] sY2[i]<-mY[i]*mY[i] sXM[i]<-mX[i]*mM[i] sXY[i]<-mX[i]*mY[i] sMY[i]<-mM[i]*mY[i] }else{ ## Y and X is missing but M is available mX[i]<-pM[1]+pS[1,2]/pS[2,2]*(M[i]-pM[2]) mM[i]<-M[i] mY[i]<-pM[3]+pS[2,3]/pS[2,2]*(M[i]-pM[2]) sX2[i]<-mX[i]*mX[i]+pS[1,1]-pS[1,2]^2/pS[2,2] sM2[i]<-mM[i]*mM[i] sY2[i]<-mY[i]*mY[i]+pS[3,3]-pS[2,3]^2/pS[2,2] sXM[i]<-mX[i]*mM[i] sXY[i]<-mX[i]*mY[i]+pS[1,3]-pS[1,2]*pS[2,3]/pS[2,2] sMY[i]<-mM[i]*mY[i] } }else{ ## X, M is missing mX[i]<-pM[1]+pS[1,3]/pS[3,3]*(Y[i]-pM[3]) mM[i]<-pM[2]+pS[2,3]/pS[3,3]*(Y[i]-pM[3]) mY[i]<-Y[i] sX2[i]<-mX[i]*mX[i]+pS[1,1]-pS[1,3]^2/pS[3,3] sM2[i]<-mM[i]*mM[i]+pS[2,2]-pS[2,3]^2/pS[3,3] sY2[i]<-mY[i]*mY[i] sXM[i]<-mX[i]*mM[i]+pS[1,2]-pS[1,3]*pS[2,3]/pS[3,3] sXY[i]<-mX[i]*mY[i] sMY[i]<-mM[i]*mY[i] } } } sM<-c(sum(mX),sum(mM),sum(mY)) sS<-array(c( sum(sX2),sum(sXM),sum(sXY),sum(sXM),sum(sM2),sum(sMY),sum(sXY),sum(sMY),sum(sY2)),dim=c(3,3)) return(list(sM=sM, sS=sS)) } ## bootstrap data using the stratified methods bootdata<-function(dset){ N<-dim(dset)[1] pat1<-NULL pat2<-NULL pat3<-NULL pat4<-NULL pat5<-NULL pat6<-NULL pat7<-NULL n1<-0 n2<-0 n3<-0 n4<-0 n5<-0 n6<-0 n7<-0 ## get the number of patterns of the missing data for (i in 1:N){ if (!is.na(dset[i,1])){ if (!is.na(dset[i,2])){ if (!is.na(dset[i,3])){ pat1<-rbind(pat1,dset[i,]) n1<-n1+1 }else{ pat2<-rbind(pat2,dset[i,]) n2<-n2+1 } }else{ if (!is.na(dset[i,3])){ pat3<-rbind(pat3,dset[i,]) n3<-n3+1 }else{ pat4<-rbind(pat4,dset[i,]) n4<-n4+1 } } }else{ if (!is.na(dset[i,2])){ if (!is.na(dset[i,3])){ pat5<-rbind(pat5,dset[i,]) n5<-n5+1 }else{ pat6<-rbind(pat6,dset[i,]) n6<-n6+1 } }else{ if (!is.na(dset[i,3])){ pat7<-rbind(pat7,dset[i,]) n7<-n7+1 } } } } ## resample the data if (n1>0) pat1<-pat1[sample(1:n1, replace=T),] if (n2>0) pat2<-pat2[sample(1:n2, replace=T),] if (n3>0) pat3<-pat3[sample(1:n3, replace=T),] if (n4>0) pat4<-pat4[sample(1:n4, replace=T),] if (n5>0) pat5<-pat5[sample(1:n5, replace=T),] if (n6>0) pat6<-pat6[sample(1:n6, replace=T),] if (n7>0) pat7<-pat7[sample(1:n7, replace=T),] temp<-rbind(pat1,pat2,pat3,pat4,pat5,pat6,pat7) return(temp) } ## Function to run EM, listwise delete, and pairwise delete just once runonce<-function(compdset, dset,N){ ##complete data case comp<-par.est(apply(compdset,2,mean), cov(compdset)) ## the list wise delete method isna<-is.na(dset) sumisna<-apply(isna,1,sum) listdata<-dset[sumisna==0,] temp<-cov(listdata,use='complete.obs') listdel<-par.est(c(mean(listdata[,1]),mean(listdata[,2]),mean(listdata[,3])),temp) ## pairwise delete temp<-cov(dset,use='pairwise.complete.obs') pairdel<-par.est(c(mean(dset[,1],na.rm=T),mean(dset[,2],na.rm=T),mean(dset[,3],na.rm=T)),temp) ## EM algorithm para$pM<-c(mean(listdata[,1]),mean(listdata[,2]),mean(listdata[,3])) para$pS<-temp e<-1 while (e>.0000001){ E.res<-E_step(dset,para$pM, para$pS) M.res<-M_step(E.res$sM, E.res$sS, N) e<-sum(abs(M.res$pM-para$pM))+sum(abs(M.res$pS-para$pS)) para<-M.res } em<-par.est(para$pM,para$pS) results<-c(comp, comp[3]*comp[4], listdel, listdel[3]*listdel[4], pairdel, pairdel[3]*pairdel[4], em, em[3]*em[4]) return(results) } ## jackknife to get the accelerated factor a jack<-function(dset){ N<-dim(dset)[1] index<-1:N jackres<-array(NA, dim=c(N,8)) for (i in 1:N){ jackdata<-dset[index[-i],] para$pM<-c(mean(jackdata[,1],na.rm=T),mean(jackdata[,2],na.rm=T),mean(jackdata[,3],na.rm=T)) para$pS<-cov(jackdata,use='complete.obs') e<-1 while (e>.0000001){ E.res<-E_step(jackdata,para$pM, para$pS) M.res<-M_step(E.res$sM, E.res$sS, N) e<-sum(abs(M.res$pM-para$pM))+sum(abs(M.res$pS-para$pS)) para<-M.res } tempres<-par.est(para$pM,para$pS) jackres[i,]<-c(tempres, tempres[3]*tempres[4]) } a<-rep(0,8) for (j in 1:8){ mu<-mean(jackres[,j]) a[j]<-1/6*sum((mu-jackres[,j])^3)/((sum((mu-jackres)^2))^3/2) } return(a) } ## Function to run the bootstrap method once bootonce<-function(compdset, dset, B, N, alpha){ ## run original data to get thetahat thetahat<-runonce(compdset, dset,N) results<-array(NA, dim=c(B,32)) for (b in 1:B){ bcomp<-compdset[sample(1:N, replace=T),] bdata<-bootdata(dset) results[b,]<-runonce(bcomp, bdata, N) } ## calculate the confidence interval alpha<-c(.025, .05, .95, .975) allCI<-NULL for (i in 1:32){ z0<-qnorm(sum(results[,i]