lab:bootstrap_ci
## EM for the estimate the covariance matrix
## function to calculate the parameters
par.est<-function(pM, pS){
sX2<-pS[1,1]
sM2<-pS[2,2]
sY2<-pS[3,3]
sXM<-pS[1,2]
sXY<-pS[1,3]
sMY<-pS[2,3]
mX <-pM[1]
mM <-pM[2]
mY <-pM[3]
pa<-sXM/sX2
pb<-(sMY*sX2-sXM*sXY)/(sX2*sM2-sXM^2)
pc<-(sXY*sM2-sXM*sMY)/(sX2*sM2-sXM^2)
ps1<-sX2
ps2<-sM2-sXM^2/sX2
ps3<-(sX2*sM2*sY2-sX2*sMY^2-sM2*sXY^2-sY2*sXM^2+2*sXM*sXY*sMY)/(sX2*sM2-sXM^2)
pm1<-mX
pm2<-mM - pa*mX
pm3<-mY - pb*mM - pc*mX
return(c(pm2,pm3,pa,pb,pc,ps2,ps3))
}
## the M-step
M_step<-function(sM, sS, N){
## sM: first moment vector
## sS: second moment
pM <- sM/N
pS <- sS/(N-1) - N/(N-1)*pM%*%t(pM)
return(list(pM=pM, pS=pS))
}
## the E-step
E_step<-function(dset, pM, pS){
X<-dset[,1]
M<-dset[,2]
Y<-dset[,3]
n<-dim(dset)[1]
sX2<-rep(0,n)
sM2<-rep(0,n)
sY2<-rep(0,n)
sXM<-rep(0,n)
sXY<-rep(0,n)
sMY<-rep(0,n)
mX <-rep(0,n)
mM <-rep(0,n)
mY <-rep(0,n)
for (i in 1:n){
## process missing data in X
if (!is.na(X[i])){
## X is available
if (!is.na(M[i])){
## M is available
if (!is.na(Y[i])){
## X, M, Y is available
mX[i]<-X[i]
mM[i]<-M[i]
mY[i]<-Y[i]
sX2[i]<-X[i]^2
sY2[i]<-Y[i]^2
sM2[i]<-M[i]^2
sXM[i]<-X[i]*M[i]
sXY[i]<-X[i]*Y[i]
sMY[i]<-M[i]*Y[i]
}else{
## X & M observed but Y is missing
mX[i]<-X[i]
mM[i]<-M[i]
mY[i]<-pM[3] + pS[3,1:2]%*%solve(pS[1:2,1:2])%*%(c(X[i]-pM[1], M[i]-pM[2]))
sX2[i]<-mX[i]*mX[i]
sM2[i]<-mM[i]*mM[i]
sY2[i]<-mY[i]*mY[i]+pS[3,3]-pS[3,1:2]%*%solve(pS[1:2,1:2])%*%(pS[1:2,3])
sXM[i]<-mX[i]*mM[i]
sXY[i]<-mX[i]*mY[i]
sMY[i]<-mM[i]*mY[i]
}
}else{
## M is missing
if (!is.na(Y[i])){
## X, Y is available but M is missing
mX[i]<-X[i]
mM[i]<-pM[2] + pS[2,c(1,3)]%*%solve(pS[c(1,3),c(1,3)])%*%(c(X[i]-pM[1],Y[i]-pM[3]))
mY[i]<-Y[i]
sX2[i]<-mX[i]*mX[i]
sM2[i]<-mM[i]*mM[i]+pS[2,2]-pS[2,c(1,3)]%*%solve(pS[c(1,3),c(1,3)])%*%(pS[c(1,3),2])
sY2[i]<-mY[i]*mY[i]
sXM[i]<-mX[i]*mM[i]
sXY[i]<-mX[i]*mY[i]
sMY[i]<-mM[i]*mY[i]
}else{
## X is observed but M and Y are missing
mX[i]<-X[i]
mM[i]<-pM[2] + pS[1,2]/pS[1,1]*(X[i]-pM[1])
mY[i]<-pM[3] + pS[1,3]/pS[1,1]*(X[i]-pM[1])
sX2[i]<-mX[i]*mX[i]
sM2[i]<-mM[i]*mM[i] + pS[2,2]-pS[1,2]^2/pS[1,1]
sY2[i]<-mY[i]*mY[i] + pS[3,3]-pS[1,3]^2/pS[1,1]
sXM[i]<-mX[i]*mM[i]
sXY[i]<-mX[i]*mY[i]
sMY[i]<-mM[i]*mY[i] + pS[2,3]-pS[1,2]*pS[1,3]/pS[1,1]
}
}
}else{
## X is missing
if (!is.na(M[i])){
## M is available
if (!is.na(Y[i])){
## X is missing, M & Y are available
mX[i]<-pM[1] + pS[1,2:3]%*%solve(pS[2:3,2:3])%*%(c(M[i]-pM[2], Y[i]-pM[3]))
mM[i]<-M[i]
mY[i]<-Y[i]
sX2[i]<-mX[i]*mX[i]+pS[1,1]-pS[1,2:3]%*%solve(pS[2:3,2:3])%*%pS[2:3,1]
sM2[i]<-mM[i]*mM[i]
sY2[i]<-mY[i]*mY[i]
sXM[i]<-mX[i]*mM[i]
sXY[i]<-mX[i]*mY[i]
sMY[i]<-mM[i]*mY[i]
}else{
## Y and X is missing but M is available
mX[i]<-pM[1]+pS[1,2]/pS[2,2]*(M[i]-pM[2])
mM[i]<-M[i]
mY[i]<-pM[3]+pS[2,3]/pS[2,2]*(M[i]-pM[2])
sX2[i]<-mX[i]*mX[i]+pS[1,1]-pS[1,2]^2/pS[2,2]
sM2[i]<-mM[i]*mM[i]
sY2[i]<-mY[i]*mY[i]+pS[3,3]-pS[2,3]^2/pS[2,2]
sXM[i]<-mX[i]*mM[i]
sXY[i]<-mX[i]*mY[i]+pS[1,3]-pS[1,2]*pS[2,3]/pS[2,2]
sMY[i]<-mM[i]*mY[i]
}
}else{
## X, M is missing
mX[i]<-pM[1]+pS[1,3]/pS[3,3]*(Y[i]-pM[3])
mM[i]<-pM[2]+pS[2,3]/pS[3,3]*(Y[i]-pM[3])
mY[i]<-Y[i]
sX2[i]<-mX[i]*mX[i]+pS[1,1]-pS[1,3]^2/pS[3,3]
sM2[i]<-mM[i]*mM[i]+pS[2,2]-pS[2,3]^2/pS[3,3]
sY2[i]<-mY[i]*mY[i]
sXM[i]<-mX[i]*mM[i]+pS[1,2]-pS[1,3]*pS[2,3]/pS[3,3]
sXY[i]<-mX[i]*mY[i]
sMY[i]<-mM[i]*mY[i]
}
}
}
sM<-c(sum(mX),sum(mM),sum(mY))
sS<-array(c( sum(sX2),sum(sXM),sum(sXY),sum(sXM),sum(sM2),sum(sMY),sum(sXY),sum(sMY),sum(sY2)),dim=c(3,3))
return(list(sM=sM, sS=sS))
}
## bootstrap data using the stratified methods
bootdata<-function(dset){
N<-dim(dset)[1]
pat1<-NULL
pat2<-NULL
pat3<-NULL
pat4<-NULL
pat5<-NULL
pat6<-NULL
pat7<-NULL
n1<-0
n2<-0
n3<-0
n4<-0
n5<-0
n6<-0
n7<-0
## get the number of patterns of the missing data
for (i in 1:N){
if (!is.na(dset[i,1])){
if (!is.na(dset[i,2])){
if (!is.na(dset[i,3])){
pat1<-rbind(pat1,dset[i,])
n1<-n1+1
}else{
pat2<-rbind(pat2,dset[i,])
n2<-n2+1
}
}else{
if (!is.na(dset[i,3])){
pat3<-rbind(pat3,dset[i,])
n3<-n3+1
}else{
pat4<-rbind(pat4,dset[i,])
n4<-n4+1
}
}
}else{
if (!is.na(dset[i,2])){
if (!is.na(dset[i,3])){
pat5<-rbind(pat5,dset[i,])
n5<-n5+1
}else{
pat6<-rbind(pat6,dset[i,])
n6<-n6+1
}
}else{
if (!is.na(dset[i,3])){
pat7<-rbind(pat7,dset[i,])
n7<-n7+1
}
}
}
}
## resample the data
if (n1>0) pat1<-pat1[sample(1:n1, replace=T),]
if (n2>0) pat2<-pat2[sample(1:n2, replace=T),]
if (n3>0) pat3<-pat3[sample(1:n3, replace=T),]
if (n4>0) pat4<-pat4[sample(1:n4, replace=T),]
if (n5>0) pat5<-pat5[sample(1:n5, replace=T),]
if (n6>0) pat6<-pat6[sample(1:n6, replace=T),]
if (n7>0) pat7<-pat7[sample(1:n7, replace=T),]
temp<-rbind(pat1,pat2,pat3,pat4,pat5,pat6,pat7)
return(temp)
}
## Function to run EM, listwise delete, and pairwise delete just once
runonce<-function(dset,N){
## the list wise delete method
isna<-is.na(dset)
sumisna<-apply(isna,1,sum)
listdata<-dset[sumisna==0,]
temp<-cov(listdata,use='complete.obs')
listdel<-par.est(c(mean(listdata[,1]),mean(listdata[,2]),mean(listdata[,3])),temp)
## pairwise delete
temp<-cov(dset,use='pairwise.complete.obs')
pairdel<-par.est(c(mean(dset[,1],na.rm=T),mean(dset[,2],na.rm=T),mean(dset[,3],na.rm=T)),temp)
## EM algorithm
para$pM<-c(mean(listdata[,1]),mean(listdata[,2]),mean(listdata[,3]))
para$pS<-temp
e<-1
while (e>.0000001){
E.res<-E_step(dset,para$pM, para$pS)
M.res<-M_step(E.res$sM, E.res$sS, N)
e<-sum(abs(M.res$pM-para$pM))+sum(abs(M.res$pS-para$pS))
para<-M.res
}
em<-par.est(para$pM,para$pS)
results<-c(listdel, listdel[3]*listdel[4], pairdel, pairdel[3]*pairdel[4], em, em[3]*em[4])
return(results)
}
## jackknife to get the accelerated factor a
jack<-function(dset){
N<-dim(dset)[1]
index<-1:N
jackres<-array(NA, dim=c(N,8))
for (i in 1:N){
jackdata<-dset[index[-i],]
para$pM<-c(mean(jackdata[,1],na.rm=T),mean(jackdata[,2],na.rm=T),mean(jackdata[,3],na.rm=T))
para$pS<-cov(jackdata,use='complete.obs')
e<-1
while (e>.0000001){
E.res<-E_step(jackdata,para$pM, para$pS)
M.res<-M_step(E.res$sM, E.res$sS, N)
e<-sum(abs(M.res$pM-para$pM))+sum(abs(M.res$pS-para$pS))
para<-M.res
}
tempres<-par.est(para$pM,para$pS)
jackres[i,]<-c(tempres, tempres[3]*tempres[4])
}
a<-rep(0,8)
for (j in 1:8){
mu<-mean(jackres[,j])
a[j]<-1/6*sum((mu-jackres[,j])^3)/((sum((mu-jackres)^2))^3/2)
}
return(a)
}
## Function to run the bootstrap method once
bootonce<-function(dset, B, N, alpha){
## run original data to get thetahat
thetahat<-runonce(dset,N)
results<-array(NA, dim=c(B,24))
for (b in 1:B){
bdata<-bootdata(dset)
results[b,]<-runonce(bdata, N)
}
## calculate the confidence interval
alpha<-c(.025, .05, .95, .975)
allCI<-NULL
for (i in 1:24){
z0<-qnorm(sum(results[,i]<thetahat[i])/B)
zalpha<-qnorm(alpha)
tt<-pnorm(2*z0+zalpha)
climit<-quantile(results[,i],tt)
allCI<-c(allCI,thetahat[i],climit)
}
return(allCI)
}
bootonce(dset, 100,100)
## Matrice to store the results
R<-100
pairdel<-array(NA,dim=c(R,7))
em<-array(NA,dim=c(R,7))
true<-array(NA,dim=c(R,7))
listdel<-array(NA,dim=c(R,7))
N<-100
for (j in 1:R){
## dset generation
X<-rnorm(N)
M<-.5+.5*X + sqrt(.1)*rnorm(N)
Y<-1+1*M + .1*X + sqrt(.3)*rnorm(N)
dset<-cbind(X,M,Y)
sampM<-c(sum(X),sum(M),sum(Y))
sampS<-array(c(sum(X^2),sum(X*M),sum(X*Y),sum(X*M),sum(M^2),sum(M*Y),sum(X*Y),sum(M*Y),sum(Y^2)),c(3,3))
## results from complete data analysis
para<-M_step(sampM, sampS, N)
true[j,]<-par.est(para$pM, para$pS)
## For the missing data analysis
for (i in 1:N){
if (runif(1)<.3) M[i]<-NA
if (runif(1)<.3) Y[i]<-NA
if (!is.na(M[i]) | !is.na(Y[i])){
if (runif(1)<.3) X[i]<-NA
}
}
dset<-cbind(X,M,Y)
e<-1
while (e>.0000001){
E.res<-E_step(dset,para$pM, para$pS)
M.res<-M_step(E.res$sM, E.res$sS, N)
e<-sum(abs(M.res$pM-para$pM))+sum(abs(M.res$pS-para$pS))
para<-M.res
}
em[j,]<-par.est(para$pM,para$pS)
## the list wise delete method
isna<-is.na(dset)
sumisna<-apply(isna,1,sum)
listdata<-dset[sumisna==0,]
temp<-cov(listdata,use='complete.obs')
para<-par.est(c(mean(listdata[,1]),mean(listdata[,2]),mean(listdata[,3])),temp)
listdel[j,]<-para
## pairwise delete
temp<-cov(dset,use='pairwise.complete.obs')
para<-par.est(c(mean(dset[,1],na.rm=T),mean(dset[,2],na.rm=T),mean(dset[,3],na.rm=T)),temp)
pairdel[j,]<-para
}
apply(true,2,mean)
apply(pairdel,2,mean)
apply(listdel,2,mean)
apply(em,2,mean)
## function to bootstrap for standard errors
emboot<-function(dset, B, N){
pairdel<-array(NA,dim=c(B,8))
em<-array(NA,dim=c(B,8))
true<-array(NA,dim=c(B,8))
listdel<-array(NA,dim=c(B,8))
for (b in 1:B){
bdset<-dset[sample(1:N,replace=T),]
## results from complete data analysis
sampM<-c(sum(X),sum(M),sum(Y))
sampS<-array(c(sum(X^2),sum(X*M),sum(X*Y),sum(X*M),sum(M^2),sum(M*Y),sum(X*Y),sum(M*Y),sum(Y^2)),c(3,3))
para<-M_step(sampM, sampS, N)
true[j,]<-par.est(para$pM, para$pS)
## For the missing data analysis
for (i in 1:N){
if (runif(1)<.3) M[i]<-NA
if (runif(1)<.3) Y[i]<-NA
if (!is.na(M[i]) | !is.na(Y[i])){
if (runif(1)<.3) X[i]<-NA
}
}
dset<-cbind(X,M,Y)
e<-1
while (e>.0000001){
E.res<-E_step(dset,para$pM, para$pS)
M.res<-M_step(E.res$sM, E.res$sS, N)
e<-sum(abs(M.res$pM-para$pM))+sum(abs(M.res$pS-para$pS))
para<-M.res
}
em[j,]<-par.est(para$pM,para$pS)
## the list wise delete method
isna<-is.na(dset)
sumisna<-apply(isna,1,sum)
listdata<-dset[sumisna==0,]
temp<-cov(listdata,use='complete.obs')
para<-par.est(c(mean(listdata[,1]),mean(listdata[,2]),mean(listdata[,3])),temp)
listdel[j,]<-para
## pairwise delete
temp<-cov(dset,use='pairwise.complete.obs')
para<-par.est(c(mean(dset[,1],na.rm=T),mean(dset[,2],na.rm=T),mean(dset[,3],na.rm=T)),temp)
pairdel[j,]<-para
}
}
lab/bootstrap_ci.txt · Last modified: 2016/01/24 09:48 by 127.0.0.1
