## EM for the mediation model
## function to calculate the parameters
est<-function(S){
sX2<-S[1]
sM2<-S[2]
sY2<-S[3]
sXM<-S[4]
sXY<-S[5]
sMY<-S[6]
mX <-S[7]
mM <-S[8]
mY <-S[9]
pa<-sXM/sX2
pb<-(sMY*sX2-sXM*sXY)/(sX2*sM2-sXM^2)
pc<-(sXY*sM2-sXM*sMY)/(sX2*sM2-sXM^2)
ps1<-sX2
ps2<-sM2-sXM^2/sX2
ps3<-(sX2*sM2*sY2-sX2*sMY^2-sM2*sXY^2-sY2*sXM^2+2*sXM*sXY*sMY)/(sX2*sM2-sXM^2)
pm1<-mX
pm2<-mM - pa*mX
pm3<-mY - pb*mM - pc*mX
return(c(pa,pb,pc,ps1,ps2,ps3,pm1,pm2,pm3))
}
## expectation step
EMe<-function(dset, para){
X<-dset[,1]
M<-dset[,2]
Y<-dset[,3]
pa<-para[1]
pb<-para[2]
pc<-para[3]
ps1<-para[4]
ps2<-para[5]
ps3<-para[6]
pm1<-para[7]
pm2<-para[8]
pm3<-para[9]
n<-dim(dset)[1]
sX2<-rep(0,n)
sM2<-rep(0,n)
sY2<-rep(0,n)
sXM<-rep(0,n)
sXY<-rep(0,n)
sMY<-rep(0,n)
mX <-rep(0,n)
mM <-rep(0,n)
mY <-rep(0,n)
m<-n-1
for (i in 1:n){
if (!is.na(M[i])){
if (!is.na(Y[i])){
## for complete dset case
mX[i] =X[i]
mM[i] =M[i]
mY[i] =Y[i]
sX2[i]= X[i]^2
sM2[i]= M[i]^2
sY2[i]= Y[i]^2
sXM[i]= X[i]*M[i]
sXY[i]= X[i]*Y[i]
sMY[i]= M[i]*Y[i]
}else{
## for complete X, M but missing Y
mX[i] =X[i]
mM[i] =M[i]
mY[i] = pm3 + pb*M[i]+pc*X[i]
sX2[i]= X[i]^2
sM2[i]= M[i]^2
sY2[i]= pm3^2 + pb^2*M[i]^2+pc^2*X[i]^2+2*pb*pc*X[i]*M[i]+2*pm3*pb*M[i]+2*pm3*pc*X[i]+ps3
sXM[i]= X[i]*M[i]
sXY[i]= pm3*X[i] + pb*X[i]*M[i]+pc*X[i]^2
sMY[i]= pm3*M[i] + pc*X[i]*M[i]+pb*M[i]^2
}
}else{
if (!is.na(Y[i])){
## for complete X, Y but missing M
mX[i] =X[i]
mM[i] =pm2 + pa*X[i]
mY[i] =Y[i]
sX2[i]= X[i]^2
sM2[i]= pm2^2 + 2*pm2*pa*X[i]+pa^2*X[i]^2+ps2
sY2[i]= Y[i]^2
sXM[i]= pm2*X[i]+pa*X[i]^2
sXY[i]= X[i]*Y[i]
sMY[i]= pm2*Y[i]+pa*X[i]*Y[i]
}else{
## for complete X but missing M, Y
mX[i] =X[i]
mM[i] = pm2 + pa*X[i]
mY[i] = pm3 + pm2*pb + (pa*pb+pc)*X[i]
sX2[i]= X[i]^2
sM2[i]= pm2^2+pa^2*X[i]^2+2*pm2*pa*X[i]+ps2
sY2[i]= (pm3+pb*pm2)^2 + (pa*pb+pc)^2*X[i]^2+ 2*(pm3+pb*pm2)*(pa*pb+pc)*X[i] + pb^2*ps2+ps3
sXM[i]= pm2*X[i]+pa*X[i]^2
sXY[i]= (pm3+pb*pm2)*X[i]+(pa*pb+pc)*X[i]^2
sMY[i]= pm2*(pm3+pb*pm2)+pm2*(pa*pb+pc)*X[i]+pa*(pm3+pb*pm2)*X[i]+pa*(pa*pb+pc)*X[i]^2+pb*ps2
}
}
}
## calculate the covariance matrix
CM <- rep(0, 9)
CM[7] <- sum(mX)/n
CM[8] <- sum(mM)/n
CM[9] <- sum(mY)/n
CM[1] <- sum(sX2)/m - n/m*CM[7]^2
CM[2] <- sum(sM2)/m - n/m*CM[8]^2
CM[3] <- sum(sY2)/m - n/m*CM[9]^2
CM[4] <- sum(sXM)/m - n/m*CM[7]*CM[8]
CM[5] <- sum(sXY)/m - n/m*CM[7]*CM[9]
CM[6] <- sum(sMY)/m - n/m*CM[9]*CM[8]
return(CM)
}
## Simulation to compare the EM method and the listwise delete method
## Matrice to store the results
R<-1000
pairdel<-array(NA,dim=c(R,9))
em<-array(NA,dim=c(R,9))
true<-array(NA,dim=c(R,9))
listdel<-array(NA,dim=c(R,9))
ml<-array(NA,dim=c(R,9))
sampN<-rep(0,R)
N<-100
for (j in 1:R){
## dset generation
X<-rnorm(N)
M<-.5*X + .1*rnorm(N)
Y<-.5*M + .1*X + .1*rnorm(N)
temp<-cov(cbind(X,M,Y))
par<-est(c(temp[1,1],temp[2,2],temp[3,3],temp[1,2],temp[1,3],temp[2,3],mean(X),mean(M),mean(Y)))
true[j,]<-par
## Create missing dset
for (i in 1:N){
p1<-1/(1+exp(2-.1*X))
if (runif(1)<p1[i]){ M[i]<-NA }
if (runif(1)<p1[i]){ Y[i]<-NA }
}
dset<-cbind(X,M,Y)
## EM methods
e<-1
while (e>.00001){
SS<-EMe(dset, par)
para<-est(SS)
e<-sum(abs(para-par))
par<-para
# print(par,digits=10)
}
em[j,]<-para
## ML method
## save the data
write.table(dset, "data.dat", na='.',row.names=F, col.names=F)
system('c:\\programs\\mplus\\mplus.exe mle.inp',show.output.on.console = F)
tempres<-scan('est.txt',quiet=T)
ml[j,]<-tempres[c(6,4,5,9,8,7,3,2,1)]
## the list wise delete method
isna<-is.na(dset)
sumisna<-apply(isna,1,sum)
listdata<-dset[sumisna==0,]
sampN[j]<-dim(listdata)[1]
temp<-cov(listdata,use='complete.obs')
par<-est(c(temp[1,1],temp[2,2],temp[3,3],temp[1,2],temp[1,3],temp[2,3],mean(listdata[,1]),mean(listdata[,2]),mean(listdata[,3])))
listdel[j,]<-par
## pairwise delete
temp<-cov(dset,use='pairwise.complete.obs')
par<-est(c(temp[1,1],temp[2,2],temp[3,3],temp[1,2],temp[1,3],temp[2,3],mean(dset[,1],na.rm=T),mean(dset[,2],na.rm=T),mean(dset[,3],na.rm=T)))
pairdel[j,]<-par
}
apply(true,2,mean)
apply(pairdel,2,mean)
apply(em,2,mean)
apply(listdel,2,mean)
apply(ml,2,mean)