====== Supplementary Material for Zhang (2015) ======
===== R code for data generation =====
library(sn)
library(mvtnorm)
library(normalp)
set.seed(20120301)
### normal data
mL<-5
mS<-2
vL<-2
vS<-1
vLS<-0
vE<-1
TT<-5
N<-500
R<-100
mu<-c(mL,mS)
sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2))
for (k in 1:R){
y<-array(NA, dim=c(N, TT))
LS<-array(NA, dim=c(N, 2))
error<-NULL
for (t in 1:TT){
error<-cbind(error, rnorm(N,df=df)*sqrt(vE))
}
for (i in 1:N){
LS[i,]<-rmvnorm(1, mu, sigma)
for (j in 1:TT){
y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j]
}
}
filename<-paste('data/data',k, '.txt',sep='')
write.table(y,filename,row.names=F,col.names=F)
}
### t data
mL<-5
mS<-2
vL<-2
vS<-1
vLS<-0
vE<-1/3
df<-3
TT<-5
N<-500
R<-100
mu<-c(mL,mS)
sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2))
for (k in 1:R){
y<-array(NA, dim=c(N, TT))
LS<-array(NA, dim=c(N, 2))
error<-NULL
for (t in 1:TT){
error<-cbind(error, rt(N,df=df)*sqrt(vE))
}
for (i in 1:N){
LS[i,]<-rmvnorm(1, mu, sigma)
for (j in 1:TT){
y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j]
}
}
filename<-paste('data/data',k, '.txt',sep='')
write.table(y,filename,row.names=F,col.names=F)
}
### Exponential power data
sig<-c(1,1,1,1)
beta<- c(-.8, -.8, .8, .8) #rep(-.8, 5)
mL<-5
mS<-2
vL<-2
vS<-1
vLS<-0
TT<-4
N<-500
p<-2/(1+beta)
s<-sig*sqrt(gamma(1/p)/gamma(3/p))/p^(1/p)
mu<-c(mL,mS)
sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2))
y<-array(NA, dim=c(N, TT))
LS<-array(NA, dim=c(N, 2))
for (k in 1:100){
error<-NULL
for (t in 1:TT){
error<-cbind(error, rnormp(N,0,s[t],p[t]))
}
for (i in 1:N){
LS[i,]<-rmvnorm(1, mu, sigma)
for (j in 1:TT){
y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j]
}
}
filename<-paste('data/t4data',k, '.txt',sep='')
write.table(y,filename,row.names=F,col.names=F)
}
## T=5
sig<-c(1,1,1,1,1)
beta<- c(-.8, -.8, 0, .8, .8) #rep(-.8, 5)
mL<-5
mS<-2
vL<-2
vS<-1
vLS<-0
TT<-5
N<-500
p<-2/(1+beta)
s<-sig*sqrt(gamma(1/p)/gamma(3/p))/p^(1/p)
mu<-c(mL,mS)
sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2))
y<-array(NA, dim=c(N, TT))
LS<-array(NA, dim=c(N, 2))
for (k in 1:100){
error<-NULL
for (t in 1:TT){
error<-cbind(error, rnormp(N,0,s[t],p[t]))
}
for (i in 1:N){
LS[i,]<-rmvnorm(1, mu, sigma)
for (j in 1:TT){
y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j]
}
}
filename<-paste('data/t5data',k, '.txt',sep='')
write.table(y,filename,row.names=F,col.names=F)
}
### Skew normal data
mL<-5
mS<-2
vL<-2
vS<-1
vLS<-0
vE<-sqrt(pi/(pi-1))
alpha <- 10
delta<-alpha/sqrt(1+alpha^2)
omega<-sqrt(1/(1-2*delta^2/pi))
s.mu<- -omega*delta*sqrt(2/pi)
TT<-5
N<-500
mu<-c(mL,mS)
sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2))
y<-array(NA, dim=c(N, TT))
LS<-array(NA, dim=c(N, 2))
my<-array(NA, dim=c(N, TT))
for (k in 1:100){
error<-NULL
for (t in 1:TT){
error<-cbind(error, rsn(N,s.mu, omega, alpha))
}
for (i in 1:N){
LS[i,]<-rmvnorm(1, mu, sigma)
for (j in 1:TT){
my[i,j]<- LS[i,1] + LS[i,2]*j
#y[i, j] <- rsn(1,my[i,j],vE,alpha) #+ error[i,j]
y[i, j] <- my[i,j] + error[i,j]
}
}
filename<-paste('data/data',k, '.txt',sep='')
write.table(y,filename,row.names=F,col.names=F)
}
===== Mplus code for bootstrap standard errors of growth curve models =====
TITLE: categorical DFM
DATA: FILE = data.txt;
VARIABLE: NAMES = y1-y4;
USEVARIABLES = y1-y4
ANALYSIS:
BOOTSTRAP = 1000;
MODEL:
i s | y1@0 y2@1 y3@2 y4@3;
y1-y4 (a);
===== SAS code =====
==== SAS codes for the growth curve model with the normal error distribution ====
PROC MCMC DATA=ndata NBI=5000 NMC=40000 THIN=2 SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histnorm;
ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL;
ARRAY b[2] L S;
ARRAY beta[2];
ARRAY Sigma_b[2,2];
ARRAY beta0[2] (0 0);
ARRAY sigma0[2,2] (1000 0 0 1000);
ARRAY V[2,2] (1 0 0 1);
PARMS beta {5 2};
PARMS sigma_b {1 0 0 1};
PARMS var_e 1;
PRIOR beta ~ MVN(beta0, sigma0);
PRIOR Sigma_b ~ IWISH(2, V);
PRIOR var_e ~ IGAMMA(0.001, SCALE=0.001);
RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id;
mu = L + S * time;
MODEL y ~ NORMAL(mu, var=var_e);
RUN;
==== SAS codes for the growth curve model with the t error distribution ====
PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2
DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE)
STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL)
OUTPOST=histt;
ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL;
ARRAY b[2] L S;
ARRAY beta[2];
ARRAY Sigma_b[2,2];
ARRAY beta0[2] (0 0);
ARRAY sigma0[2,2] (1000 0 0 1000);
ARRAY V[2,2] (1 0 0 1);
PARMS beta {5 2};
PARMS sigma_b {1 0 0 1};
PARMS sigma_e 1;
PARMS g1 0;
PARMS g2 0;
PARMS g3 0;
PARMS g4 0;
PARMS g5 0;
PRIOR beta ~ MVN(beta0, sigma0);
PRIOR Sigma_b ~ IWISH(2, V);
PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01);
PRIOR g:~UNIFORM(-.6,5);
RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id;
mu = L + S * time;
g = g1*(time=1)+ g2*(time=2)+g3*(time=3)+g4*(time=4)+g5*(time=5);
p = 1+g;
ll = -LOG(sigma_e) + .5*LGAMMA(1.5*p)-1.5*LGAMMA(p/2)-log(p) - (abs(y-mu)/sigma_e)**(2/p) * (GAMMA(1.5*p)/GAMMA(.5*p))**(1/p);
var_e = sigma_e*sigma_e;
MODEL y ~ general(ll);
RUN;
==== SAS codes for the growth curve model with the exponential power error distribution ====
PROC MCMC DATA=ndata NMC=100000 NBI=5000 THIN=2 MONITOR=(_PARMS_ var_e)
DIC SEED=13 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5))
STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL)
OUTPOST=histged;
ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL;
ARRAY b[2] L S;
ARRAY beta[2];
ARRAY Sigma_b[2,2];
ARRAY beta0[2] (0 0);
ARRAY sigma0[2,2] (1000 0 0 1000);
ARRAY V[2,2] (1 0 0 1);
PARMS beta {5 2};
PARMS sigma_b {1 0 0 1};
PARMS sigma_e 1;
PARMS g1 0;
PARMS g2 0;
PARMS g3 0;
PARMS g4 0;
PARMS g5 0;
PRIOR beta ~ MVN(beta0, sigma0);
PRIOR Sigma_b ~ IWISH(2, V);
PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01);
PRIOR g:~UNIFORM(-.6,5);
RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id;
mu = L + S * time;
g = g1*(time=1)+ g2*(time=2)+g3*(time=3)+g4*(time=4)+g5*(time=5);
p = 1+g;
ll = -LOG(sigma_e) + .5*LGAMMA(1.5*p)-1.5*LGAMMA(p/2)-log(p) - (abs(y-mu)/sigma_e)**(2/p) * (GAMMA(1.5*p)/GAMMA(.5*p))**(1/p);
var_e = sigma_e*sigma_e;
MODEL y ~ general(ll);
RUN;
==== SAS codes for the growth curve model with the skew normal error distribution ====
PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2
DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE)
STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) MONITOR=(_PARMS_ var_e)
OUTPOST=histsn;
ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL;
ARRAY b[2] L S;
ARRAY beta[2];
ARRAY Sigma_b[2,2];
ARRAY beta0[2] (0 0);
ARRAY sigma0[2,2] (1000 0 0 1000);
ARRAY V[2,2] (1 0 0 1);
PARMS beta {5 2};
PARMS sigma_b {1 0 0 1};
PARMS sigma_e 1;
PARMS alpha 0;
PRIOR beta ~ MVN(beta0, sigma0);
PRIOR Sigma_b ~ IWISH(2, V);
PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01);
PRIOR alpha~UNIFORM(-.5,.5);
RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id;
mu = L + S * time;
var_e = sigma_e*sigma_e;
e = y - mu;
xi = -sigma_e*alpha/sqrt(1+alpha*alpha)*sqrt(2/3.1415926);
ll = log(2)-log(sigma_e)+logpdf('normal',(e-xi)/sigma_e,0,1)+logcdf('normal',alpha*(e-xi)/sigma_e,0,1);
MODEL e ~ general(ll);
RUN;
===== Procedure to run the simulation =====
The basic idea is to run SAS within R to conduct the simulation. First, an R script is used to copy data and then run a SAS script to conduct Bayesian analysis. Second, within the SAS script, the results are saved into .txt files. Third, R then reads in the SAS results and combines simulation results.
**//Bayesian analysis can be time consuming. The code has been run on high performance computers and was not tested on personal computers.//**
The R code for running the simulation for t data is
for (SS in c(100,300,500)){
for (TT in 4:5){
for (i in 1:100){
file.name<-paste('data/data',i,'.txt', sep='')
y<-read.table(file.name)
y<-y[1:SS, 1:TT]
file.name<-paste('data.txt', sep='')
dset<-cbind(rep(1:TT, each=SS),rep(1:SS,TT),c(y))
write.table(dset,'ndata.txt',row.names=F,col.names=F)
system(paste('sas tsas.sas', sep=''))
out<-paste('output/norm-SS-',SS,'TT', TT,'-', i, '.txt', sep='')
temp<-try(file.copy('histnorm.txt', out, overwrite=T))
out<-paste('output/t-SS-',SS,'TT', TT,'-', i, '.txt', sep='')
temp<-try(file.copy('histt.txt', out, overwrite=T))
}
}
}
The complete SAS script is
* Normal - t data;
data ndata;
infile "ndata.txt";
input time id y;
run;
PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2
DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5))
STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL)
OUTPOST=histnorm;
ODS SELECT PARAMETERS POSTSUMMARIES
TADPANEL GEWEKE ESS DIC POSTINTERVALS;
ARRAY b[2] L S;
ARRAY beta[2];
ARRAY Sigma_b[2,2];
ARRAY beta0[2] (0 0);
ARRAY sigma0[2,2] (1000 0 0 1000);
ARRAY V[2,2] (1 0 0 1);
PARMS beta {5 2};
PARMS sigma_b {1 0 0 1};
PARMS var_e 1;
PRIOR beta ~ MVN(beta0, sigma0);
PRIOR Sigma_b ~ IWISH(2, V);
PRIOR var_e ~ IGAMMA(0.001, SCALE=0.001);
RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id;
mu = L + S * time;
MODEL y ~ NORMAL(mu, var=var_e);
RUN;
/*
proc sgplot data=histnorm;
series x=Iteration y=beta1;
yaxis label="beta0";
run;
*/
data _NULL_;
set histnorm;
file "histnorm.txt";
put Iteration beta1 beta2 Sigma_b1 Sigma_b2 Sigma_b4 var_e;
run;
PROC MCMC DATA=ndata NMC=80000 NBI=5000 THIN=2
DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5))
STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL)
OUTPOST=histt;
ODS SELECT PARAMETERS POSTSUMMARIES
TADPANEL GEWEKE ESS DIC POSTINTERVALS;
ARRAY b[2] L S;
ARRAY beta[2];
ARRAY Sigma_b[2,2];
ARRAY beta0[2] (0 0);
ARRAY sigma0[2,2] (1000 0 0 1000);
ARRAY V[2,2] (1 0 0 1);
PARMS beta {5 2};
PARMS sigma_b {1 0 0 1};
PARMS var_e 1;
PARMS df 5;
PRIOR beta ~ MVN(beta0, sigma0);
PRIOR Sigma_b ~ IWISH(2, V);
PRIOR var_e ~ IGAMMA(0.01, SCALE=0.01);
PRIOR df~UNIFORM(1,500);
RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id;
mu = L + S * time;
MODEL y ~ t(mu, var=var_e, df);
RUN;
/*
proc sgplot data=histt;
series x=Iteration y=beta1;
yaxis label="beta0";
run;
*/
data _NULL_;
set histt;
file "histt.txt";
put Iteration beta1 beta2 Sigma_b1 Sigma_b2 Sigma_b4 var_e df;
run;