====== Supplementary Material for Zhang (2015) ====== ===== R code for data generation ===== library(sn) library(mvtnorm) library(normalp) set.seed(20120301) ### normal data mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 vE<-1 TT<-5 N<-500 R<-100 mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) for (k in 1:R){ y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) error<-NULL for (t in 1:TT){ error<-cbind(error, rnorm(N,df=df)*sqrt(vE)) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ### t data mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 vE<-1/3 df<-3 TT<-5 N<-500 R<-100 mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) for (k in 1:R){ y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) error<-NULL for (t in 1:TT){ error<-cbind(error, rt(N,df=df)*sqrt(vE)) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ### Exponential power data sig<-c(1,1,1,1) beta<- c(-.8, -.8, .8, .8) #rep(-.8, 5) mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 TT<-4 N<-500 p<-2/(1+beta) s<-sig*sqrt(gamma(1/p)/gamma(3/p))/p^(1/p) mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) for (k in 1:100){ error<-NULL for (t in 1:TT){ error<-cbind(error, rnormp(N,0,s[t],p[t])) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/t4data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ## T=5 sig<-c(1,1,1,1,1) beta<- c(-.8, -.8, 0, .8, .8) #rep(-.8, 5) mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 TT<-5 N<-500 p<-2/(1+beta) s<-sig*sqrt(gamma(1/p)/gamma(3/p))/p^(1/p) mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) for (k in 1:100){ error<-NULL for (t in 1:TT){ error<-cbind(error, rnormp(N,0,s[t],p[t])) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/t5data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ### Skew normal data mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 vE<-sqrt(pi/(pi-1)) alpha <- 10 delta<-alpha/sqrt(1+alpha^2) omega<-sqrt(1/(1-2*delta^2/pi)) s.mu<- -omega*delta*sqrt(2/pi) TT<-5 N<-500 mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) my<-array(NA, dim=c(N, TT)) for (k in 1:100){ error<-NULL for (t in 1:TT){ error<-cbind(error, rsn(N,s.mu, omega, alpha)) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ my[i,j]<- LS[i,1] + LS[i,2]*j #y[i, j] <- rsn(1,my[i,j],vE,alpha) #+ error[i,j] y[i, j] <- my[i,j] + error[i,j] } } filename<-paste('data/data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ===== Mplus code for bootstrap standard errors of growth curve models ===== TITLE: categorical DFM DATA: FILE = data.txt; VARIABLE: NAMES = y1-y4; USEVARIABLES = y1-y4 ANALYSIS: BOOTSTRAP = 1000; MODEL: i s | y1@0 y2@1 y3@2 y4@3; y1-y4 (a); ===== SAS code ===== ==== SAS codes for the growth curve model with the normal error distribution ==== PROC MCMC DATA=ndata NBI=5000 NMC=40000 THIN=2 SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histnorm; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS var_e 1; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR var_e ~ IGAMMA(0.001, SCALE=0.001); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; MODEL y ~ NORMAL(mu, var=var_e); RUN; ==== SAS codes for the growth curve model with the t error distribution ==== PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histt; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS sigma_e 1; PARMS g1 0; PARMS g2 0; PARMS g3 0; PARMS g4 0; PARMS g5 0; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR g:~UNIFORM(-.6,5); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; g = g1*(time=1)+ g2*(time=2)+g3*(time=3)+g4*(time=4)+g5*(time=5); p = 1+g; ll = -LOG(sigma_e) + .5*LGAMMA(1.5*p)-1.5*LGAMMA(p/2)-log(p) - (abs(y-mu)/sigma_e)**(2/p) * (GAMMA(1.5*p)/GAMMA(.5*p))**(1/p); var_e = sigma_e*sigma_e; MODEL y ~ general(ll); RUN; ==== SAS codes for the growth curve model with the exponential power error distribution ==== PROC MCMC DATA=ndata NMC=100000 NBI=5000 THIN=2 MONITOR=(_PARMS_ var_e) DIC SEED=13 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5)) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histged; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS sigma_e 1; PARMS g1 0; PARMS g2 0; PARMS g3 0; PARMS g4 0; PARMS g5 0; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR g:~UNIFORM(-.6,5); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; g = g1*(time=1)+ g2*(time=2)+g3*(time=3)+g4*(time=4)+g5*(time=5); p = 1+g; ll = -LOG(sigma_e) + .5*LGAMMA(1.5*p)-1.5*LGAMMA(p/2)-log(p) - (abs(y-mu)/sigma_e)**(2/p) * (GAMMA(1.5*p)/GAMMA(.5*p))**(1/p); var_e = sigma_e*sigma_e; MODEL y ~ general(ll); RUN; ==== SAS codes for the growth curve model with the skew normal error distribution ==== PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) MONITOR=(_PARMS_ var_e) OUTPOST=histsn; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS sigma_e 1; PARMS alpha 0; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR alpha~UNIFORM(-.5,.5); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; var_e = sigma_e*sigma_e; e = y - mu; xi = -sigma_e*alpha/sqrt(1+alpha*alpha)*sqrt(2/3.1415926); ll = log(2)-log(sigma_e)+logpdf('normal',(e-xi)/sigma_e,0,1)+logcdf('normal',alpha*(e-xi)/sigma_e,0,1); MODEL e ~ general(ll); RUN; ===== Procedure to run the simulation ===== The basic idea is to run SAS within R to conduct the simulation. First, an R script is used to copy data and then run a SAS script to conduct Bayesian analysis. Second, within the SAS script, the results are saved into .txt files. Third, R then reads in the SAS results and combines simulation results. **//Bayesian analysis can be time consuming. The code has been run on high performance computers and was not tested on personal computers.//** The R code for running the simulation for t data is for (SS in c(100,300,500)){ for (TT in 4:5){ for (i in 1:100){ file.name<-paste('data/data',i,'.txt', sep='') y<-read.table(file.name) y<-y[1:SS, 1:TT] file.name<-paste('data.txt', sep='') dset<-cbind(rep(1:TT, each=SS),rep(1:SS,TT),c(y)) write.table(dset,'ndata.txt',row.names=F,col.names=F) system(paste('sas tsas.sas', sep='')) out<-paste('output/norm-SS-',SS,'TT', TT,'-', i, '.txt', sep='') temp<-try(file.copy('histnorm.txt', out, overwrite=T)) out<-paste('output/t-SS-',SS,'TT', TT,'-', i, '.txt', sep='') temp<-try(file.copy('histt.txt', out, overwrite=T)) } } } The complete SAS script is * Normal - t data; data ndata; infile "ndata.txt"; input time id y; run; PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5)) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histnorm; ODS SELECT PARAMETERS POSTSUMMARIES TADPANEL GEWEKE ESS DIC POSTINTERVALS; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS var_e 1; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR var_e ~ IGAMMA(0.001, SCALE=0.001); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; MODEL y ~ NORMAL(mu, var=var_e); RUN; /* proc sgplot data=histnorm; series x=Iteration y=beta1; yaxis label="beta0"; run; */ data _NULL_; set histnorm; file "histnorm.txt"; put Iteration beta1 beta2 Sigma_b1 Sigma_b2 Sigma_b4 var_e; run; PROC MCMC DATA=ndata NMC=80000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5)) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histt; ODS SELECT PARAMETERS POSTSUMMARIES TADPANEL GEWEKE ESS DIC POSTINTERVALS; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS var_e 1; PARMS df 5; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR var_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR df~UNIFORM(1,500); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; MODEL y ~ t(mu, var=var_e, df); RUN; /* proc sgplot data=histt; series x=Iteration y=beta1; yaxis label="beta0"; run; */ data _NULL_; set histt; file "histt.txt"; put Iteration beta1 beta2 Sigma_b1 Sigma_b2 Sigma_b4 var_e df; run;