research:supplementary:brm2015
Table of Contents
Supplementary Material for Zhang (2015)
R code for data generation
- 1 |h show/hide code |h
library(sn) library(mvtnorm) library(normalp) set.seed(20120301) ### normal data mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 vE<-1 TT<-5 N<-500 R<-100 mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) for (k in 1:R){ y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) error<-NULL for (t in 1:TT){ error<-cbind(error, rnorm(N,df=df)*sqrt(vE)) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ### t data mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 vE<-1/3 df<-3 TT<-5 N<-500 R<-100 mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) for (k in 1:R){ y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) error<-NULL for (t in 1:TT){ error<-cbind(error, rt(N,df=df)*sqrt(vE)) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ### Exponential power data sig<-c(1,1,1,1) beta<- c(-.8, -.8, .8, .8) #rep(-.8, 5) mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 TT<-4 N<-500 p<-2/(1+beta) s<-sig*sqrt(gamma(1/p)/gamma(3/p))/p^(1/p) mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) for (k in 1:100){ error<-NULL for (t in 1:TT){ error<-cbind(error, rnormp(N,0,s[t],p[t])) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/t4data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ## T=5 sig<-c(1,1,1,1,1) beta<- c(-.8, -.8, 0, .8, .8) #rep(-.8, 5) mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 TT<-5 N<-500 p<-2/(1+beta) s<-sig*sqrt(gamma(1/p)/gamma(3/p))/p^(1/p) mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) for (k in 1:100){ error<-NULL for (t in 1:TT){ error<-cbind(error, rnormp(N,0,s[t],p[t])) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ y[i, j] <- LS[i,1] + LS[i,2]*j + error[i,j] } } filename<-paste('data/t5data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) } ### Skew normal data mL<-5 mS<-2 vL<-2 vS<-1 vLS<-0 vE<-sqrt(pi/(pi-1)) alpha <- 10 delta<-alpha/sqrt(1+alpha^2) omega<-sqrt(1/(1-2*delta^2/pi)) s.mu<- -omega*delta*sqrt(2/pi) TT<-5 N<-500 mu<-c(mL,mS) sigma<-array(c(vL,vLS,vLS,vS), dim=c(2,2)) y<-array(NA, dim=c(N, TT)) LS<-array(NA, dim=c(N, 2)) my<-array(NA, dim=c(N, TT)) for (k in 1:100){ error<-NULL for (t in 1:TT){ error<-cbind(error, rsn(N,s.mu, omega, alpha)) } for (i in 1:N){ LS[i,]<-rmvnorm(1, mu, sigma) for (j in 1:TT){ my[i,j]<- LS[i,1] + LS[i,2]*j #y[i, j] <- rsn(1,my[i,j],vE,alpha) #+ error[i,j] y[i, j] <- my[i,j] + error[i,j] } } filename<-paste('data/data',k, '.txt',sep='') write.table(y,filename,row.names=F,col.names=F) }
Mplus code for bootstrap standard errors of growth curve models
- |h show/hide code |h
TITLE: categorical DFM DATA: FILE = data.txt; VARIABLE: NAMES = y1-y4; USEVARIABLES = y1-y4 ANALYSIS: BOOTSTRAP = 1000; MODEL: i s | y1@0 y2@1 y3@2 y4@3; y1-y4 (a);
SAS code
SAS codes for the growth curve model with the normal error distribution
- 1 |h show/hide code |h
PROC MCMC DATA=ndata NBI=5000 NMC=40000 THIN=2 SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histnorm; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS var_e 1; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR var_e ~ IGAMMA(0.001, SCALE=0.001); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; MODEL y ~ NORMAL(mu, var=var_e); RUN;
SAS codes for the growth curve model with the t error distribution
- 1 |h show/hide code |h
PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histt; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS sigma_e 1; PARMS g1 0; PARMS g2 0; PARMS g3 0; PARMS g4 0; PARMS g5 0; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR g:~UNIFORM(-.6,5); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; g = g1*(time=1)+ g2*(time=2)+g3*(time=3)+g4*(time=4)+g5*(time=5); p = 1+g; ll = -LOG(sigma_e) + .5*LGAMMA(1.5*p)-1.5*LGAMMA(p/2)-log(p) - (abs(y-mu)/sigma_e)**(2/p) * (GAMMA(1.5*p)/GAMMA(.5*p))**(1/p); var_e = sigma_e*sigma_e; MODEL y ~ general(ll); RUN;
SAS codes for the growth curve model with the exponential power error distribution
- 1 |h show/hide code |h
PROC MCMC DATA=ndata NMC=100000 NBI=5000 THIN=2 MONITOR=(_PARMS_ var_e) DIC SEED=13 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5)) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histged; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS sigma_e 1; PARMS g1 0; PARMS g2 0; PARMS g3 0; PARMS g4 0; PARMS g5 0; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR g:~UNIFORM(-.6,5); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; g = g1*(time=1)+ g2*(time=2)+g3*(time=3)+g4*(time=4)+g5*(time=5); p = 1+g; ll = -LOG(sigma_e) + .5*LGAMMA(1.5*p)-1.5*LGAMMA(p/2)-log(p) - (abs(y-mu)/sigma_e)**(2/p) * (GAMMA(1.5*p)/GAMMA(.5*p))**(1/p); var_e = sigma_e*sigma_e; MODEL y ~ general(ll); RUN;
SAS codes for the growth curve model with the skew normal error distribution
- 1 |h show/hide code |h
PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) MONITOR=(_PARMS_ var_e) OUTPOST=histsn; ODS SELECT PARAMETERS POSTSUMMARIES POSTINTERVALS GEWEKE ESS TADPANEL; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS sigma_e 1; PARMS alpha 0; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR sigma_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR alpha~UNIFORM(-.5,.5); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; var_e = sigma_e*sigma_e; e = y - mu; xi = -sigma_e*alpha/sqrt(1+alpha*alpha)*sqrt(2/3.1415926); ll = log(2)-log(sigma_e)+logpdf('normal',(e-xi)/sigma_e,0,1)+logcdf('normal',alpha*(e-xi)/sigma_e,0,1); MODEL e ~ general(ll); RUN;
Procedure to run the simulation
The basic idea is to run SAS within R to conduct the simulation. First, an R script is used to copy data and then run a SAS script to conduct Bayesian analysis. Second, within the SAS script, the results are saved into .txt files. Third, R then reads in the SAS results and combines simulation results.
Bayesian analysis can be time consuming. The code has been run on high performance computers and was not tested on personal computers.
The R code for running the simulation for t data is
- 1 |h show/hide code |h
for (SS in c(100,300,500)){ for (TT in 4:5){ for (i in 1:100){ file.name<-paste('data/data',i,'.txt', sep='') y<-read.table(file.name) y<-y[1:SS, 1:TT] file.name<-paste('data.txt', sep='') dset<-cbind(rep(1:TT, each=SS),rep(1:SS,TT),c(y)) write.table(dset,'ndata.txt',row.names=F,col.names=F) system(paste('sas tsas.sas', sep='')) out<-paste('output/norm-SS-',SS,'TT', TT,'-', i, '.txt', sep='') temp<-try(file.copy('histnorm.txt', out, overwrite=T)) out<-paste('output/t-SS-',SS,'TT', TT,'-', i, '.txt', sep='') temp<-try(file.copy('histt.txt', out, overwrite=T)) } } }
The complete SAS script is
- 1 |h show/hide code |h
* Normal - t data; data ndata; infile "ndata.txt"; input time id y; run; PROC MCMC DATA=ndata NMC=40000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5)) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histnorm; ODS SELECT PARAMETERS POSTSUMMARIES TADPANEL GEWEKE ESS DIC POSTINTERVALS; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS var_e 1; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR var_e ~ IGAMMA(0.001, SCALE=0.001); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; MODEL y ~ NORMAL(mu, var=var_e); RUN; /* proc sgplot data=histnorm; series x=Iteration y=beta1; yaxis label="beta0"; run; */ data _NULL_; set histnorm; file "histnorm.txt"; put Iteration beta1 beta2 Sigma_b1 Sigma_b2 Sigma_b4 var_e; run; PROC MCMC DATA=ndata NMC=80000 NBI=5000 THIN=2 DIC SEED=17 INIT=RANDOM DIAG=(ESS GEWEKE(F1=.2 F2=.5)) STATISTICS(ALPHA=0.05)=(SUMMARY INTERVAL) OUTPOST=histt; ODS SELECT PARAMETERS POSTSUMMARIES TADPANEL GEWEKE ESS DIC POSTINTERVALS; ARRAY b[2] L S; ARRAY beta[2]; ARRAY Sigma_b[2,2]; ARRAY beta0[2] (0 0); ARRAY sigma0[2,2] (1000 0 0 1000); ARRAY V[2,2] (1 0 0 1); PARMS beta {5 2}; PARMS sigma_b {1 0 0 1}; PARMS var_e 1; PARMS df 5; PRIOR beta ~ MVN(beta0, sigma0); PRIOR Sigma_b ~ IWISH(2, V); PRIOR var_e ~ IGAMMA(0.01, SCALE=0.01); PRIOR df~UNIFORM(1,500); RANDOM b ~ MVN(beta, Sigma_b) SUBJECT=id; mu = L + S * time; MODEL y ~ t(mu, var=var_e, df); RUN; /* proc sgplot data=histt; series x=Iteration y=beta1; yaxis label="beta0"; run; */ data _NULL_; set histt; file "histt.txt"; put Iteration beta1 beta2 Sigma_b1 Sigma_b2 Sigma_b4 var_e df; run;
research/supplementary/brm2015.txt · Last modified: 2016/01/24 09:48 by 127.0.0.1