lab:zhang:sas_macros_for_simulation
/*** Setup the global parameters ***/ /*The parameters below should be changed accordingly*/ *%LET filename="c:\zzy\mnarmediation\active1.txt"; * data file directory and name; %LET varname=var00list; *specify variable names in the data file. Please use x for the input variable, m for the mediation variable, and y for the output variable. a1 and a2 are two auxiliary variables in the data file. You can use any names except for x, m, and y for naming the auxiliary variables; %LET missing=99999; *specify the missing data value; %LET nimpute = 100; *define the number of imputations K; %LET nboot = 1000; *define the number of bootstraps B; %LET alpha = 0.95; *define the confidence level; %LET seed = 2010; *random number seed; /*** End of setup of global parameters ***/ data all; input col1-col4; cards; 0 0 0 0 ; run; %MACRO runsim(i); data temp; number = &i; datastr=put(number, 4.); datastrcomp=compress(datastr,''); commd ='cp /pscratch/zzhang4/Private/mediation/data/simucond/data-noav-'||datastrcomp||'.txt data.txt'; run; data _null_; set temp; file 'cp.sh'; put commd; run; data _null_; X 'chmod 755 cp.sh'; X './cp.sh'; X 'rm -f cp.sh'; run; quit; /*In general, there is no need to change the codes below*/ /*Read data into sas*/ DATA dset; INFILE "data.txt"; INPUT &varname; ARRAY nvarlist &varname; DO OVER nvarlist; IF nvarlist = &missing THEN nvarlist = .; END; RUN; PROC MI DATA=dset SEED=&seed NIMPUTE=&nimpute OUT=imputed NOPRINT; VAR &varname; RUN; QUIT; /*Estimating model parameters for each imputed data set*/ PROC REG DATA=imputed OUTEST= est NOPRINT; MODEL y = x m; MODEL m = x; BY _Imputation_; RUN; QUIT; /*Collecting results from mutiple imputations*/ DATA temp; SET est; id =INT((_N_-.1)/2)+1; modelnum = MOD(_N_+1, 2)+1; RUN; DATA temp1; SET temp; ARRAY int[2] iY iM; ARRAY xpar[2] c a; ARRAY mpar[2] b tmp1; ARRAY sigma[2] sy sm; RETAIN a b c iY iM sy sm; BY id; IF FIRST.id THEN DO I = 1 to 2; int[I] = .; xpar[I] = .; mpar[I]=.; sigma[I]=.; END; int[modelnum] = intercept; xpar[modelnum] = x; mpar[modelnum] = m; sigma[modelnum] = _RMSE_; IF LAST.id THEN OUTPUT; KEEP _imputation_ a b c iY iM sy sm; RUN; /*Calcuating mediation effects*/ DATA temp2; SET temp1; ab=a*b; RUN; /*Saving the point estimates of model parameters and mediation effect from multiple imputation into a data set named 'pointest'*/ PROC MEANS DATA=temp2 NOPRINT; VAR a b c ab iY iM sy sm; OUTPUT OUT=pointest MEAN(a b c ab iY iM sy sm)=a b c ab iY iM sy sm; RUN; /*** Bootstraping data to obtain standard errors and confidence intervals ***/ DATA bootsamp1; DO sampnum = 1 to 500; DO i = 1 TO nobs; ran = ROUND(RANUNI(&seed) * nobs); SET dset nobs = nobs point = ran; OUTPUT; END; END; STOP; RUN; QUIT; /*** Imputing K data sets for each bootstrap sample ***/ PROC MI DATA=bootsamp1 SEED=&seed NIMPUTE=&nimpute OUT=imputed1 NOPRINT; EM MAXITER = 500; VAR &varname; BY sampnum; RUN; QUIT; DATA bootsamp2; DO sampnum = 501 to 1000; DO i = 1 TO nobs; ran = ROUND(RANUNI(&seed) * nobs); SET dset nobs = nobs point = ran; OUTPUT; END; END; STOP; RUN; QUIT; /*** Imputing K data sets for each bootstrap sample ***/ PROC MI DATA=bootsamp2 SEED=&seed NIMPUTE=&nimpute OUT=imputed2 NOPRINT; EM MAXITER = 500; VAR &varname; BY sampnum; RUN; QUIT; DATA imputed; set imputed1 imputed2; run; /*Estimate model parameters for each imputed data set (in total, there are B*K imputed data sets.)*/ PROC REG DATA=imputed OUTEST= est NOPRINT; MODEL y = x m; MODEL m = x; BY sampnum _Imputation_; RUN; QUIT; /*Collecting results from different imputed data sets*/ DATA temp; SET est; id =INT((_N_-.1)/2)+1; modelnum = MOD(_N_+1, 2)+1; RUN; DATA temp1; SET temp; ARRAY int[2] iY iM; ARRAY xpar[2] c a; ARRAY mpar[2] b tmp1; ARRAY sigma[2] sy sm; RETAIN a b c iY iM sy sm; BY id; IF FIRST.id THEN DO I = 1 to 2; int[I] = .; xpar[I] = .; mpar[I]=.; sigma[I]=.; END; int[modelnum] = intercept; xpar[modelnum] = x; mpar[modelnum] = m; sigma[modelnum] = _RMSE_; IF LAST.id THEN OUTPUT; KEEP sampnum _imputation_ a b c iY iM sy sm; RUN; DATA temp2; SET temp1; ab=a*b; RUN; /*Compute point estimates of model parameters and mediation effect for each bootstrap sample and the results are saved in the data file named 'bootest'. */ PROC MEANS DATA=temp2 NOPRINT; BY sampnum; VAR a b c ab iY iM sy sm; OUTPUT OUT=bootest MEAN(a b c ab iY iM sy sm)=a b c ab iY iM sy sm; RUN; /*** Calculate the BC intervals based on the point estimates from different bootstrap samples and produce a table containing the points estimates, standard errors, confidence intervals in the output window.***/ PROC IML; START main; USE pointest; READ ALL INTO Y; USE bootest; READ ALL INTO X; n=NROW(X); m=NCOL(X); bc_lo=J(1,m-3,0); bc_up=J(1,m-3,0); se=J(1,m-3,0); alphas=1-(1-&alpha)/2; zcrit = PROBIT(alphas); DO j=1 TO m-3; se[j]=SQRT((SSQ(X[,j+3]) -(SUM(X[,j+3]))**2/n)/(n-1)); number=0; DO i=1 TO n; IF X[i,j+3]<Y[j+2] THEN number=number+1; END; p=number/n; z0hat=PROBIT(p); q1=z0hat+(z0hat-zcrit); q2=z0hat+(z0hat+zcrit); alpha1=PROBNORM(q1); alpha2=PROBNORM(q2); vec=X[,j+3]; CALL SORT(vec,{1}); low=int(alpha1*(n+1)); up=int(alpha2*(n+1)); IF low<1 THEN low=1; IF up>n THEN up=n; bc_lo[j]=vec[low]; bc_up[j]=vec[up]; END; result=J(1,4,0); result[1] = Y[6]; result[2] = se[4]; result[3] = bc_lo[4] ; result[4] = bc_up[4]; create bcci from result; append from result; FINISH; RUN main; QUIT; /*Combine results*/ %MEND runsim; %MACRO batch; %LET r=1001; %DO %WHILE (&r <=2000); %runsim(&r); %LET r = %eval(&r+1); data all; set all bcci; run; %END; %MEND batch; %batch; data _NULL_; set all; file "all.txt"; put col1-col4; run; quit;
lab/zhang/sas_macros_for_simulation.txt · Last modified: 2016/01/24 09:48 by 127.0.0.1