lab:zhang:sas_macros_for_simulation
/*** Setup the global parameters ***/
/*The parameters below should be changed accordingly*/
*%LET filename="c:\zzy\mnarmediation\active1.txt"; * data file directory and name;
%LET varname=var00list; *specify variable names in the data file. Please
use x for the input variable, m for the mediation variable, and y for
the output variable. a1 and a2 are two auxiliary variables in the data
file. You can use any names except for x, m, and y for naming the
auxiliary variables;
%LET missing=99999; *specify the missing data value;
%LET nimpute = 100; *define the number of imputations K;
%LET nboot = 1000; *define the number of bootstraps B;
%LET alpha = 0.95; *define the confidence level;
%LET seed = 2010; *random number seed;
/*** End of setup of global parameters ***/
data all;
input col1-col4;
cards;
0 0 0 0
;
run;
%MACRO runsim(i);
data temp;
number = &i;
datastr=put(number, 4.);
datastrcomp=compress(datastr,'');
commd ='cp /pscratch/zzhang4/Private/mediation/data/simucond/data-noav-'||datastrcomp||'.txt
data.txt';
run;
data _null_;
set temp;
file 'cp.sh';
put commd;
run;
data _null_;
X 'chmod 755 cp.sh';
X './cp.sh';
X 'rm -f cp.sh';
run;
quit;
/*In general, there is no need to change the codes below*/
/*Read data into sas*/
DATA dset;
INFILE "data.txt";
INPUT &varname;
ARRAY nvarlist &varname;
DO OVER nvarlist;
IF nvarlist = &missing THEN nvarlist = .;
END;
RUN;
PROC MI DATA=dset SEED=&seed NIMPUTE=&nimpute OUT=imputed NOPRINT;
VAR &varname;
RUN; QUIT;
/*Estimating model parameters for each imputed data set*/
PROC REG DATA=imputed OUTEST= est NOPRINT;
MODEL y = x m;
MODEL m = x;
BY _Imputation_;
RUN; QUIT;
/*Collecting results from mutiple imputations*/
DATA temp;
SET est;
id =INT((_N_-.1)/2)+1;
modelnum = MOD(_N_+1, 2)+1;
RUN;
DATA temp1;
SET temp;
ARRAY int[2] iY iM;
ARRAY xpar[2] c a;
ARRAY mpar[2] b tmp1;
ARRAY sigma[2] sy sm;
RETAIN a b c iY iM sy sm;
BY id;
IF FIRST.id THEN DO I = 1 to 2;
int[I] = .;
xpar[I] = .;
mpar[I]=.;
sigma[I]=.;
END;
int[modelnum] = intercept;
xpar[modelnum] = x;
mpar[modelnum] = m;
sigma[modelnum] = _RMSE_;
IF LAST.id THEN OUTPUT;
KEEP _imputation_ a b c iY iM sy sm;
RUN;
/*Calcuating mediation effects*/
DATA temp2;
SET temp1;
ab=a*b;
RUN;
/*Saving the point estimates of model parameters and mediation effect
from multiple imputation into a data set named 'pointest'*/
PROC MEANS DATA=temp2 NOPRINT;
VAR a b c ab iY iM sy sm;
OUTPUT OUT=pointest MEAN(a b c ab iY iM sy sm)=a b c ab iY iM sy sm;
RUN;
/*** Bootstraping data to obtain standard errors and confidence
intervals ***/
DATA bootsamp1;
DO sampnum = 1 to 500;
DO i = 1 TO nobs;
ran = ROUND(RANUNI(&seed) * nobs);
SET dset
nobs = nobs
point = ran;
OUTPUT;
END;
END;
STOP;
RUN; QUIT;
/*** Imputing K data sets for each bootstrap sample ***/
PROC MI DATA=bootsamp1 SEED=&seed NIMPUTE=&nimpute OUT=imputed1 NOPRINT;
EM MAXITER = 500;
VAR &varname;
BY sampnum;
RUN; QUIT;
DATA bootsamp2;
DO sampnum = 501 to 1000;
DO i = 1 TO nobs;
ran = ROUND(RANUNI(&seed) * nobs);
SET dset
nobs = nobs
point = ran;
OUTPUT;
END;
END;
STOP;
RUN; QUIT;
/*** Imputing K data sets for each bootstrap sample ***/
PROC MI DATA=bootsamp2 SEED=&seed NIMPUTE=&nimpute OUT=imputed2 NOPRINT;
EM MAXITER = 500;
VAR &varname;
BY sampnum;
RUN; QUIT;
DATA imputed;
set imputed1 imputed2;
run;
/*Estimate model parameters for each imputed data set (in total, there
are B*K imputed data sets.)*/
PROC REG DATA=imputed OUTEST= est NOPRINT;
MODEL y = x m;
MODEL m = x;
BY sampnum _Imputation_;
RUN; QUIT;
/*Collecting results from different imputed data sets*/
DATA temp;
SET est;
id =INT((_N_-.1)/2)+1;
modelnum = MOD(_N_+1, 2)+1;
RUN;
DATA temp1;
SET temp;
ARRAY int[2] iY iM;
ARRAY xpar[2] c a;
ARRAY mpar[2] b tmp1;
ARRAY sigma[2] sy sm;
RETAIN a b c iY iM sy sm;
BY id;
IF FIRST.id THEN DO I = 1 to 2;
int[I] = .;
xpar[I] = .;
mpar[I]=.;
sigma[I]=.;
END;
int[modelnum] = intercept;
xpar[modelnum] = x;
mpar[modelnum] = m;
sigma[modelnum] = _RMSE_;
IF LAST.id THEN OUTPUT;
KEEP sampnum _imputation_ a b c iY iM sy sm;
RUN;
DATA temp2;
SET temp1;
ab=a*b;
RUN;
/*Compute point estimates of model parameters and mediation effect for
each bootstrap sample and the results are saved in the data file named
'bootest'. */
PROC MEANS DATA=temp2 NOPRINT;
BY sampnum;
VAR a b c ab iY iM sy sm;
OUTPUT OUT=bootest MEAN(a b c ab iY iM sy sm)=a b c ab iY iM sy sm;
RUN;
/*** Calculate the BC intervals based on the point estimates from
different bootstrap samples and produce a table containing the points
estimates, standard errors, confidence intervals in the output
window.***/
PROC IML;
START main;
USE pointest;
READ ALL INTO Y;
USE bootest;
READ ALL INTO X;
n=NROW(X);
m=NCOL(X);
bc_lo=J(1,m-3,0);
bc_up=J(1,m-3,0);
se=J(1,m-3,0);
alphas=1-(1-&alpha)/2;
zcrit = PROBIT(alphas);
DO j=1 TO m-3;
se[j]=SQRT((SSQ(X[,j+3]) -(SUM(X[,j+3]))**2/n)/(n-1));
number=0;
DO i=1 TO n;
IF X[i,j+3]<Y[j+2] THEN number=number+1;
END;
p=number/n;
z0hat=PROBIT(p);
q1=z0hat+(z0hat-zcrit);
q2=z0hat+(z0hat+zcrit);
alpha1=PROBNORM(q1);
alpha2=PROBNORM(q2);
vec=X[,j+3];
CALL SORT(vec,{1});
low=int(alpha1*(n+1));
up=int(alpha2*(n+1));
IF low<1 THEN low=1;
IF up>n THEN up=n;
bc_lo[j]=vec[low];
bc_up[j]=vec[up];
END;
result=J(1,4,0);
result[1] = Y[6];
result[2] = se[4];
result[3] = bc_lo[4] ;
result[4] = bc_up[4];
create bcci from result;
append from result;
FINISH;
RUN main;
QUIT;
/*Combine results*/
%MEND runsim;
%MACRO batch;
%LET r=1001;
%DO %WHILE (&r <=2000);
%runsim(&r);
%LET r = %eval(&r+1);
data all;
set all bcci;
run;
%END;
%MEND batch;
%batch;
data _NULL_;
set all;
file "all.txt";
put col1-col4;
run;
quit;
lab/zhang/sas_macros_for_simulation.txt · Last modified: 2016/01/24 09:48 by 127.0.0.1
