User Tools

Site Tools


lab:zhang:sas_macros_for_simulation
/*** Setup the global parameters ***/
/*The parameters below should be changed accordingly*/
*%LET filename="c:\zzy\mnarmediation\active1.txt"; * data file directory and name;
%LET varname=var00list;  *specify variable names in the data file. Please 
use x for the input variable, m for the mediation variable, and y for 
the output variable. a1 and a2 are two auxiliary variables in the data 
file. You can use any names except for x, m, and y for naming the 
auxiliary variables;
%LET missing=99999; *specify the missing data value;
%LET nimpute = 100; *define the number of imputations K;
%LET nboot = 1000;  *define the number of bootstraps B;
%LET alpha = 0.95;  *define the confidence level;
%LET seed = 2010;   *random number seed;
/*** End of setup of global parameters ***/
data all;
input col1-col4;
cards;
0 0 0 0
;
run;

%MACRO runsim(i);
data temp;
number = &i;
datastr=put(number, 4.);
datastrcomp=compress(datastr,'');
commd ='cp /pscratch/zzhang4/Private/mediation/data/simucond/data-noav-'||datastrcomp||'.txt 
data.txt';
run;

data _null_;
set temp;
file 'cp.sh';
put commd;
run;

data _null_;
X 'chmod 755 cp.sh';
X './cp.sh';
X 'rm -f cp.sh';
run;
quit;

/*In general, there is no need to change the codes below*/
/*Read data into sas*/
DATA dset;
  INFILE "data.txt";
  INPUT &varname;
  ARRAY nvarlist &varname;
  DO OVER nvarlist;
    IF nvarlist = &missing THEN nvarlist = .;  
  END;
RUN;

PROC MI DATA=dset SEED=&seed NIMPUTE=&nimpute OUT=imputed NOPRINT;
  VAR &varname;
RUN; QUIT;
/*Estimating model parameters for each imputed data set*/
PROC REG DATA=imputed OUTEST= est NOPRINT;
  MODEL y = x m;
  MODEL m = x;
  BY _Imputation_;
RUN; QUIT;

/*Collecting results from mutiple imputations*/
DATA temp;
  SET est;
  id =INT((_N_-.1)/2)+1;
  modelnum = MOD(_N_+1, 2)+1;
RUN;

DATA temp1;
  SET temp;
  ARRAY int[2] iY iM;
  ARRAY xpar[2] c a;
  ARRAY mpar[2] b tmp1;
  ARRAY sigma[2] sy sm;
  RETAIN  a b c iY iM sy sm;
	BY id;
	IF FIRST.id THEN DO I = 1 to 2;
		int[I] = .;
		xpar[I] = .;
		mpar[I]=.;
		sigma[I]=.;
	END;
	int[modelnum] = intercept;
	xpar[modelnum] = x;
	mpar[modelnum] = m;
	sigma[modelnum] = _RMSE_;
	IF LAST.id THEN OUTPUT;
	KEEP  _imputation_ a b c iY iM sy sm;
RUN;
/*Calcuating mediation effects*/
DATA temp2;
  SET temp1;
  ab=a*b;
RUN;

/*Saving the point estimates of model parameters and mediation effect 
from multiple imputation into a data set named 'pointest'*/
PROC MEANS DATA=temp2 NOPRINT;
  VAR a b c ab iY iM sy sm;
  OUTPUT OUT=pointest MEAN(a b c ab iY iM sy sm)=a b c ab iY iM sy sm;
RUN;

/*** Bootstraping data to obtain standard errors and confidence 
intervals ***/
DATA bootsamp1;
  DO sampnum = 1 to 500;
     DO i = 1 TO nobs;
        ran = ROUND(RANUNI(&seed) * nobs);
        SET dset
        nobs = nobs
        point = ran;
        OUTPUT;
     END;
  END;
  STOP;
RUN; QUIT;

/*** Imputing K data sets for each bootstrap sample ***/
PROC MI DATA=bootsamp1 SEED=&seed NIMPUTE=&nimpute OUT=imputed1 NOPRINT;
  EM MAXITER = 500;
  VAR &varname;
  BY sampnum;
RUN; QUIT;

DATA bootsamp2;
  DO sampnum = 501 to 1000;
     DO i = 1 TO nobs;
        ran = ROUND(RANUNI(&seed) * nobs);
        SET dset
        nobs = nobs
        point = ran;
        OUTPUT;
     END;
  END;
  STOP;
RUN; QUIT;

/*** Imputing K data sets for each bootstrap sample ***/
PROC MI DATA=bootsamp2 SEED=&seed NIMPUTE=&nimpute OUT=imputed2 NOPRINT;
  EM MAXITER = 500;
  VAR &varname;
  BY sampnum;
RUN; QUIT;

DATA imputed;
set imputed1 imputed2;
run;

/*Estimate model parameters for each imputed data set (in total, there 
are B*K imputed data sets.)*/
PROC REG DATA=imputed OUTEST= est NOPRINT;
  MODEL y = x m;
  MODEL m = x;
  BY sampnum _Imputation_;
RUN; QUIT;

/*Collecting results from different imputed data sets*/
DATA temp;
  SET est;
  id =INT((_N_-.1)/2)+1;
  modelnum = MOD(_N_+1, 2)+1;
RUN;

DATA temp1;
  SET temp;
  ARRAY int[2] iY iM;
  ARRAY xpar[2] c a;
  ARRAY mpar[2] b tmp1;
  ARRAY sigma[2] sy sm;
  RETAIN  a b c iY iM sy sm;
	BY id;
	IF FIRST.id THEN DO I = 1 to 2;
		int[I] = .;
		xpar[I] = .;
		mpar[I]=.;
		sigma[I]=.;
	END;
	int[modelnum] = intercept;
	xpar[modelnum] = x;
	mpar[modelnum] = m;
	sigma[modelnum] = _RMSE_;
	IF LAST.id THEN OUTPUT;
	KEEP  sampnum _imputation_ a b c iY iM sy sm;
RUN;

DATA temp2;
  SET temp1;
  ab=a*b;
RUN;

/*Compute point estimates of model parameters and mediation effect for 
each bootstrap sample and the results are saved in the data file named 
'bootest'. */
PROC MEANS DATA=temp2 NOPRINT;
  BY sampnum;
  VAR a b c ab iY iM sy sm;
  OUTPUT OUT=bootest MEAN(a b c ab iY iM sy sm)=a b c ab iY iM sy sm;
RUN;

/*** Calculate the BC intervals based on the point estimates from 
different bootstrap samples and produce a table containing the points 
estimates, standard errors, confidence intervals in the output 
window.***/
PROC IML;
  START main;
  USE pointest;
  READ ALL INTO Y;
  USE bootest;
  READ ALL INTO X;

  n=NROW(X);
  m=NCOL(X);

  bc_lo=J(1,m-3,0);
  bc_up=J(1,m-3,0);
  se=J(1,m-3,0);

  alphas=1-(1-&alpha)/2;
  zcrit = PROBIT(alphas);
  
  DO j=1 TO m-3;
	se[j]=SQRT((SSQ(X[,j+3]) -(SUM(X[,j+3]))**2/n)/(n-1)); 
	number=0;
	DO i=1 TO n;
		IF X[i,j+3]<Y[j+2] THEN number=number+1;
	END;
	p=number/n;
	z0hat=PROBIT(p);
        
	q1=z0hat+(z0hat-zcrit);
	q2=z0hat+(z0hat+zcrit);
	alpha1=PROBNORM(q1);
	alpha2=PROBNORM(q2);

	vec=X[,j+3];
	CALL SORT(vec,{1});

	low=int(alpha1*(n+1));
	up=int(alpha2*(n+1));
	IF low<1 THEN low=1;
	IF up>n THEN up=n;
	bc_lo[j]=vec[low];
	bc_up[j]=vec[up];
  END;
  result=J(1,4,0);
  result[1] = Y[6];
  result[2] = se[4];
  result[3] = bc_lo[4] ;
  result[4] = bc_up[4];

create bcci from result;
append from result;
  FINISH;
  RUN main;
QUIT;
/*Combine results*/
%MEND runsim;

%MACRO batch;
%LET r=1001;
%DO %WHILE (&r <=2000);
  %runsim(&r);
  %LET r = %eval(&r+1);
  data all;
   set all bcci;
run;
%END;
%MEND batch;

%batch;


data _NULL_;
set all;
file "all.txt";
put col1-col4;
run;
quit;

Page Tools