*~~~~~~~~~~~ Logistic DA Main Program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*; * Instructions: * 1. save your data set and the program logisticDAmacros.sas in the same location; * 2. specify the path name of that location, as well as the other information as listed, below; * 3. save this (logisticDAmain.sas) program with your specifications; * 4. run this (logisticDAmain.sas) program. *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*; *=========================================================*; **** USER INPUT: PLEASE MODIFY/ENTER INFORMATION BELOW ****; *=========================================================*; %let path=C:\ \; *specify path where all files (data and macros) are located (e.g., C:\Documents and Settings\Desktop); %let datasetname = ; * specify name of data set (e.g., mydata.dat); %let DEPVAR=Y; /* THE MACRO VARIABLE DEPVAR CONTAINS THE DEPENDENT VARIABLE (e.g., Y) */ %let INDEPVAR=x1 x2 x3 x4 ; /* THE MACRO VARIABLE INDEPVAR CONTAINS THE PREDICTOR VARIABLES (e.g., x1 x2 x3 x4) */ %let p=4; * specify number of predictors (Xs); %let r=100; * specify number of bootstrap samples (enter zero for none); *===============================================*; *** USERS SHOULD NOT MODIFY BELOW THIS POINT ***; *===============================================*; title "You chose to use p=&p independent variables and &r bootstrap samples."; title2 "Bootstrap sample 0 will refer to the parent sample."; run; *********** PARENT SAMPLE *****************; data xydata; infile "&path\&datasetname"; input &DEPVAR &INDEPVAR; run; /* proc print data=xydata; run; */ proc corr data=xydata; run; *** ----------------------------------------------- ***; *** compute four fit measures for all subset models ***; *** ----------------------------------------------- ***; %include "&path\logisticDAmacros.sas"; * <-- macro path *; %allsubsets(&indepvar, 0); %ALLSUBSREG(0, xydata); *** allfits and submodels for original (parent) sample ***; /* proc print data=allfits0; title 'Sample data'; run; proc print data=submodels0; run; */ * macro to obtain number of observations; %macro obsnvars(ds,nvarsp,nobsp); %global dset nvars nobs; %let dset=&ds; %let dsid = %sysfunc(open(&dset)); %if &dsid %then %do; %let nobs =%sysfunc(attrn(&dsid,NOBS)); %let nvars=%sysfunc(attrn(&dsid,NVARS)); %let rc = %sysfunc(close(&dsid)); %end; %else %put Open for data set &dset failed - %sysfunc(sysmsg()); %mend obsnvars; %obsnvars(xydata,nvars,nobs) *%put &dset has &nvars variable(s) and &nobs observation(s).; data rsquares; set allfits0; *n = 296; *Nagelk = (1-(L0/LM)**(2/n))/(1-(L0)**(2/n)); Nagelkerke = (1-(exp(LogL0-LogLM))**(2/&nobs))/( 1-exp((2/&nobs)*LogL0) ); Estrella = 1 - (LogLM/LogL0)**((-2/&nobs)*LogL0); McFadden = 1- (LogLM/LogL0); run; /* proc print data=rsquares; title 'Likelihood-based fit measures'; run; */ data corrfits; set corrfits0; corr = y; corr_sq = y**2; run; /* proc print data=corrfits; title 'Correlation-based fit measures'; run; */ data subsetfits; merge rsquares corrfits; *by subset; proc print data=subsetfits; title 'All fit measures for parent sample'; run; data summary0; merge subsetfits submodels0; run; /* proc print data=summary0; title 'All (parent) sample results: summary'; run; */ *** ----------------------------------------------- ***; *** perform DA using four fit measures ***; *** ----------------------------------------------- ***; *** call DA macro --> CHANGE TO PATH WHERE MACRO IS SAVED! ***; *%include "&path\LogisticDAmacro.sas"; * <-- macro path *; %logistic_da(Nagelkerke, 0); %logistic_da(Estrella, 0); %logistic_da(McFadden, 0); %logistic_da(corr_sq, 0); title; run; *********** BOOTSTRAP SAMPLES *****************; **Selecting bootstrap samples; %macro bootstr; *** ----------------------------------------------- ***; *** Generate bootstrap samples ***; *** ----------------------------------------------- ***; %if &r > 0 %then %do; ** h is the bootstrap sample number ; %do h = 1 %to &r ; proc iml; start; print "now running bootstrap sample &h of &r samples..."; finish; run; filename outda "&path\logisticda.lst"; proc printto print=outda; run; ** this do loop draws a sample of size nobs, with replacement ; %do i = 1 %to &nobs ; data b1; set xydata; ** Create a random number using time of day for the seed ; ransamp = ranuni(-9) ; ** sort in random order ; proc sort ; by ransamp ; ** take the first case of the randomly sorted data set ; data boots ; set b1(obs=1) ; run ; ** Append the data to the sample -- ending with the number; ** of cases specified in the "i" loop above ; ** Assign the cases to a dataset that will change name with each ** iteration of the "h" loop above ; proc append base = boots&h data = boots; run; %end; /*proc print data=boots&h; title "boostrap data set &h"; */ *proc corr data=boots&h; run; *** ------------------------------------------------------------------- ***; *** compute four fit measures for all subset models (bootstrap samples) ***; *** ------------------------------------------------------------------- ***; data boots&h; set boots&h; run; *active data set; * compute four fit measures for all subset models; *%include "&path\allsubsets.sas"; * <-- macro path *; %allsubsets(&indepvar, &h); %ALLSUBSREG(&h, boots&h); /* proc print data=allfits&h; title "results for bootstrap sample &h"; run; proc print data=submodels&h; run; */ data rsquares&h; set allfits&h; Nagelkerke = (1-( exp( (2/&nobs)*(LogL0-LogLM) ) ))/(1-( exp( (2/&nobs)*LogL0 ) )); Estrella = 1 - (LogLM/LogL0)**((-2/&nobs)*LogL0); McFadden = 1- (LogLM/LogL0); run; /* proc print data=rsquares; title 'Likelihood-based fit measures (parent)'; run;*/ data corrfits&h; set corrfits&h; corr = y; corr_sq = y**2; run; /* proc print data=corrfits; title 'Correlation-based fit measures (parent)'; run; */ data subsetfits&h; merge rsquares&h corrfits&h; *by subset; /* proc print data=subsetfits; title 'All fit measures (parent)'; run; */ data summary&h; merge subsetfits&h submodels&h; run; /* proc print data=summary&h; title "results (summary) for bootstrap sample &h"; run; */ *** ------------------------------------------------------------------ ***; *** perform DA using all fit measures (bootstrap samples) ***; *** ------------------------------------------------------------------ ***; *%include "&path\LogisticDAmacro.sas"; * <-- macro path *; %logistic_da(Nagelkerke, &h); %logistic_da(Estrella, &h); %logistic_da(McFadden, &h); %logistic_da(corr_sq, &h); proc printto; %end; * ============ end of bootstraps loop ============*; /* proc print data=alldatable; title 'DA table, all measures'; run; proc print data=allda; title 'DA averages, all measures'; run; */ * results for each dominance type merged across fit measures; /* proc print data=allcomp; title 'Dominance (Dij) results: summary over all measures'; title2 'Bootstrap sample 0 is the parent sample'; run; proc print data=allcond; proc print data=allgen; run; */ *** ------------------------------------------------------------------ ***; *** compute reproducibility and CIs for general DA ***; *** ------------------------------------------------------------------ ***; ****** percentile 95 percent confidence intervals for general dominance measures *******; data genci; set allda; if in = 999; *extract general dominance measures; if sample > 0; *use only bootstrap samples; %do i=1 %to &p-1; %do j=&i+1 %to &p; gen&i&j = x&i-x&j; * create Gij variable; %end; %end; run; /* proc print data=genci; title 'genci data set'; run; */ data genci; set genci; proc sort; by measure; run; %do i=1 %to &p-1; %do j=&i+1 %to &p; proc univariate data=genci noprint; var gen&i&j; output out=cigen&i&j mean=Mean_G&i&j std=StDev_G&i&j pctlpts=2.5, 97.5 pctlpre=G&i&j pctlname=LL_95CI UL_95CI; by measure; run; proc print data=cigen&i&j; title "Percentile 95% CIs for G&i&j based on &r bootstrap samples"; title2 'LL and UL refer to the lower and upper limits of the CI, respectively'; run; %end; %end; ****** Reproducibility for Dij dominance measures *******; *complete; proc sort data=allcomp; by measure; run; proc transpose data=allcomp out=compda(rename=(col1=parent)); by measure; run; data compagree; set compda; if _NAME_ ^= "fit"; if _NAME_ ^= "sample"; run; /* proc print data=compagree; title 'Reproducibility proportion values (reprod) for complete dominance'; run; */ data compagree; set compagree; agree=0; %do k=2 %to &r+1; if parent = -1 and col&k = -1 then agree=agree+1; if parent = 1 and col&k = 1 then agree=agree+1; if parent ^= 1 and parent ^= -1 and col&k ^= 1 and col&k ^= -1 then agree=agree+1; %end; reprod=agree/&r; Dij = _NAME_; run; proc print data=compagree; var measure Dij parent agree reprod; title 'Reproducibility proportion values (reprod) for complete dominance'; run; * conditional; proc sort data=allcond; by measure; run; proc transpose data=allcond out=condda(rename=(col1=parent)); by measure; run; data condagree; set condda; if _NAME_ ^= "fit"; if _NAME_ ^= "sample"; run; /* proc print data=condagree; title 'Reproducibility proportion values (reprod) for conditional dominance'; run; */ data condagree; set condagree; agree=0; %do k=2 %to &r+1; if parent = -1 and col&k = -1 then agree=agree+1; if parent = 1 and col&k = 1 then agree=agree+1; if parent ^= 1 and parent ^= -1 and col&k ^= 1 and col&k ^= -1 then agree=agree+1; %end; reprod=agree/&r; Dij = _NAME_; run; proc print data=condagree; var measure Dij parent agree reprod; title 'Reproducibility proportion values (reprod) for conditional dominance'; run; *general; proc sort data=allgen; by measure; run; data allgen; set allgen; drop x1--x&p; run; proc transpose data=allgen out=genda(rename=(col1=parent)); by measure; run; data genagree; set genda; if _NAME_ ^= "fit"; if _NAME_ ^= "sample"; run; /* proc print data=genagree; title 'Reproducibility proportion values (reprod) for general dominance'; run; */ data genagree; set genagree; agree=0; %do k=2 %to &r+1; if parent = -1 and col&k = -1 then agree=agree+1; if parent = 1 and col&k = 1 then agree=agree+1; if parent ^= 1 and parent ^= -1 and col&k ^= 1 and col&k ^= -1 then agree=agree+1; %end; reprod=agree/&r; Dij = _NAME_; run; proc print data=genagree; var measure Dij parent agree reprod; title 'Reproducibility proportion values (reprod) for general dominance'; run; %end; *if loop; %mend bootstr; %bootstr;