*~~~~~~~~~~~ Logistic DA Main Program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*;
* Instructions:
* 1. save your data set and the program logisticDAmacros.sas in the same location;
* 2. specify the path name of that location, as well as the other information as listed, below;
* 3. save this (logisticDAmain.sas) program with your specifications;
* 4. run this (logisticDAmain.sas) program.
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*;
*=========================================================*;
**** USER INPUT: PLEASE MODIFY/ENTER INFORMATION BELOW ****;
*=========================================================*;
%let path=C:\ \; *specify path where all files (data and macros) are located (e.g., C:\Documents and Settings\Desktop);
%let datasetname = ; * specify name of data set (e.g., mydata.dat);
%let DEPVAR=Y;   /* THE MACRO VARIABLE DEPVAR CONTAINS THE DEPENDENT VARIABLE (e.g., Y) */
%let INDEPVAR=x1 x2 x3 x4 ;   /* THE MACRO VARIABLE INDEPVAR CONTAINS THE PREDICTOR VARIABLES (e.g., x1 x2 x3 x4) */ 
%let p=4;	* specify number of predictors (Xs);
%let r=100;   * specify number of bootstrap samples (enter zero for none);
*===============================================*;
*** USERS SHOULD NOT MODIFY BELOW THIS POINT ***;
*===============================================*;


title "You chose to use p=&p independent variables and &r bootstrap samples.";
title2 "Bootstrap sample 0 will refer to the parent sample.";
run;

***********  PARENT SAMPLE *****************;
data xydata;
 infile "&path\&datasetname";
 input &DEPVAR &INDEPVAR;
run;

/* proc print data=xydata; run; */
proc corr data=xydata;
run;

*** ----------------------------------------------- ***;
*** compute four fit measures for all subset models ***;
*** ----------------------------------------------- ***;
%include "&path\logisticDAmacros.sas";  * <-- macro path *;
%allsubsets(&indepvar, 0);
%ALLSUBSREG(0, xydata);



*** allfits and submodels for original (parent) sample ***;
/*
proc print data=allfits0; title 'Sample data'; run;
proc print data=submodels0; run;
*/

* macro to obtain number of observations;
%macro obsnvars(ds,nvarsp,nobsp);
   %global dset nvars nobs;
   %let dset=&ds;
   %let dsid = %sysfunc(open(&dset));
   %if &dsid %then
      %do;
         %let nobs =%sysfunc(attrn(&dsid,NOBS));
         %let nvars=%sysfunc(attrn(&dsid,NVARS));
         %let rc = %sysfunc(close(&dsid));
      %end;
   %else
      %put Open for data set &dset failed - %sysfunc(sysmsg());
%mend obsnvars;
%obsnvars(xydata,nvars,nobs)
*%put &dset has &nvars variable(s) and &nobs observation(s).;

data rsquares; set allfits0;
 *n = 296;
 *Nagelk = (1-(L0/LM)**(2/n))/(1-(L0)**(2/n));
 Nagelkerke = (1-(exp(LogL0-LogLM))**(2/&nobs))/( 1-exp((2/&nobs)*LogL0) );
 Estrella = 1 - (LogLM/LogL0)**((-2/&nobs)*LogL0);
 McFadden = 1- (LogLM/LogL0);
run;
/*
proc print data=rsquares; title 'Likelihood-based fit measures'; run;
*/
data corrfits; set corrfits0;
 corr = y; corr_sq = y**2;
run;
/* 
proc print data=corrfits; title 'Correlation-based fit measures'; run;
*/

data subsetfits;
 merge rsquares corrfits; *by subset;
proc print data=subsetfits; title 'All fit measures for parent sample'; run;
data summary0; merge subsetfits submodels0; 
run;
/*
proc print data=summary0; title 'All (parent) sample results: summary'; run;
*/

*** ----------------------------------------------- ***;
***   perform DA using four fit measures            ***;
*** ----------------------------------------------- ***;
*** call DA macro --> CHANGE TO PATH WHERE MACRO IS SAVED! ***;
*%include "&path\LogisticDAmacro.sas";  * <-- macro path *;
%logistic_da(Nagelkerke, 0);
%logistic_da(Estrella, 0);
%logistic_da(McFadden, 0);
%logistic_da(corr_sq, 0);

title;
run;


***********  BOOTSTRAP SAMPLES *****************;


**Selecting bootstrap samples;
%macro bootstr;
*** ----------------------------------------------- ***;
***           Generate bootstrap samples            ***;
*** ----------------------------------------------- ***;

%if &r > 0 %then %do;

** h is the bootstrap sample number ;
%do h = 1 %to &r ;
proc iml;
 start;
 print "now running bootstrap sample &h of &r samples..."; 
finish;
run;

filename outda "&path\logisticda.lst";
proc printto print=outda;
run;

** this do loop draws a sample of size nobs, with replacement ;
%do i = 1 %to &nobs ;

data b1; set xydata;
** Create a random number using time of day for the seed ;
ransamp = ranuni(-9) ;
** sort in random order ;
proc sort ; by ransamp ;
** take the first case of the randomly sorted data set ;
data boots ; set b1(obs=1) ; run ;
** Append the data to the sample -- ending with the number;
** of cases specified in the "i" loop above ;
** Assign the cases to a dataset that will change name with each
** iteration of the "h" loop above ;
proc append base = boots&h data = boots;
run;
%end;
/*proc print data=boots&h; title "boostrap data set &h"; */
*proc corr data=boots&h;
run;

*** ------------------------------------------------------------------- ***;
*** compute four fit measures for all subset models (bootstrap samples) ***;
*** ------------------------------------------------------------------- ***;

data boots&h; set boots&h; run; *active data set;
* compute four fit measures for all subset models;
*%include "&path\allsubsets.sas";  * <-- macro path *;
%allsubsets(&indepvar, &h);
%ALLSUBSREG(&h, boots&h);

/*
proc print data=allfits&h; title "results for bootstrap sample &h"; run;
proc print data=submodels&h; run;
*/

data rsquares&h; set allfits&h;
 Nagelkerke = (1-( exp( (2/&nobs)*(LogL0-LogLM) ) ))/(1-( exp( (2/&nobs)*LogL0 ) ));
 Estrella = 1 - (LogLM/LogL0)**((-2/&nobs)*LogL0);
 McFadden = 1- (LogLM/LogL0);
run;
/* proc print data=rsquares; title 'Likelihood-based fit measures (parent)'; run;*/

data corrfits&h; set corrfits&h;
 corr = y; corr_sq = y**2;
run;
/* proc print data=corrfits; title 'Correlation-based fit measures (parent)'; run; */

data subsetfits&h;
 merge rsquares&h corrfits&h; *by subset;
/* proc print data=subsetfits; title 'All fit measures (parent)'; run; */
data summary&h; merge subsetfits&h submodels&h; 
run;
/*
proc print data=summary&h; title "results (summary) for bootstrap sample &h"; run;
*/



*** ------------------------------------------------------------------ ***;
***       perform DA using all fit measures (bootstrap samples)        ***;
*** ------------------------------------------------------------------ ***;

*%include "&path\LogisticDAmacro.sas";    * <-- macro path *;
%logistic_da(Nagelkerke, &h);
%logistic_da(Estrella, &h);
%logistic_da(McFadden, &h);
%logistic_da(corr_sq, &h);

proc printto;

%end;
* ============ end of bootstraps loop ============*;

/*
proc print data=alldatable; title 'DA table, all measures'; run;
proc print data=allda; title 'DA averages, all measures'; run;
*/
* results for each dominance type merged across fit measures;
/*
proc print data=allcomp; title 'Dominance (Dij) results: summary over all measures'; 
title2 'Bootstrap sample 0 is the parent sample'; run;
proc print data=allcond;
proc print data=allgen;
run;
*/

*** ------------------------------------------------------------------ ***;
***       compute reproducibility and CIs for general DA               ***;
*** ------------------------------------------------------------------ ***;

****** percentile 95 percent confidence intervals for general dominance measures *******;
data genci; set allda;
 if in = 999; 		*extract general dominance measures;
 if sample > 0; 	*use only bootstrap samples;
 %do i=1 %to &p-1;
  %do j=&i+1 %to &p;
   gen&i&j = x&i-x&j;	* create Gij variable;
  %end;
 %end;
run;
/*
proc print data=genci; title 'genci data set'; run;
*/

data genci; set genci; 
proc sort; by measure; run;
%do i=1 %to &p-1;
 %do j=&i+1 %to &p;
  proc univariate data=genci noprint;
   var gen&i&j;
   output out=cigen&i&j mean=Mean_G&i&j std=StDev_G&i&j
     pctlpts=2.5, 97.5 pctlpre=G&i&j pctlname=LL_95CI UL_95CI;
   by measure;
  run;
  proc print data=cigen&i&j; 
   title "Percentile 95% CIs for G&i&j based on &r bootstrap samples"; 
   title2 'LL and UL refer to the lower and upper limits of the CI, respectively'; 
  run;
 %end;
%end;



****** Reproducibility for Dij dominance measures *******;
*complete;
proc sort data=allcomp; by measure; run;
proc transpose data=allcomp
     out=compda(rename=(col1=parent));
     by measure;
run;
data compagree; set compda;
 if _NAME_ ^= "fit";
 if _NAME_ ^= "sample";
run;
/*
proc print data=compagree; 
title 'Reproducibility proportion values (reprod) for complete dominance';
run;
*/
data compagree; set compagree;
 agree=0;
 %do k=2 %to &r+1;
  if parent = -1 and col&k = -1 then agree=agree+1;
  if parent = 1 and col&k = 1 then agree=agree+1;
  if parent ^= 1 and parent ^= -1 and col&k ^= 1 and col&k ^= -1 then agree=agree+1;
 %end;
 reprod=agree/&r;
 Dij = _NAME_;
run;
proc print data=compagree; 
 var measure Dij parent agree reprod; 
 title 'Reproducibility proportion values (reprod) for complete dominance';
run;


* conditional;
proc sort data=allcond; by measure; run;
proc transpose data=allcond
     out=condda(rename=(col1=parent));
     by measure;
run;
data condagree; set condda;
 if _NAME_ ^= "fit";
 if _NAME_ ^= "sample";
run;
/*
proc print data=condagree; 
title 'Reproducibility proportion values (reprod) for conditional dominance';
run;
*/
data condagree; set condagree;
 agree=0;
 %do k=2 %to &r+1;
  if parent = -1 and col&k = -1 then agree=agree+1;
  if parent = 1 and col&k = 1 then agree=agree+1;
  if parent ^= 1 and parent ^= -1 and col&k ^= 1 and col&k ^= -1 then agree=agree+1;
 %end;
 reprod=agree/&r;
 Dij = _NAME_;
run;
proc print data=condagree; 
 var measure Dij parent agree reprod; 
 title 'Reproducibility proportion values (reprod) for conditional dominance';
run;


*general;
proc sort data=allgen; by measure; run;
data allgen; set allgen; drop x1--x&p; run;
proc transpose data=allgen
     out=genda(rename=(col1=parent));
     by measure;
run;
data genagree; set genda;
 if _NAME_ ^= "fit";
 if _NAME_ ^= "sample";
run;
/*
proc print data=genagree; 
title 'Reproducibility proportion values (reprod) for general dominance';
run;
*/
data genagree; set genagree;
 agree=0;
 %do k=2 %to &r+1;
  if parent = -1 and col&k = -1 then agree=agree+1;
  if parent = 1 and col&k = 1 then agree=agree+1;
  if parent ^= 1 and parent ^= -1 and col&k ^= 1 and col&k ^= -1 then agree=agree+1;
 %end;
 reprod=agree/&r;
 Dij = _NAME_;
run;
proc print data=genagree; 
 var measure Dij parent agree reprod; 
 title 'Reproducibility proportion values (reprod) for general dominance';
run;

%end; *if loop;
%mend bootstr;
%bootstr;