/******************************************************************************
                    MV DOMINANCE ANALYSIS MACRO
                    
This macro executes dominance analysis as described by Razia Azen & David Budescu.  
Dominance analysis quantifies the importance of each predictor as its average increment 
to the model r-squared, across all possible submodel sizes.
This macro was written by Razia Azen with valuable contributions from Robert 
Ceurvorst.

NOTE:  This program is limited to at most 10 predictors!

-------------
INSTRUCTIONS:
-------------
1. In this macro, change the %macro dom line (which appears just below these 
instructions) according to the following directions:
%macro dom (data=_last_, dep=, indep=,
      noprint=0, short=0, predtype='type_of_predictors', 
	  seed=random_number);
      
data=      SAS dataset to be used.  Default is last dataset created.

noprint=1  Suppresses printing of the input dataset.

short=1    Suppresses listing of each predictor's contributions to individual 
           submodels.

predtype= 'f' if the predictors are fixed;     
          'r' if the predictors are random.  Default is 'r'.         

seed=     Six digits.  Default is 0 (uses clock value).    


2.  Specify the number of Y (dependent) variables, X (independent) variabls, 
and B (bootstrap samples) below (immediately following these instructions).


3. Save this macro, and add the following two lines (below) to the
SAS program in which you've read the data set to be analyzed:

%include 'k:\sasmacro\mvDAmacro.sas';  *** CHANGE TO PATH WHERE MACRO IS SAVED ***;
%dom;

3.  Run the program edited in step 3.

-- END OF INSTRUCTIONS --
******************************************************************************/

/********** CHANGE ONLY THE LINES BELOW (See steps 1 and 2 above)!!! **************/
%let q = 2;
%let p = 4;
%let B = 1;
%macro dom (data=_last_, dep=, indep=, noprint=0, short=0, predtype='r', seed=0);
/**************************************************************************/

option nosource;

/*
* writes log file to the hard drive;
filename logKR "c:\Documents and Settings\Razia Azen\My Documents\Research\DA\MVDA\KRudolph.log";
proc printto log=KRlog;
run;

* writes lst file to the hard drive;
filename lstKR "c:/my files/papers/paper2/KRudolph.lst";
proc printto print=KRoutput;
run;
*/

%if &indep= and &p= %then %do;
  %put YOU MUST SPECIFY INDEP=list of predictors OR P=no. of predictors (IF THEY ARE NAMED X1-Xp).;
  %goto done;
%end;

%else %if &indep ne %then %do;
  %if %index(&indep,%str(%'))>0 or %index(&indep,%str(%"))>0 
      %then %let indep = %substr(&indep,2,%length(&indep)-2);
  %if %index(&indep,-) > 0 %then %expand;
  %let p=1; 
  %do %while(%length(%scan(&indep,&p))>0);
      %let x&p = %scan(&indep,&p);
      %let p = %eval(&p+1);
  %end;
  %let p = %eval(&p-1);
%end;

%else %if &p > 0 %then %do; 
  %let indep=;
  %do i=1 %to &p; 
      %let indep=&indep x&i;
      %let x&i=x&i;
  %end;  
%end;

%if &dep= and &q= %then %do;
  %put YOU MUST SPECIFY DEP=no. of criteria (IF THEY ARE NAMED Y1-Yq).;
  %goto done;
%end;
%else %if &dep ne %then %do;
  %if %index(&dep,%str(%'))>0 or %index(&dep,%str(%"))>0 
      %then %let dep = %substr(&dep,2,%length(&dep)-2);
  %if %index(&dep,-) > 0 %then %expand;
  %let q=1; 
  %do %while(%length(%scan(&dep,&q))>0);
      %let y&q = %scan(&dep,&q);
      %let q = %eval(&q+1);
  %end;
  %let q = %eval(&q-1);
%end;
%else %if &q > 0 %then %do; 
  %let dep=;
  %do i=1 %to &q; 
      %let dep=&dep y&i;
      %let y&i=y&i;
  %end;  
%end;

%if &p>10 %then %do;
  %put  THE MAXIMUM NUMBER OF PREDICTORS IS 10.  YOU HAVE &P..;
  %goto done;
%end;
%if &q>&p %then %do;
  %put  THE NUMBER OF PREDICTORS MUST BE LESS THAN THE NUMBER OF CRITERIA.;
  %goto done;
%end;

%LET DATANAME=&SYSLAST;
option nonotes;
DATA _NULL_;  FILE PRINT LINESLEFT=WITH;  CALL SYMPUT('WITH',WITH);
DATA _NULL_;
  FILE PRINT NOTITLES LINESLEFT=WITHOUT;
  CALL SYMPUT ('MTITLE',TRIM(LEFT(WITHOUT-&WITH+2)));
  CALL SYMPUT('NMODELS', 2**&p -1);
RUN;

data original; set &data;
* eliminate cases with relevant missing values;
 missing = 0;
 %do i=1 %to &q;
  if y&i = . then missing = 1;
 %end;
 %do i=1 %to &p;
  if x&i = . then missing = 1;
 %end;
 if missing = 0;
 keep &dep &indep;
run;

%if &noprint=0 %then %do;
proc print;
title&mtitle 'Input data set: complete cases and relevant variables only'; run;
%end;


TITLE&MTITLE;
%DONE: option notes _last_=&syslast;
run;

/*
data labels; set;  
  keep &indep &dep; 
  length dvlabel $40;
  call label(&dep,dvlabel);
  call symput('dvl',trim(dvlabel));
  if _n_=1 then stop;  
run;   
*/

/*********************************************************
     PART 1:  regression/DA of the original data 
*********************************************************/
proc corr data=original;
 var &dep &indep;
run;
proc reg data=original;
 model y1--y&q = x1--x&p/adjrsq selection=adjrsq cp aic;
 title 'regression results for original sample';
run;

option notes;
* create regression output to use for ordering models;
proc reg corr data=original outest=onereg (keep=_in_ _rsq_ &indep) noprint;
  model y1=&indep / stb pcorr2 scorr2 ;
  model y1=&indep / selection=adjrsq best=&nmodels %if &short=1 %then noprint;;
run; quit; option nonotes;

%macro subsets;
data dom;
  set onereg;
  model=0;
  if %substr(&sysver,1,1)>7 then if _n_>1;
  %do i=1 %to &p;
   if x&i=. then x&i=0; else x&i=1;
   model=model+x&i*&i*10**(&p-&i);
  %end;
  if _IN_=. then delete;
  keep x1--x&p _IN_ _RSQ_ model;
  proc sort;
   by _IN_ descending model;
 run;
run;
%mend;
%subsets;
/* proc print; title 'R-squared values for all subset models'; run; */


* Macro to obtain univariate model fits;
%macro uvfits(t, data, type);
title;
data sample; set &data;

proc IML;
reset noprint nolog;
start;
depvar = &t;
*print depvar;
* Step 1. read in the data to an IML matrix and define Y and X *;
*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*; 


 * read in the data file (into DATA);
 use sample;
  read all into DATA;
 close sample;

 * read in the binary matrix (into alldom);
 * this matrix contains 1/0 for each predictor and in (number in model)
 	for all subset models;
 use dom;
  read all into allDOM;
 close dom;
 *print AllDOM;

 n=nrow(DATA);
 p = &p;
 *p=ncol(alldom)-1;
 *print p;
 ncol=ncol(alldom);
 null=J(1,ncol,0);
 alldom=null//alldom;
 dom1=alldom[,1:p]; dom2=alldom[,p+1]; dom=dom2||dom1;
 *print dom; 
 *dom represents the subset models: number of predictors in the first column and the
 included predictors (1/0) in columns 2--p;
 *dom=dom[,2:ncol];
 r=ncol(DATA);
 q = &q;
 *print q;

*!!!!!!!!!!!!!!!!  IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*; 
*  if necessary, arrange data in the order Y1, ..., Yq, X1, ..., Xp  *;
*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*;
/*DataY=DATA[,p+1:p+q]; DataX=DATA[,1:p];*/
/*DATA=DataY||DataX; print 'data arranged Y1-Yq,X1-Xp'; print Data;*/


* Step 2. compute fits (e.g. r2) vector for the origina sample (all subsets) *;
*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*;

 * to check data is read in correctly, obtain r2 for each subset model;
 Rsqy&t=J(2**p,1,99);

* define Y;
Y&t = Data[,&t];
YCheck&t = Y&t[1:5,];
*print YCheck&t;

* define X for each subset model (i.e., subset of the DATA matrix);
do i=1 to 2**p;

 varsin=dom2(|i,|);
 ONE=J(n,1,1);
 if varsin > 0 then X=J(n,varsin,0);
 k=0;
 do j=1 to p;
  if dom(|i,j+1|)=1 then do;
   k=k+1;
   X(|,k|)=Data[,q+j];
  end;
 end;
 if varsin=0 then X=ONE;
 *print i X;


 * Obtain covariance matrices (for X and Y);
 In=I(n);
 SXX=(1/(n-1))*X`*(In-(1/n)*ONE*ONE`)*X;
 *print SXX;
 SYY&t=(1/(n-1))*Y&t`*(In-(1/n)*ONE*ONE`)*Y&t;
 *print SYY;
 *SY2Y2=(1/(n-1))*Y2`*(In-(1/n)*ONE*ONE`)*Y2;

 * Obtain residual covariance matrices;

 if varsin ^= 0 then intX=ONE||X; *X with intercept variable;
 else intX=X;
 BX&t=inv(intX`*intX)*intX`*Y&t;  *predicting Yt from X;
 ResY&t=Y&t-intX*BX&t;
 SResY&t=(1/(n-1))*ResY&t`*(In-(1/n)*ONE*ONE`)*ResY&t;
 *print SResY;
 * SResY is the cov matrix of Y1 after using X as predictors;
 ResY&t=Y&t-intX*BX&t;
 SResY&t=(1/(n-1))*ResY&t`*(In-(1/n)*ONE*ONE`)*ResY&t;


 * Obtain MV R-squared measure;
 Rsq1=1-(det(SResY&t)/det(SYY&t));
 *Rsq2=1-(det(SResY2)/det(SY2Y2));
 *Rsq2=1-(det(SResX)/det(SXX));
 *print Rsq1; *print Rsq2;

 Rsqy&t(|i,|)=Rsq1;
 *Rsqy2(|i,|)=Rsq2;
end;

uvfits=dom||rsqy&t;
/*
print "model fit values in the original sample using criterion y&t";
print uvfits;*[colname={in x1 x2 x3 y1r2 y2r2}];
*/
  create uv&type&t from uvfits;*[colname=cols];
   append from uvfits;
  close uv&type&t;

finish;
run; quit;
/*
proc print data=uvtable&t;
 title "model fit values in the original sample using criterion y&t";
run;
*/

%mend uvfits;

* Obtain univariate model fits for the original data;
%macro uvda;
%do k = 1 %to &q;
  %uvfits(&k, original, raw); 
%end;
%mend;
%uvda;


* Macro to obtain multivariate model fits;

/****************************************************************/
/************************** MVDA Macro **************************/
/****************************************************************/

%macro mvda(data, mvfit, type, p);

proc IML;
reset noprint nolog;
start;

/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ 
/* Step 1. read in the data to an IML matrix and define Y and X */
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ 

* read in the data file (into DATA);
 use original;
  read all into DATA;
 close original;
DataCheck = DATA[1:5,];


 * read in the binary matrix (into alldom);
 * this matrix contains 1/0 for each predictor and in (number in model)
 	for all subset models;
 use dom;
 read all into allDOM;
 close dom;
 *print AllDOM;

 n=nrow(DATA);
 p = &p;
 *p=ncol(alldom)-1;
 ncol=ncol(alldom);
 null=J(1,ncol,0);
 alldom=null//alldom;
 dom1=alldom[,1:p]; dom2=alldom[,p+1]; dom=dom2||dom1;
 *dom represents the subset models: number of predictors in the first column and the
 included predictors (1/0) in columns 2--p;
 *print dom; 
 r=ncol(DATA);
 q=r-p;
 *print q p;

*!!!!!!!!!!!!!!!!  IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*; 
*  if necessary, arrange data in the order Y1, ..., Yq, X1, ..., Xp  *;
*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*;
/*
DataY=DATA[,p+1:p+q]; DataX=DATA[,1:p];
DATA=DataY||DataX; print 'data arranged Y1-Yq,X1-Xp'; print Data;
*/

/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/* Step 2. compute fits (e.g. mvr2) vector for the original	*/ 
/* 		sample (all subsets) 				*/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/

 * to check data is read in correctly, obtain r2 for each subset model;
 R2=J(2**p,1,99);
 P2=J(2**p,1,99);

* define Y;
Y = Data[,1:q];
*YCheck = Y[1:5,];
*print YCheck;
*Y1=Data[,1];
*Y2=Data[,2];
*print Y1 Y2;
*Y=Y1;
*Y=Y2;

* define X for each subset model (i.e., subset of the DATA matrix);
do i=1 to 2**p; *loop through rows of dom (subset models);

 varsin=dom2(|i,|); *number of predictors in subset model;
 *print varsin;
 ONE=J(n,1,1);
 if varsin > 0 then X=J(n,varsin,0); *define X matrix;
 k=0;
 do j=1 to p; *loop through p predictors;
  if dom(|i,j+1|)=1 then do; *check if predictor is included in subset model;
   k=k+1;
   X(|,k|)=Data[,q+j]; *assign included predictors data to X;
  end;
 end;
 if varsin=0 then X=ONE;
/*
 XCheck = X[1:5,];
 print i XCheck;
*/

* Obtain covariance matrices (for X and Y);
In=I(n);
SXX=(1/(n-1))*X`*(In-(1/n)*ONE*ONE`)*X;
*print SXX;
SYY=(1/(n-1))*Y`*(In-(1/n)*ONE*ONE`)*Y;
*print SYY;


* Obtain residual covariance matrices;

if varsin = 0 then intX=X;
else intX=ONE||X; *X with intercept variable; 
BX=inv(intX`*intX)*intX`*Y;  *predicting Y from X;
ResY=Y-intX*BX;
SResY=(1/(n-1))*ResY`*(In-(1/n)*ONE*ONE`)*ResY;
*print SResY;
* SResY is the cov matrix of Y after using X as preditors;

intY=ONE||Y; *Y with intercept;
BY=inv(intY`*intY)*intY`*X;  *predicting X from Y;
ResX=X-intY*BY;
SResX=(1/(n-1))*ResX`*(In-(1/n)*ONE*ONE`)*ResX;
* SResX is the cov matrix of X after using Y as predictors;
*print SResX;

* Obtain MV R-squared measure;
Rsq1=1-(det(SResY)/det(SYY));
Rsq2=1-(det(SResX)/det(SXX));
*print Rsq1; 
*print Rsq2;
* Obtain MV P-squared measure;
Psq=(q - trace(inv(SYY)*SResY))/q;
*print Psq;

R2(|i,|)=Rsq1;
P2(|i,|)=Psq;
end;
mvrfits=dom||R2;
mvpfits=dom||P2;
/*
print 'MV R-squared values in the original sample';
print mvrfits; *[colname={in x1 x2 x3 mvr2}];
print 'MV P-squared values in the original sample';
print mvpfits; *[colname={in x1 x2 x3 mvr2}];
*/
  create mvr&type from mvrfits;*[colname=cols];
   append from mvrfits;
  close mvr&type;
  create mvp&type from mvpfits;*[colname=cols];
   append from mvpfits;
  close mvp&type;

finish;
run;


/*
  %let indep=;
  %do i=1 %to &p; 
      %let indep=&indep x&i;
      %let x&i=x&i;
  %end;  

%if &p>10 %then %do;
  %put  THE MAXIMUM NUMBER OF PREDICTORS IS 10.  YOU HAVE &P..;
  %goto done;
%end;

%LET DATANAME=&SYSLAST;
option nonotes;
DATA _NULL_;  FILE PRINT LINESLEFT=WITH;  CALL SYMPUT('WITH',WITH);
DATA _NULL_;
  FILE PRINT NOTITLES LINESLEFT=WITHOUT;
  CALL SYMPUT ('MTITLE',TRIM(LEFT(WITHOUT-&WITH+2)));
  CALL SYMPUT('NMODELS', 2**&p -1);
RUN;
*/

%mend mvda;

* Obtain multivariate model fits for the original data;
%mvda(original, mvp, raw, &p);
%mvda(original, mvr, raw, &p);
proc print data=mvrraw;
 title 'model fit values in the original sample using mvr-squared';
run;

proc print data=mvpraw;
 title 'model fit values in the original sample using mvp-squared';
run;


* Macro to perfrom DA on the appropriate table;
%macro da(table, fit);

data table; set &table;

proc IML;
 start;

 use table;
  read all into domin;
 close table;

 p = &p;
*print domin;
 * dom is rearranged to contain, for each subset model, the number of predictors
  in and the r2 of the model, followed by 1/0 values for predictors in/out;
  dom1=domin[,1];  dom2=domin[,2:ncol(domin)-1];  dom3=domin[,ncol(domin)];
  dom=dom1||dom3||dom2;  

domin2 = dom2||dom1||dom3; *to be used for dominance matrices;
  create dommat from domin2;
   append from domin2;
  close dommat;
free dom1 dom2 dom3;
*print dom;  
* generate table of additional contributions; 
  /*
   null=J(1,ncol(dom),0);
   dom=null//dom;
     */
nrow=nrow(dom);
* dom contains a top row of zeros for the null model;
   full=J(1,p,99);  fullrsq=J(1,1,99);
   reduced=J(1,p,99);  redrsq=J(1,1,99);
   contrib=J(nrow,p,0);  * additional contributions matrix;

    do i=1 to nrow;      * for each model;
     do j=1 to p;        * for each predictor;

       if dom(|i,j+2|)=0 then do; * if predictor is not in subset model;
        reduced=dom[i,3:ncol(dom)];    * the 1/0 row represents the reduced model;
        full=reduced;             * the full model is same as reduced model;

         do k=1 to p;          
          if k=j then full(|1,k|)=1;   * add the jth predictor to the full model;
         end;    

         do r=1 to nrow;               * for each model;
          comp=dom[r,3:ncol(dom)];          * comp is the 1/0 row of dom;
          if comp=full then fullrsq=dom[r,2];    * r2 of row is fullrsq, or; 
          if comp=reduced then redrsq=dom[r,2];  * r2 of row is redrsq;
         end;

         contrib(|i,j|)=fullrsq-redrsq;          * contrib is r2 difference;
       end;

       else do;
        contrib(|i,j|)=.;         * if predictor is in model, contib is .;
       end;

     end;
    end;

   contrib=dom||contrib;
   contrib=contrib[1:nrow,];
*print contrib;

  cols = {IN RSQ %do i=1 %to &p; X&i %end; %do i=1 %to &p; CP&i %end;};

  create rsqtable from contrib[colname=cols];
   append from contrib;
  close rsqtable;


finish;
run; quit;

option nonotes;

 proc print data=rsqtable;  id IN RSQ;  format RSQ CP1-CP&p 5.4; 
 *title&mtitle 'Dominance Table:  Additional Contributions of Predictors Across All Subset Regression Models';
 *%UNQUOTE(TITLE%EVAL(&MTITLE+1)) 'CPi indicates the additional contribution of predictor i to the model r-square';
 title "Dominance Table:  Additional Contributions using &fit";

proc summary nway; var CP1--CP&p; class in;
 output out=avgcont (drop=_type_) mean=&indep;
run;
proc print data=avgcont; title "mean contribution by model size (conditional dominance) using &fit";

proc means noprint; var &indep; 
 output out=meanc mean=&indep;
run;
proc print data=meanc(drop = _TYPE_); title "mean contribution overall (general dominance) using &fit";
/*
data avgcont; set labels meanc (in=y) avgcont;
  if y then in=999;
proc print double;  id IN;  var &indep;  
  format &indep 5.4  IN %if &p<11 %then 1.0; %else 2.0;;
title&mtitle 'Dominance Analysis:  Overall Average Contributions of Predictors (First Row)';
%UNQUOTE(TITLE%EVAL(&MTITLE+1)) 'And Average Contributions to Models of Each Size (Remaining Rows)'; run;
proc transpose prefix=size out=meanc (rename=(SIZE999=OVERALL _NAME_=VAR)); id IN; var &indep; run;
proc sort; by descending overall;
title&mtitle 'Dominance Analysis:  Average Predictor Contributions Overall';
proc print; id _character_; format _numeric_ 5.4; run;
*/


*TITLE&MTITLE;
%DONE: option notes _last_=&syslast;
run;


/**********************************************
          DOMINANCE MATRICES (Sample)  
**********************************************/

Proc IML;
start;
use dommat;
 read all into damat;
close dommat;
* damat is a matrix that consists of p+2 columns: 1/0 for x1 - xp, in, Rsq;
* the null model should not be included;
damat = damat[2:nrow(damat),];

*print damat;
** sample dominance matrices;
Dc&fit=J(&p,&p,0); Da&fit=J(&p,&p,0); Dg&fit=J(&p,&p,0);
RSQ=damat[,ncol(damat)]; *rsq vector (without null model);

/***************************************************/
/*****           Complete dominance		        ****/
/***************************************************/
do i=1 to &p-1;
 do j=i+1 to &p;

 * define matrix of constants ("comp") to determine complete dominance between
 each pair of predictors;

        comp=J(2**(&p-2),2**&p-1,0);

        * Xh is any subset model that does not include i and j;
        * Xh contains the columns of damat that do not involve i or j;
        Xh=J(nrow(damat),&p-2,99);
		Xhcol=0;			* index column number in Xh;
		do h=1 to &p;
		 if (i ^= h & j ^= h) then do;	* find non i,j columns;
		  Xhcol=Xhcol+1;				* update column number in Xh;
		  Xh[,Xhcol] = damat[,h];		* assign column to Xh;
		 end;
		end;

		* contrast rows (subsets) representing XiXh and XjXh;
        comprow=1;
        do r=1 to 2**&p-2;               * for each pair of rows;
         do s=r+1 to 2**&p-1;
          if Xh[r,]=Xh[s,] then do;     * if the rows of Xh are the same;
                                        * and if i and j are contrasted;
           if (damat[r,i]=1 & damat[r,j]=0 & damat[s,i]=0 &
                damat[s,j]=1) then do;
                 comp[comprow,r]=1;
                 comp[comprow,s]=-1;
                 comprow=comprow+1;
           end;
          end;

         end;   * do r loop;
        end;    * do s loop;


*** Determine the complete Dij value ***;

  cdiffij=comp*RSQ;

  zero=J(nrow(cdiffij), ncol(cdiffij),0);
  ijdom=nrow(cdiffij);

  * obtain complete dominance matrices;
     * undetermined case (all differences are zero);
     if cdiffij=zero then do;
        Dc&fit[i,j]=0.5;
        Dc&fit[j,i]=Dc&fit[i,j];
     end;
     * else, check signs of difference elements;
     if cdiffij ^= zero then do;
      nonneg=0; nonpos=0;
      do k=1 to nrow(cdiffij);
       if cdiffij[k,]>=0 then nonneg=nonneg+1;
       if cdiffij[k,]<=0 then nonpos=nonpos+1;
      end;
      * dominance case;
      if nonneg=ijdom then Dc&fit[i,j]=1; else Dc&fit[i,j]=0;
      if nonpos=ijdom then Dc&fit[j,i]=1; else Dc&fit[j,i]=0;
      * undetermined case (differences have different signs);
      if (Dc&fit[i,j]=0 & Dc&fit[j,i]=0) then do;
        Dc&fit[i,j]=0.5;
        Dc&fit[j,i]=Dc&fit[i,j];
      end;
     end;


/***************************************************/
/*****     Conditional dominance                ****/
/***************************************************/
* define matrix of constants ("avg") to determine average (within model
size) dominance between each pair of predictors;

subsets=damat;
* subsets is a matrix that consists of p+2 columns (and 
the null model in the top row): 1/0 for x1 - xp, in, Rsq;

  avg=J(&p,2**&p-1,0);
  do c=1 to &p;         * for each model size;
   num=0;               * index number of subsets per size;
   do m=1 to 2**&p-1;	* for each model, determine number of models
   						of size is c-1 to which the ith predictor makes
   						an additional contribution;
	if (subsets[m,&p+1]=c-1 & subsets[m,i]=0) then num=num+1;
   end;
   if num=0 then num=1;	* for the case of the null model (size=0);
   do r=1 to 2**&p-1;    * determine subsets to contrast;
    * consider additional contributions for model size (c);
    if (subsets[r,&p+1]=c-1 | subsets[r,&p+1]=c) then do;
     * average contribution of models that inlcude i but not j;
     if (subsets[r,i]=1 & subsets[r,j]=0) then avg[c,r]=1/num;
     * average contribution of models that inlcude j but not i;
     if (subsets[r,i]=0 & subsets[r,j]=1) then avg[c,r]=-1/num;
    end;
   end;
  end;

*** Determine the average Dij value ***;

  adiffij=avg*RSQ;

  zero=J(nrow(adiffij), ncol(adiffij),0);
  ijdom=nrow(adiffij);

  * obtain average dominance matrices;
     * undetermined case (all differences are zero);
     if adiffij=zero then do;
        Da&fit[i,j]=0.5;
        Da&fit[j,i]=Da&fit[i,j];
     end;
     * else, check signs of difference elements;
     if adiffij ^= zero then do;
      nonneg=0; nonpos=0;
      do k=1 to nrow(adiffij);
       if adiffij[k,]>=0 then nonneg=nonneg+1;
       if adiffij[k,]<=0 then nonpos=nonpos+1;
      end;
      * dominance case;
      if nonneg=ijdom then Da&fit[i,j]=1; else Da&fit[i,j]=0;
      if nonpos=ijdom then Da&fit[j,i]=1; else Da&fit[j,i]=0;
      * undetermined case (differences have different signs);
      if (Da&fit[i,j]=0 & Da&fit[j,i]=0) then do;
        Da&fit[i,j]=0.5;
        Da&fit[j,i]=Da&fit[i,j];
      end;
     end;


/***************************************************/
/*****         General dominance                ****/
/***************************************************/
* define matrix of constants ("glob") to determine global (overall average)
dominance between each pair of predictors;

  glob=J(1,2**&p-1,99);
  do g=1 to 2**&p-1;
   glob[,g]=avg[+,g]/&p;
  end;

*** Determine the global Dij value ***;

  gdiffij=glob*RSQ;

  zero=J(nrow(gdiffij), ncol(gdiffij),0);
  ijdom=nrow(gdiffij);

  * obtain global dominance matrices;
     * undetermined case (all differences are zero);
     if gdiffij=zero then do;
        Dg&fit[i,j]=0.5;
        Dg&fit[j,i]=Dg&fit[i,j];
     end;
     * else, check signs of difference elements;
     if gdiffij ^= zero then do;
      nonneg=0; nonpos=0;
      do k=1 to nrow(gdiffij);
       if gdiffij[k,]>=0 then nonneg=nonneg+1;
       if gdiffij[k,]<=0 then nonpos=nonpos+1;
      end;
      * dominance case;
      if nonneg=ijdom then Dg&fit[i,j]=1; else Dg&fit[i,j]=0;
      if nonpos=ijdom then Dg&fit[j,i]=1; else Dg&fit[j,i]=0;
      * undetermined case (differences have different signs);
      if (Dg&fit[i,j]=0 & Dg&fit[j,i]=0) then do;
        Dg&fit[i,j]=0.5;
        Dg&fit[j,i]=Dg&fit[i,j];
      end;
     end;


end; *(i loop);
end; *(j loop);

print 'complete dominance matrix'; print Dc&fit;
print 'conditional dominance matrix'; print Da&fit;
print 'general dominance matrix'; print Dg&fit;

create Dc&fit from Dc&fit;
 append from Dc&fit;
close Dc&fit;
create Da&fit from Da&fit;
 append from Da&fit;
close Da&fit;
create Dg&fit from Dg&fit;
 append from Dg&fit;
close Dg&fit;

* create a file with sample dominance values;
* variables: i, j, cell value;
Sample_dc = J(&p*(&p-1), 3, 0); Sample_da = J(&p*(&p-1), 3, 0);
Sample_dg = J(&p*(&p-1), 3, 0); row=0;
do i=1 to nrow(Dc&fit);
 do j=1 to ncol(Dc&fit);
  if i ^= j then do;
   row=row+1;
   Sample_dc[row,1]=i; Sample_dc[row,2]=j; Sample_dc[row,3]=Dc&fit[i,j]; 
   Sample_da[row,1]=i; Sample_da[row,2]=j; Sample_da[row,3]=Da&fit[i,j]; 
   Sample_dg[row,1]=i; Sample_dg[row,2]=j; Sample_dg[row,3]=Dg&fit[i,j]; 
  end;
 end;
end;
create sample_dc&fit from Sample_dc[colname={i j dij}];
 append from Sample_dc;
close sample_dc&fit;
create sample_da&fit from Sample_da[colname={i j dij}];
 append from Sample_da;
close sample_da&fit;
create sample_dg&fit from Sample_dg[colname={i j dij}];
 append from Sample_dg;
close sample_dg&fit;


finish;
run;

%mend da;

* Obtain DA for the original data;
%macro uv;
%do k = 1 %to &q;
  %da(uvraw&k, rsqy&k);
%end;
%mend;
%uv;

%da(mvrraw, mvrsq);
%da(mvpraw, mvpsq);


/*************************************************************************
* 		PART 2:  Bootstrapping Loop
**************************************************************************/
* input:  data set containing Y, data set containing X;
data depvar;
 set original;
 keep &dep;
run;

data indepvar;
 set original;
 keep &indep;
run;

* steps:  1. resample, 2. compute r-squared vector, 3. repeat B times;
option ls=70 nonotes nosource nosource2;
proc IML;
reset noprint; * nolog;
start;

*A. Generate a vector of B seeds (seedvec);
/***********************************************************************/
/* The vector seedvec contains B random seeds from a uniform           */
/* distribution, generated using the initial value (seed) specified    */
/* above.  The vector is then saved to an external file, seeds,        */
/* which is subsequently read into the SAS dataset (seeds).            */
/***********************************************************************/ 

seedvec=J(&B,1,0);
 do i=1 to &B;
   seedvec(|i,|) = uniform(&seed)*100000;
 end;

create seeds from seedvec;
 append from seedvec;
close seeds;

finish;
run; quit;


/*==================================================================*/
/*     start 1 to B loop                      			      		*/
/*==================================================================*/

%do r = 1 %to &B;

proc IML;
reset noprint; * nolog;
start;
* obtain Aij and Dij for each ij pair;

* read in the original data set (into DATA);
 use depvar;
  read all into YData;
 close depvar;
 use indepvar;
  read all into XData;
 close indepvar;
DATA = YData || XData;
Y = YData;
X = XData;

* read in the seeds vector (into seedvec);
use seeds;
 read all into seedvec;
close seeds;

 n=nrow(DATA);
 r=ncol(DATA);

*** check input;
if &r=1 then do;
 print 'Check that the data were read properly!!!';
 first5Y = J(1,5,0); first5Y=Y[1:5,];
 first5X = J(&p,5,0); first5X=X[1:5,];
 print 'The number of observations used is:' n;
 print 'The total number of variables used is:' r;
 print 'The dependent values for the first five cases are:' first5Y;
 print 'The predictor values for the first five cases are:' first5X;
 print 'You requested' &B 'bootstrap samples';
 print 'The bootstrap procedure is now running... PLEASE WAIT...';
end;
print 'Now running resample' &r;


/***********************************************************************/
/* For bootstrapping run i, the i-th entry of the vector seedvec       */
/* (resample) is used to generate a vector (integers) whose entries    */
/* consist of the integers 1 to n sampled randomly, with replacement,  */
/* from a uniform distribution.                                        */
/***********************************************************************/      
 integers=J(n,1,0);
 
 * Specify seed for resampling (resample);
 resample=J(1,1,0);
 resample=seedvec(|&r,|);
 
 * generate a vector (integers) containing the integers 1 to n sampled
 randomly, with replacement, from a uniform distribution whose initial seed is
 resample;
 Do i=1 to n;
  integers(|i|)=int(uniform(resample)*(n-1)+1); 
 end; 
*print &r integers;

%if &predtype='r' %then %do;  
/*=====================================================================*/
/***                 PAIR RESAMPLING BOOTSTRAP                       ***/
/*=====================================================================*/
/***********************************************************************/
/* The i-th entry in integers is an integer (j) between 1 and n which  */
/* indicates the entry (row) number of the data matrix to be taken     */
/* as the i-th entry (row) in the matrix obsboot.  This is repeated    */
/* for i = 1 to n.  						       */
/***********************************************************************/    
 * obsboot is a matrix of randomly sampled observations;
 obsboot=J(n,r,0);
 Do i=1 to n;
  j=integers(|i, |);
  obsboot(|i, |)=DATA(|j, |); 
 end;

 * convert bootstrapped data to external file;
 ***************************************************;
create bootdata from obsboot[colname={&dep &indep}];
 append from obsboot;
close bootdata;


%end;
/*===================== END of Pair Resampling =======================*/


%if &predtype='f' %then %do; 
/*=====================================================================*/
/***                RESIDUAL RESAMPLING BOOTSTRAP                    ***/
/*=====================================================================*/
/***********************************************************************/
/* From the matrix DATA, whose first column represents Y and the       */
/* remaining columns represent X (as specified above), the vector of   */
/* Least-squares predicted values, YHat, and the vector of the         */
/* Least Squares residuals, Res, are computed.                         */
/***********************************************************************/

* fit a model with an intercept to obtain YHat and Res;
 ONE=J(n,1,1);
 X1=ONE||X;
 YHat=X1*INV(X1`*X1)*X1`*Y;
 Res=YHat-Y;


/***********************************************************************/
/* The i-th entry in integers is an integer between 1 and n (j) which  */
/* indicates the entry (row) number of the residuals vector, Res,      */
/* to be added to the i-th predicted value, Y-Hat.  This is repeated   */
/* for i = 1 to n.  The vector Yboot consists of the initial predicted */
/* values (YHat) with the randomly selected residuals added.  The      */
/* matrix Fixdata consists of Yboot and the original X matrix.         */
/***********************************************************************/      

 * Resboot is a vector of randomly sampled residuals;
 Resboot=J(n,1,0);
 Do i=1 to n;
  j=integers(|i, |);
  Resboot(|i, |)=Res(|j, |); 
 end;

 * add random residuals to Y-hat;
 Yboot=YHat+Resboot; 

 * an n by r matrix of bootstrapped Y and fixed Xs;
 Fixdata=Yboot||X;       

 * convert bootstrapped data to external file;
 ***************************************************;
create bootdata from Fixdata[colname={&dep &indep}];
 append from obsboot;
close Fixdata;

%end;
/*=================== END of Residual Resampling =====================*/

finish; 
run; quit;

* Obtain univariate model fits for the resample;
%macro uvda;
%do k = 1 %to &q;
  %uvfits(&k, bootdata, boot); 
%end;
%mend;
%uvda;

* Obtain multivariate model fits for the resample;
%mvda(bootdata, mvpsq, boot, &p);
%mvda(bootdata, mvrsq, boot, &p);

* Macro to obtain DA for the resample;
%macro bootda(fitstable, fit);

/* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/* 		Computing Dij		               */
/* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/

* regression of bootstrapped data;
 ***********************************;

data resample;                             
set bootdata;
run;

proc reg corr data=resample noprint outest=regfile (keep=_in_ _rsq_ &indep);
  model y1=&indep / selection=adjrsq;
run; quit; option nonotes;

* order all subset models in lexicographical order;
 data bootmat;
  set regfile;
  *%if %substr(&sysver,1,1)=8 %then if _n_>1;;
  %do i=1 %to &p;
    &&x&i=(&&x&i>.); 
  %end;
  if _IN_=. then delete;
  keep &indep _IN_ _RSQ_;
 run;
proc sort; by _IN_ %do i=1 %to &p; descending &&x&i %end;; run;


* obtain Aij and Dij for each ij pair;
proc IML;
reset noprint; * nolog;
start;
/*************************************************************************
* 		COMPUTE DOMINANCE MATRICES (resample)
**************************************************************************/
use bootmat;
 read all into damat;
close bootmat;
* damat is a matrix that consists of p+2 columns: 1/0 for x1 - xp, in, Rsq;

use Dc&fit;
 read all into sampDc&fit;
close Dc&fit;
use Da&fit;
 read all into sampDa&fit;
close Da&fit;
use Dg&fit;
 read all into sampDg&fit;
close Dg&fit;

* at first run, initialize frequency matrices and sample pattern counter;
if &r =1 then do;
 fDc1&fit=J(&p, &p, 0); fDc0&fit=J(&p, &p, 0); fDcn&fit=J(&p, &p, 0); 
 fDa1&fit=J(&p, &p, 0); fDa0&fit=J(&p, &p, 0); fDan&fit=J(&p, &p, 0);
 fDg1&fit=J(&p, &p, 0); fDg0&fit=J(&p, &p, 0); fDgn&fit=J(&p, &p, 0);
 patcount=J(1,3,0); *columns correspond to dominance types;
end;

* at subsequent runs retrieve files as sum matrices;
if &r ^= 1 then do;
use sumDc1&fit;
 read all into fDc1&fit;
close sumDc1&fit;
use sumDc0&fit;
 read all into fDc0&fit;
close sumDc0&fit;
use sumDcn&fit;
 read all into fDcn&fit;
close sumDcn&fit;

use sumDa1&fit;
 read all into fDa1&fit;
close sumDa1&fit;
use sumDa0&fit;
 read all into fDa0&fit;
close sumDa0&fit;
use sumDan&fit;
 read all into fDan&fit;
close sumDan&fit;

use sumDg1&fit;
 read all into fDg1&fit;
close sumDg1&fit;
use sumDg0&fit;
 read all into fDg0&fit;
close sumDg0&fit;
use sumDgn&fit;
 read all into fDgn&fit;
close sumDgn&fit;

use patfreq;
 read all into patcount;
close patfreq;

end;

* r-squared vector is the last column of damat (without null model);
RSQ=damat[,ncol(damat)];
*print &r RSQ;

Dc=J(&p,&p,0); Da=J(&p,&p,0); Dg=J(&p,&p,0);


/***************************************************/
/*****         Complete dominance		****/
/***************************************************/
do i=1 to &p-1;
 do j=i+1 to &p;

 * define matrix of constants ("comp") to determine complete dominance between
 each pair of predictors;

        comp=J(2**(&p-2),2**&p-1,0);

        * Xh is any subset model that does not include i and j;
        * Xh contains the columns of damat that do not involve i or j;
        Xh=J(nrow(damat),&p-2,99);
		Xhcol=0;			* index column number in Xh;
		do h=1 to &p;
		 if (i ^= h & j ^= h) then do;	* find non i,j columns;
		  Xhcol=Xhcol+1;				* update column number in Xh;
		  Xh[,Xhcol] = damat[,h];		* assign column to Xh;
		 end;
		end;

		* contrast rows (subsets) representing XiXh and XjXh;
        comprow=1;
        do r=1 to 2**&p-2;               * for each pair of rows;
         do s=r+1 to 2**&p-1;
          if Xh[r,]=Xh[s,] then do;     * if the rows of Xh are the same;
                                        * and if i and j are contrasted;
           if (damat[r,i]=1 & damat[r,j]=0 & damat[s,i]=0 &
                damat[s,j]=1) then do;
                 comp[comprow,r]=1;
                 comp[comprow,s]=-1;
                 comprow=comprow+1;
           end;
          end;

         end;   * do r loop;
        end;    * do s loop;


*** Determine the complete Dij value ***;

  cdiffij=comp*RSQ;

  zero=J(nrow(cdiffij), ncol(cdiffij),0);
  ijdom=nrow(cdiffij);

  * obtain complete dominance matrices;
     * undetermined case (all differences are zero);
     if cdiffij=zero then do;
        Dc[i,j]=0.5;
        Dc[j,i]=Dc[i,j];
     end;
     * else, check signs of difference elements;
     if cdiffij ^= zero then do;
      nonneg=0; nonpos=0;
      do k=1 to nrow(cdiffij);
       if cdiffij[k,]>=0 then nonneg=nonneg+1;
       if cdiffij[k,]<=0 then nonpos=nonpos+1;
      end;
      * dominance case;
      if nonneg=ijdom then Dc[i,j]=1; else Dc[i,j]=0;
      if nonpos=ijdom then Dc[j,i]=1; else Dc[j,i]=0;
      * undetermined case (differences have different signs);
      if (Dc[i,j]=0 & Dc[j,i]=0) then do;
        Dc[i,j]=0.5;
        Dc[j,i]=Dc[i,j];
      end;
     end;


/***************************************************/
/*****         Conditional dominance            ****/
/***************************************************/
* define matrix of constants ("avg") to determine average (within model
size) dominance between each pair of predictors;

subsets=damat;
* subsets is a matrix that consists of p+2 columns (and 
the null model in the top row): 1/0 for x1 - xp, in, Rsq;

  avg=J(&p,2**&p-1,0);
  do c=1 to &p;         * for each model size;
   num=0;               * index number of subsets per size;
   do m=1 to 2**&p-1;	* for each model, determine number of models
   						of size is c-1 to which the ith predictor makes
   						an additional contribution;
	if (subsets[m,&p+1]=c-1 & subsets[m,i]=0) then num=num+1;
   end;
   if num=0 then num=1;	* for the case of the null model (size=0);
   do r=1 to 2**&p-1;    * determine subsets to contrast;
    * consider additional contributions for model size (c);
    if (subsets[r,&p+1]=c-1 | subsets[r,&p+1]=c) then do;
     * average contribution of models that inlcude i but not j;
     if (subsets[r,i]=1 & subsets[r,j]=0) then avg[c,r]=1/num;
     * average contribution of models that inlcude j but not i;
     if (subsets[r,i]=0 & subsets[r,j]=1) then avg[c,r]=-1/num;
    end;
   end;
  end;

*** Determine the average Dij value ***;

  adiffij=avg*RSQ;

  zero=J(nrow(adiffij), ncol(adiffij),0);
  ijdom=nrow(adiffij);

  * obtain average dominance matrices;
     * undetermined case (all differences are zero);
     if adiffij=zero then do;
        Da[i,j]=0.5;
        Da[j,i]=Da[i,j];
     end;
     * else, check signs of difference elements;
     if adiffij ^= zero then do;
      nonneg=0; nonpos=0;
      do k=1 to nrow(adiffij);
       if adiffij[k,]>=0 then nonneg=nonneg+1;
       if adiffij[k,]<=0 then nonpos=nonpos+1;
      end;
      * dominance case;
      if nonneg=ijdom then Da[i,j]=1; else Da[i,j]=0;
      if nonpos=ijdom then Da[j,i]=1; else Da[j,i]=0;
      * undetermined case (differences have different signs);
      if (Da[i,j]=0 & Da[j,i]=0) then do;
        Da[i,j]=0.5;
        Da[j,i]=Da[i,j];
      end;
     end;


/***************************************************/
/*****         General dominance                ****/
/***************************************************/
* define matrix of constants ("glob") to determine global (overall average)
dominance between each pair of predictors;

  glob=J(1,2**&p-1,99);
  do g=1 to 2**&p-1;
   glob[,g]=avg[+,g]/&p;
  end;

*** Determine the global Dij value ***;

  gdiffij=glob*RSQ;
  *print 'global dominance comparisons';
  *print i j gdiffij;

  zero=J(nrow(gdiffij), ncol(gdiffij),0);
  ijdom=nrow(gdiffij);

  * obtain global dominance matrices;
     * undetermined case (all differences are zero);
     if gdiffij=zero then do;
        Dg[i,j]=0.5;
        Dg[j,i]=Dg[i,j];
     end;
     * else, check signs of difference elements;
     if gdiffij ^= zero then do;
      nonneg=0; nonpos=0;
      do k=1 to nrow(gdiffij);
       if gdiffij[k,]>=0 then nonneg=nonneg+1;
       if gdiffij[k,]<=0 then nonpos=nonpos+1;
      end;
      * dominance case;
      if nonneg=ijdom then Dg[i,j]=1; else Dg[i,j]=0;
      if nonpos=ijdom then Dg[j,i]=1; else Dg[j,i]=0;
      * undetermined case (differences have different signs);
      if (Dg[i,j]=0 & Dg[j,i]=0) then do;
        Dg[i,j]=0.5;
        Dg[j,i]=Dg[i,j];
      end;
     end;


end; *(i loop);
end; *(j loop);


**** compare current D matrices to sample D matrices ****;
if Dc=sampDc&fit then patcount[,1]=patcount[,1]+1;
if Da=sampDa&fit then patcount[,2]=patcount[,2]+1;
if Dg=sampDg&fit then patcount[,3]=patcount[,3]+1;
create patfreq from patcount;
 append from patcount;
close patfreq;


thisDc1=J(&p,&p,0); thisDc0=J(&p,&p,0); thisDcn=J(&p,&p,0); 
thisDa1=J(&p,&p,0); thisDa0=J(&p,&p,0); thisDan=J(&p,&p,0); 
thisDg1=J(&p,&p,0); thisDg0=J(&p,&p,0); thisDgn=J(&p,&p,0); 
do i=1 to &p;
 do j=1 to &p;
  if i ^=j then do;
   if Dc[i,j]=1 then thisDc1[i,j]=1;
   if Dc[i,j]=0 then thisDc0[i,j]=1;
   if Dc[i,j]=0.5 then thisDcn[i,j]=1;
   if Da[i,j]=1 then thisDa1[i,j]=1;
   if Da[i,j]=0 then thisDa0[i,j]=1;
   if Da[i,j]=0.5 then thisDan[i,j]=1;
   if Dg[i,j]=1 then thisDg1[i,j]=1;
   if Dg[i,j]=0 then thisDg0[i,j]=1;
   if Dg[i,j]=0.5 then thisDgn[i,j]=1;
  end;
 end;
end;

**** add current D matrices to previous D matrices ****;
fDc1&fit = fDc1&fit+thisDc1; 
fDa1&fit = fDa1&fit+thisDa1; fDg1&fit = fDg1&fit+thisDg1;
fDc0&fit = fDc0&fit+thisDc0; 
fDa0&fit = fDa0&fit+thisDa0; fDg0&fit = fDg0&fit+thisDg0;
fDcn&fit = fDcn&fit+thisDcn; 
fDan&fit = fDan&fit+thisDan; fDgn&fit = fDgn&fit+thisDgn;

create sumDc1&fit from fDc1&fit;
 append from fDc1&fit;
close sumDc1&fit;
create sumDc0&fit from fDc0&fit;
 append from fDc0&fit;
close sumDc0&fit;
create sumDcn&fit from fDcn&fit;
 append from fDcn&fit;
close sumDcn&fit;

create sumDa1&fit from fDa1&fit;
 append from fDa1&fit;
close sumDa1&fit;
create sumDa0&fit from fDa0&fit;
 append from fDa0&fit;
close sumDa0&fit;
create sumDan&fit from fDan&fit;
 append from fDan&fit;
close sumDan&fit;

create sumDg1&fit from fDg1&fit;
 append from fDg1&fit;
close sumDg1&fit;
create sumDg0&fit from fDg0&fit;
 append from fDg0&fit;
close sumDg0&fit;
create sumDgn&fit from fDgn&fit;
 append from fDgn&fit;
close sumDgn&fit;

*at last run create summary tables containing the following columns:
i, j, f=1, f=0, f=0.5 pij pji;
if &r=&B then do;
 row=1;
 summary_c=J(&p*(&p-1), 8, -9);
 summary_a=J(&p*(&p-1), 8, -9);
 summary_g=J(&p*(&p-1), 8, -9);
 do i=1 to &p;
  do j=1 to &p;
   if i^=j then do;
    summary_c[row,1]=i; summary_c[row,2]=j; 
	summary_c[row,3]=fDc1&fit[i,j]; 
	summary_c[row,4]=fDc0&fit[i,j]; summary_c[row,5]=fDcn&fit[i,j]; 
	summary_c[row,6]=fDc1&fit[i,j]/&B; summary_c[row,7]=fDc0&fit[i,j]/&B; 
    summary_c[row,8]=fDcn&fit[i,j]/&B;   
    summary_a[row,1]=i; summary_a[row,2]=j; 
	summary_a[row,3]=fDa1&fit[i,j]; 
	summary_a[row,4]=fDa0&fit[i,j]; summary_a[row,5]=fDan&fit[i,j];  
	summary_a[row,6]=fDa1&fit[i,j]/&B; summary_a[row,7]=fDa0&fit[i,j]/&B;  
    summary_a[row,8]=fDan&fit[i,j]/&B;   
    summary_g[row,1]=i; summary_g[row,2]=j; 
	summary_g[row,3]=fDg1&fit[i,j]; 
	summary_g[row,4]=fDg0&fit[i,j]; summary_g[row,5]=fDgn&fit[i,j]; 
	summary_g[row,6]=fDg1&fit[i,j]/&B; summary_g[row,7]=fDg0&fit[i,j]/&B;  
    summary_g[row,8]=fDgn&fit[i,j]/&B;   
    row=row+1;
   end;
  end;
 end;
 create csummary&fit from summary_c[colname={i j f1 f0 fn Pij Pji pno}];
  append from summary_c;
 close csummary&fit;
 create asummary&fit from summary_a[colname={i j f1 f0 fn Pij Pji pno}];
  append from summary_a;
 close asummary&fit;
 create gsummary&fit from summary_g[colname={i j f1 f0 fn Pij Pji pno}];
  append from summary_g;
 close gsummary&fit;
 create patsum from patcount[colname={complete conditional general}];
  append from patcount;
 close patsum;
end;

finish;
run; quit;


%mend bootda;

* Obtain DA for the bootstrap data set;
%macro uvboot;
%do k = 1 %to &q;
  %bootda(uvboot&k, rsqy&k);
%end;
%mend;
%uvboot;

%bootda(mvrboot, mvrsq);
%bootda(mvpboot, mvpsq);

%end; ***** end the 1 to B loop ****;


%macro summary(fit);
/*************************************************************************/
/* 		PART 3:  Probabilities and reproducibilities					 */
/*************************************************************************/
* input:  nine p by p matrices of COUNTS;
* stpes:  convert to probabilities, compute test statistics / CIs, plots;
* output:  tables;
data average&fit;
 set asummary&fit;
 dominance='conditional';
 pair=i*10+j;
run; 
data complete&fit;
 set csummary&fit;
 dominance='complete';
 pair=i*10+j;
run; 
data global&fit;
 set gsummary&fit;
 dominance='general';
 pair=i*10+j;
run; 

data merged&fit;
 set complete&fit average&fit global&fit;
run;
/* proc print data=merged; title 'results'; run; */
data results&fit; set merged&fit;
 array f[3] f1 f0 fn;
  do domtype=1 to 3;
   domfreq = f[domtype];
   output;
  end;
run;
data results&fit; set results&fit;
  if domtype=1 then Dij=1;
  if domtype=2 then Dij=0;
  if domtype=3 then Dij=0.5;
run;
proc sort data=results&fit; by dominance pair; run;
proc means data=results&fit noprint;
 var Dij; freq domfreq;
 by dominance pair;
 output out=meansout&fit mean=Dbar STD=SE;
run;


data samplea&fit;
 set Sample_da&fit; pair=i*10+j; dominance='conditional';
run;
data samplec&fit;
 set Sample_dc&fit; pair=i*10+j; dominance='complete';
run;
data sampleg&fit;
 set Sample_dg&fit; pair=i*10+j; dominance='general';
run;
data sample&fit;
 set samplec&fit samplea&fit sampleg&fit;
run;


******* SUMMARY OUTPUT *******;
* variables: dominance-type, i, j, p(iDj), p(jDi), p(noD), Dbar, SE, 
sample-value, pattern-prob;
*** merge sample, results and meansout data sets by dominance and pair;
proc sort data=sample&fit; by dominance pair; run;
data results&fit; set results&fit; if domtype=1; run;
proc sort data=results&fit; by dominance pair; run;
proc sort data=meansout&fit; by dominance pair; run;
data printout&fit;
 merge results&fit sample&fit meansout&fit;
  by dominance pair;
run;

data table&fit;
 set printout&fit(drop=domtype domfreq _TYPE_ _FREQ_);
/* i_dom_j=pij; j_dom_i=pji; no_dom=pno; */
 Pijno=pno;
 Dij_mean = dbar;
 Dij_se = se;
 if dij=1 then reprod = pij;
 if dij=0 then reprod = pji;
 if dij=0.5 then reprod = pijno;
 format Dij_se F6.3;
run;
data table&fit; set table&fit(drop = f1 f0 fn pno dbar); run;
proc sort; by dominance pair; run;
proc print data=table&fit; 
 title "summary of results using &fit: all pairs"; 
 title2 'Pij, Pji, Pijno are dominance probabilities and';
 title3 'reprod are reproducibility values';
 var dominance i j Dij Dij_mean dij_SE Pij Pji Pijno reprod;
run;
data plots&fit; set table&fit;
 if Pij ge Pji;
 format Dij_se F6.3;
run;
proc print data=plots&fit; 
 title "summary of results using &fit: pair arranged by dominant predictor"; 
 title2 'Pij, Pji, Pijno are dominance probabilities and';
 title3 'reprod are reproducibility values';
 var dominance i j Dij Dij_mean dij_SE Pij Pji Pijno reprod;
run;

%mend summary;

* obtain summary for each DA (i.e., using each fit statistic);
%macro uvsummary;
%do k = 1 %to &q;
  %summary(rsqy&k);
%end;
%mend;
%uvsummary;

%summary(mvrsq);
%summary(mvpsq);


%mend dom;
%dom;


/***********************************************************
   The following macros expand a variable list containing
   hyphens into a list specifying each individual variable. 
***********************************************************/

%MACRO EXPAND;
  %LET LNGTH = %LENGTH(&INDEP);  %LET TEMP=;
  %DO _INDEX_ = 1 %TO &LNGTH;
    %LET ITEM = %SCAN(&INDEP,&_INDEX_,%QUOTE( ));
    %IF %LENGTH(&ITEM) EQ 0 %THEN %GOTO DONE;
    %IF %INDEX(&ITEM,-) > 0 %THEN %EXPANDED;
    %LET TEMP = &TEMP &ITEM;
  %END;  %DONE:  %LET INDEP = &TEMP;
%MEND EXPAND;

%MACRO EXPANDED;
  %LET DASH = %INDEX(&ITEM,-);
  %DO I = %EVAL(&DASH-1) %TO 1 %BY -1;
    %LET ALPHANUM = %SUBSTR(&ITEM,&I,1);
    %DO II = 0 %TO 9;  %IF &ALPHANUM EQ &II %THEN %GOTO FOUND;  %END;
    %GOTO DONE;
  %FOUND:  %END;
  %DONE:  %LET PREFX = %SUBSTR(&ITEM,1,&I);
  %LET LOWER = %SUBSTR(&ITEM,%EVAL(&I+1),%EVAL(&DASH-&I-1));
  %LET UPPER = %SUBSTR(&ITEM,%EVAL(&DASH+&I+1));
  %LET ITEM=; %DO II = &LOWER %TO &UPPER; %LET ITEM = &ITEM &PREFX.&II;
  %END;
%MEND EXPANDED;