/****************************************************************************** DOMINANCE PROBABILITY MACRO This macro executes dominance analysis as described by Razia Azen & David Budescu in Psychological Methods, 2003. Dominance analysis quantifies the importance of each predictor as its average increment to the model r-squared, across all possible submodel sizes. This macro was written by Razia Azen with valuable contributions from Robert Ceurvorst. NOTE: This program is limited to at most 10 predictors! ------------- INSTRUCTIONS: ------------- 1. In this macro, change the %macro dom line (which appears just below these instructions) according to the following directions: %macro dom (data=_last_, dep=Y, indep='list of predictors', p=n_of_predictors, noprint=0, short=0, B=n_of_bootstraps, predtype='type_of_predictors', seed=random_number); If defaults are used, then only p= OR indep= is necessary, e.g., "%dom(p=4)" will use the last data set created and operate on variables Y and X1-X4. Either p= OR indep= is required -- not both. If both are specified, p is determined by counting the variables in the indep= list. data= SAS dataset to be used. Default is last dataset created. dep= Name of dependent variable. Default is Y. indep= List of predictors in quotes. OR p= No. of predictors IF and ONLY IF they are named X1-Xp, in which case indep= is not required. noprint=1 Suppresses printing of the input dataset. short=1 Suppresses listing of each predictor's contributions to individual submodels. B= The number of bootstrap runs. Default is 1000. predtype= 'f' if the predictors are fixed; 'r' if the predictors are random. Default is 'r'. seed= Six digits. Default is 0 (uses clock value). For example, with 4 predictors named X1, X2, X3, X4 and all default values: (data=_last_, dep=Y, p=4, indep=, noprint=0, short=0, B=1000, predtype='r', seed=0); Or, with 4 predictors named A, B, C, D and all default values: (data=_last_, dep=Y, p=, indep='A B C D', noprint=0, short=0, B=1000, predtype='r', seed=0); 2. Save this macro, and add the following two lines (below) to the SAS program in which you've read the data set to be analyzed: %include 'k:\sasmacro\uniDAmacro.sas'; *** CHANGE TO PATH WHERE MACRO IS SAVED ***; %dom; 3. Run the program edited in step 2. -- END OF INSTRUCTIONS -- ******************************************************************************/ option nosource; /********** CHANGE ONLY THE LINE BELOW (See step 1 above)!!! **************/ %macro dom (data=_last_, dep=Y, p=, indep=' ', noprint=0, short=0, B=1000, predtype='r', seed=0); /**************************************************************************/ %if &indep= and &p= %then %do; %put YOU MUST SPECIFY INDEP=list of predictors OR P=no. of predictors (IF THEY ARE NAMED X1-Xp).; %goto done; %end; %else %if &indep ne %then %do; %if %index(&indep,%str(%'))>0 or %index(&indep,%str(%"))>0 %then %let indep = %substr(&indep,2,%length(&indep)-2); %if %index(&indep,-) > 0 %then %expand; %let p=1; %do %while(%length(%scan(&indep,&p))>0); %let x&p = %scan(&indep,&p); %let p = %eval(&p+1); %end; %let p = %eval(&p-1); %end; %else %if &p > 0 %then %do; %let indep=; %do i=1 %to &p; %let indep=&indep x&i; %let x&i=x&i; %end; %end; %if &p>10 %then %do; %put THE MAXIMUM NUMBER OF PREDICTORS IS 10. YOU HAVE &P..; %goto done; %end; %LET DATANAME=&SYSLAST; option nonotes; DATA _NULL_; FILE PRINT LINESLEFT=WITH; CALL SYMPUT('WITH',WITH); DATA _NULL_; FILE PRINT NOTITLES LINESLEFT=WITHOUT; CALL SYMPUT ('MTITLE',TRIM(LEFT(WITHOUT-&WITH+2))); CALL SYMPUT('NMODELS', 2**&p -1); RUN; data original; set &data; %if &noprint=0 %then %do; proc print; title&mtitle 'Input data set (check to make sure it is correct)'; run; %end; data labels; set; keep &indep; length dvlabel $40; call label(&dep,dvlabel); call symput('dvl',trim(dvlabel)); if _n_=1 then stop; run; /********************************************************* PART 1: regression/DA of the original data *********************************************************/ title 'regression results for original data set'; option notes; proc reg corr data=original outest=onereg (keep=_in_ _rsq_ &indep); model &dep=&indep / stb pcorr2 scorr2 ; model &dep=&indep / selection=adjrsq best=&nmodels %if &short=1 %then noprint;; run; quit; option nonotes; * order all subset models in lexicographical order; data modelmat; set onereg; %if %substr(&sysver,1,1)>7 %then if _n_>1;; %do i=1 %to &p; &&x&i=(&&x&i>.); %end; if _IN_=. then delete; keep &indep _IN_ _RSQ_; proc sort; by _IN_ %do i=1 %to &p; descending &&x&i %end;; run; proc print; title 'R-squared values for all subset models'; run; /*** %if &short=0 %then %do; proc print; id _IN_ _RSQ_; format _RSQ_ 5.4; run; %end; ***/ /********************************************** DOMINANCE ANALYSIS TABLE **********************************************/ option notes; title; proc IML; reset noprint; start; * read the subset models matrix into DOM; use modelmat; read all into DOM; close modelmat; * dom contains, for each subset model, 1/0 values for the predictors, the number of predictors in (_IN_), and the model r2 (_RSQ_); p=ncol(dom)-2; ncol=ncol(dom); * dom is rearranged to contain, for each subset model, the number of predictors in and the r2 of the model, followed by 1/0 values for predictors in/out; dom1=dom[,1:p]; dom2=dom[,(p+1):ncol]; dom=dom2||dom1; free dom1 dom2; * generate table of additional contributions; null=J(1,ncol,0); dom=null//dom; nrow=nrow(dom); * dom now contains a top row of zeros for the null model; full=J(1,p,99); fullrsq=J(1,1,99); reduced=J(1,p,99); redrsq=J(1,1,99); contrib=J(nrow,p,0); * additional contributions matrix; do i=1 to nrow; * for each model; do j=1 to p; * for each predictor; if dom(|i,j+2|)=0 then do; * if predictor is not in subset model; reduced=dom[i,3:ncol]; * the 1/0 row represents the reduced model; full=reduced; * the full model is same as reduced model; do k=1 to p; if k=j then full(|1,k|)=1; * add the jth predictor to the full model; end; do r=1 to nrow; * for each model; comp=dom[r,3:ncol]; * comp is the 1/0 row of dom; if comp=full then fullrsq=dom[r,2]; * r2 of row is fullrsq, or; if comp=reduced then redrsq=dom[r,2]; * r2 of row is redrsq; end; contrib(|i,j|)=fullrsq-redrsq; * contrib is r2 difference; end; else do; contrib(|i,j|)=.; * if predictor is in model, contib is .; end; end; end; contrib=dom||contrib; contrib=contrib[1:nrow-1,]; cols = {IN RSQ %do i=1 %to &p; &&X&i %end; %do i=1 %to &p; CP&i %end;}; create rsqtable from contrib[colname=cols]; append from contrib; close rsqtable; finish; run; quit; option nonotes; %if &short=0 %then %do; proc print data=rsqtable; id IN RSQ; format RSQ CP1-CP&p 5.4; title&mtitle 'Dominance Table: Additional Contributions of Predictors Across All Subset Regression Models'; %UNQUOTE(TITLE%EVAL(&MTITLE+1)) 'CPi indicates the additional contribution of predictor i to the model r-square'; %end; proc summary nway; var CP1--CP&p; class in; output out=avgcont (drop=_type_) mean=&indep; run; proc means noprint; var &indep; output out=meanc mean=; run; data avgcont; set labels meanc (in=y) avgcont; if y then in=999; proc print double; id IN; var &indep; format &indep 5.4 IN %if &p<11 %then 1.0; %else 2.0;; title&mtitle 'Dominance Analysis: Overall Average Contributions of Predictors (First Row)'; %UNQUOTE(TITLE%EVAL(&MTITLE+1)) 'And Average Contributions to Models of Each Size (Remaining Rows)'; run; proc transpose prefix=size out=meanc (rename=(SIZE999=OVERALL _NAME_=VAR)); id IN; var &indep; run; proc sort; by descending overall; title&mtitle 'Dominance Analysis: Average Predictor Contributions Overall and to Models of Each Size'; proc print; id _character_; format _numeric_ 5.4; run; TITLE&MTITLE; %DONE: option notes _last_=&syslast; run; /********************************************** DOMINANCE MATRICES (Sample) **********************************************/ Proc IML; start; use modelmat; read all into damat; close modelmat; Dcsample=J(&p,&p,0); Dasample=J(&p,&p,0); Dgsample=J(&p,&p,0); RSQ=damat[,ncol(damat)]; /***************************************************/ /***** Complete dominance ****/ /***************************************************/ do i=1 to &p-1; do j=i+1 to &p; * define matrix of constants ("comp") to determine complete dominance between each pair of predictors; comp=J(2**(&p-2),2**&p-1,0); * Xh is any subset model that does not include i and j; * Xh contains the columns of damat that do not involve i or j; Xh=J(nrow(damat),&p-2,99); Xhcol=0; * index column number in Xh; do h=1 to &p; if (i ^= h & j ^= h) then do; * find non i,j columns; Xhcol=Xhcol+1; * update column number in Xh; Xh[,Xhcol] = damat[,h]; * assign column to Xh; end; end; * contrast rows (subsets) representing XiXh and XjXh; comprow=1; do r=1 to 2**&p-2; * for each pair of rows; do s=r+1 to 2**&p-1; if Xh[r,]=Xh[s,] then do; * if the rows of Xh are the same; * and if i and j are contrasted; if (damat[r,i]=1 & damat[r,j]=0 & damat[s,i]=0 & damat[s,j]=1) then do; comp[comprow,r]=1; comp[comprow,s]=-1; comprow=comprow+1; end; end; end; * do r loop; end; * do s loop; *** Determine the complete Dij value ***; cdiffij=comp*RSQ; zero=J(nrow(cdiffij), ncol(cdiffij),0); ijdom=nrow(cdiffij); * obtain complete dominance matrices; * undetermined case (all differences are zero); if cdiffij=zero then do; Dcsample[i,j]=0.5; Dcsample[j,i]=Dcsample[i,j]; end; * else, check signs of difference elements; if cdiffij ^= zero then do; nonneg=0; nonpos=0; do k=1 to nrow(cdiffij); if cdiffij[k,]>=0 then nonneg=nonneg+1; if cdiffij[k,]<=0 then nonpos=nonpos+1; end; * dominance case; if nonneg=ijdom then Dcsample[i,j]=1; else Dcsample[i,j]=0; if nonpos=ijdom then Dcsample[j,i]=1; else Dcsample[j,i]=0; * undetermined case (differences have different signs); if (Dcsample[i,j]=0 & Dcsample[j,i]=0) then do; Dcsample[i,j]=0.5; Dcsample[j,i]=Dcsample[i,j]; end; end; /***************************************************/ /***** Conditional dominance ****/ /***************************************************/ * define matrix of constants ("avg") to determine average (within model size) dominance between each pair of predictors; subsets=damat; * subsets is a matrix that consists of p+2 columns (and the null model in the top row): 1/0 for x1 - xp, in, Rsq; avg=J(&p,2**&p-1,0); do c=1 to &p; * for each model size; num=0; * index number of subsets per size; do m=1 to 2**&p-1; * for each model, determine number of models of size is c-1 to which the ith predictor makes an additional contribution; if (subsets[m,&p+1]=c-1 & subsets[m,i]=0) then num=num+1; end; if num=0 then num=1; * for the case of the null model (size=0); do r=1 to 2**&p-1; * determine subsets to contrast; * consider additional contributions for model size (c); if (subsets[r,&p+1]=c-1 | subsets[r,&p+1]=c) then do; * average contribution of models that inlcude i but not j; if (subsets[r,i]=1 & subsets[r,j]=0) then avg[c,r]=1/num; * average contribution of models that inlcude j but not i; if (subsets[r,i]=0 & subsets[r,j]=1) then avg[c,r]=-1/num; end; end; end; *** Determine the average Dij value ***; adiffij=avg*RSQ; zero=J(nrow(adiffij), ncol(adiffij),0); ijdom=nrow(adiffij); * obtain average dominance matrices; * undetermined case (all differences are zero); if adiffij=zero then do; Dasample[i,j]=0.5; Dasample[j,i]=Dasample[i,j]; end; * else, check signs of difference elements; if adiffij ^= zero then do; nonneg=0; nonpos=0; do k=1 to nrow(adiffij); if adiffij[k,]>=0 then nonneg=nonneg+1; if adiffij[k,]<=0 then nonpos=nonpos+1; end; * dominance case; if nonneg=ijdom then Dasample[i,j]=1; else Dasample[i,j]=0; if nonpos=ijdom then Dasample[j,i]=1; else Dasample[j,i]=0; * undetermined case (differences have different signs); if (Dasample[i,j]=0 & Dasample[j,i]=0) then do; Dasample[i,j]=0.5; Dasample[j,i]=Dasample[i,j]; end; end; /***************************************************/ /***** General dominance ****/ /***************************************************/ * define matrix of constants ("glob") to determine global (overall average) dominance between each pair of predictors; glob=J(1,2**&p-1,99); do g=1 to 2**&p-1; glob[,g]=avg[+,g]/&p; end; *** Determine the global Dij value ***; gdiffij=glob*RSQ; zero=J(nrow(gdiffij), ncol(gdiffij),0); ijdom=nrow(gdiffij); * obtain global dominance matrices; * undetermined case (all differences are zero); if gdiffij=zero then do; Dgsample[i,j]=0.5; Dgsample[j,i]=Dgsample[i,j]; end; * else, check signs of difference elements; if gdiffij ^= zero then do; nonneg=0; nonpos=0; do k=1 to nrow(gdiffij); if gdiffij[k,]>=0 then nonneg=nonneg+1; if gdiffij[k,]<=0 then nonpos=nonpos+1; end; * dominance case; if nonneg=ijdom then Dgsample[i,j]=1; else Dgsample[i,j]=0; if nonpos=ijdom then Dgsample[j,i]=1; else Dgsample[j,i]=0; * undetermined case (differences have different signs); if (Dgsample[i,j]=0 & Dgsample[j,i]=0) then do; Dgsample[i,j]=0.5; Dgsample[j,i]=Dgsample[i,j]; end; end; end; *(i loop); end; *(j loop); create Dcsample from Dcsample; append from Dcsample; close Dcsample; create Dasample from Dasample; append from Dasample; close Dasample; create Dgsample from Dgsample; append from Dgsample; close Dgsample; * create a file with sample dominance values; * variables: i, j, cell value; Sample_dc = J(&p*(&p-1), 3, 0); Sample_da = J(&p*(&p-1), 3, 0); Sample_dg = J(&p*(&p-1), 3, 0); row=0; do i=1 to nrow(Dcsample); do j=1 to ncol(Dcsample); if i ^= j then do; row=row+1; Sample_dc[row,1]=i; Sample_dc[row,2]=j; Sample_dc[row,3]=Dcsample[i,j]; Sample_da[row,1]=i; Sample_da[row,2]=j; Sample_da[row,3]=Dasample[i,j]; Sample_dg[row,1]=i; Sample_dg[row,2]=j; Sample_dg[row,3]=Dgsample[i,j]; end; end; end; create sample_dc from Sample_dc[colname={i j dij}]; append from Sample_dc; close sample_dc; create sample_da from Sample_da[colname={i j dij}]; append from Sample_da; close sample_da; create sample_dg from Sample_dg[colname={i j dij}]; append from Sample_dg; close sample_dg; finish; run; /************************************************************************* * PART 2: Bootstrapping Loop **************************************************************************/ * input: data set containing Y, data set containing X; data depvar; set original; keep &dep; run; data indepvar; set original; keep &indep; run; * steps: 1. resample, 2. compute r-squared vector, 3. repeat B times; option ls=70 nonotes nosource nosource2; proc IML; reset noprint; * nolog; start; *A. Generate a vector of B seeds (seedvec); /***********************************************************************/ /* The vector seedvec contains B random seeds from a uniform */ /* distribution, generated using the initial value (seed) specified */ /* above. The vector is then saved to an external file, seeds, */ /* which is subsequently read into the SAS dataset (seeds). */ /***********************************************************************/ seedvec=J(&B,1,0); do i=1 to &B; seedvec(|i,|) = uniform(&seed)*100000; end; create seeds from seedvec; append from seedvec; close seeds; finish; run; quit; /*====================================================================*/ /* start 1 to B loop */ /*====================================================================*/ %do r = 1 %to &B; proc IML; reset noprint; * nolog; start; * obtain Aij and Dij for each ij pair; * read in the original data set (into DATA); use depvar; read all into YData; close depvar; use indepvar; read all into XData; close indepvar; DATA = YData || XData; Y = YData; X = XData; * read in the seeds vector (into seedvec); use seeds; read all into seedvec; close seeds; n=nrow(DATA); r=ncol(DATA); *** check input; if &r=1 then do; print 'Check that the data were read properly!!!'; first5Y = J(1,5,0); first5Y=Y[1:5,]; first5X = J(&p,5,0); first5X=X[1:5,]; print 'The number of observations used is:' n; print 'The total number of variables used is:' r; print 'The dependent values for the first five cases are:' first5Y; print 'The predictor values for the first five cases are:' first5X; print 'You requested' &B 'bootstrap samples'; print 'The bootstrap procedure is now running... PLEASE WAIT...'; end; /***********************************************************************/ /* For bootstrapping run i, the i-th entry of the vector seedvec */ /* (resample) is used to generate a vector (integers) whose entries */ /* consist of the integers 1 to n sampled randomly, with replacement, */ /* from a uniform distribution. */ /***********************************************************************/ integers=J(n,1,0); * Specify seed for resampling (resample); resample=J(1,1,0); resample=seedvec(|&r,|); * generate a vector (integers) containing the integers 1 to n sampled randomly, with replacement, from a uniform distribution whose initial seed is resample; Do i=1 to n; integers(|i|)=int(uniform(resample)*(n-1)+1); end; *print &r integers; %if &predtype='r' %then %do; /*=====================================================================*/ /*** PAIR RESAMPLING BOOTSTRAP ***/ /*=====================================================================*/ /***********************************************************************/ /* The i-th entry in integers is an integer (j) between 1 and n which */ /* indicates the entry (row) number of the data matrix to be taken */ /* as the i-th entry (row) in the matrix obsboot. This is repeated */ /* for i = 1 to n. */ /***********************************************************************/ * obsboot is a matrix of randomly sampled observations; obsboot=J(n,r,0); Do i=1 to n; j=integers(|i, |); obsboot(|i, |)=DATA(|j, |); end; * convert bootstrapped data to external file; ***************************************************; create bootdata from obsboot[colname={&dep &indep}]; append from obsboot; close bootdata; %end; /*===================== END of Pair Resampling =======================*/ %if &predtype='f' %then %do; /*=====================================================================*/ /*** RESIDUAL RESAMPLING BOOTSTRAP ***/ /*=====================================================================*/ /***********************************************************************/ /* From the matrix DATA, whose first column represents Y and the */ /* remaining columns represent X (as specified above), the vector of */ /* Least-squares predicted values, YHat, and the vector of the */ /* Least Squares residuals, Res, are computed. */ /***********************************************************************/ * fit a model with an intercept to obtain YHat and Res; ONE=J(n,1,1); X1=ONE||X; YHat=X1*INV(X1`*X1)*X1`*Y; Res=YHat-Y; /***********************************************************************/ /* The i-th entry in integers is an integer between 1 and n (j) which */ /* indicates the entry (row) number of the residuals vector, Res, */ /* to be added to the i-th predicted value, Y-Hat. This is repeated */ /* for i = 1 to n. The vector Yboot consists of the initial predicted */ /* values (YHat) with the randomly selected residuals added. The */ /* matrix Fixdata consists of Yboot and the original X matrix. */ /***********************************************************************/ * Resboot is a vector of randomly sampled residuals; Resboot=J(n,1,0); Do i=1 to n; j=integers(|i, |); Resboot(|i, |)=Res(|j, |); end; * add random residuals to Y-hat; Yboot=YHat+Resboot; * an n by r matrix of bootstrapped Y and fixed Xs; Fixdata=Yboot||X; * convert bootstrapped data to external file; ***************************************************; create bootdata from Fixdata[colname={&dep &indep}]; append from Fixdata; close bootdata; %end; /*=================== END of Residual Resampling =====================*/ finish; run; quit; * produce bootstrap data sets; /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /* Computing Dij */ /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ * regression of bootstrapped data; ***********************************; data resample; set bootdata; run; proc reg corr data=resample noprint outest=regfile (keep=_in_ _rsq_ &indep); model &dep=&indep / selection=adjrsq; run; quit; option nonotes; * order all subset models in lexicographical order; data bootmat; set regfile; *%if %substr(&sysver,1,1)>7 %then if _n_>1;; %do i=1 %to &p; &&x&i=(&&x&i>.); %end; if _IN_=. then delete; keep &indep _IN_ _RSQ_; run; proc sort; by _IN_ %do i=1 %to &p; descending &&x&i %end;; run; * obtain Aij and Dij for each ij pair; proc IML; reset noprint; * nolog; start; /************************************************************************* * COMPUTE DOMINANCE MATRICES (resample) **************************************************************************/ use bootmat; read all into damat; close bootmat; * damat is a matrix that consists of p+2 columns: 1/0 for x1 - xp, in, Rsq; use Dcsample; read all into sampleDc; close Dcsample; use Dasample; read all into sampleDa; close Dasample; use Dgsample; read all into sampleDg; close Dgsample; * at first run, initialize frequency matrices and sample pattern counter; if &r =1 then do; freqDc1=J(&p, &p, 0); freqDc0=J(&p, &p, 0); freqDcn=J(&p, &p, 0); freqDa1=J(&p, &p, 0); freqDa0=J(&p, &p, 0); freqDan=J(&p, &p, 0); freqDg1=J(&p, &p, 0); freqDg0=J(&p, &p, 0); freqDgn=J(&p, &p, 0); patcount=J(1,3,0); *columns correspond to dominance types; end; * at subsequent runs retrieve files as sum matrices; if &r ^= 1 then do; use sumDc1; read all into freqDc1; close sumDc1; use sumDc0; read all into freqDc0; close sumDc0; use sumDcn; read all into freqDcn; close sumDcn; use sumDa1; read all into freqDa1; close sumDa1; use sumDa0; read all into freqDa0; close sumDa0; use sumDan; read all into freqDan; close sumDan; use sumDg1; read all into freqDg1; close sumDg1; use sumDg0; read all into freqDg0; close sumDg0; use sumDgn; read all into freqDgn; close sumDgn; use patfreq; read all into patcount; close patfreq; end; * r-squared vector is the last column of damat (without null model); RSQ=damat[,ncol(damat)]; *print &r RSQ; Dc=J(&p,&p,0); Da=J(&p,&p,0); Dg=J(&p,&p,0); /***************************************************/ /***** Complete dominance ****/ /***************************************************/ do i=1 to &p-1; do j=i+1 to &p; * define matrix of constants ("comp") to determine complete dominance between each pair of predictors; comp=J(2**(&p-2),2**&p-1,0); * Xh is any subset model that does not include i and j; * Xh contains the columns of damat that do not involve i or j; Xh=J(nrow(damat),&p-2,99); Xhcol=0; * index column number in Xh; do h=1 to &p; if (i ^= h & j ^= h) then do; * find non i,j columns; Xhcol=Xhcol+1; * update column number in Xh; Xh[,Xhcol] = damat[,h]; * assign column to Xh; end; end; * contrast rows (subsets) representing XiXh and XjXh; comprow=1; do r=1 to 2**&p-2; * for each pair of rows; do s=r+1 to 2**&p-1; if Xh[r,]=Xh[s,] then do; * if the rows of Xh are the same; * and if i and j are contrasted; if (damat[r,i]=1 & damat[r,j]=0 & damat[s,i]=0 & damat[s,j]=1) then do; comp[comprow,r]=1; comp[comprow,s]=-1; comprow=comprow+1; end; end; end; * do r loop; end; * do s loop; *** Determine the complete Dij value ***; cdiffij=comp*RSQ; zero=J(nrow(cdiffij), ncol(cdiffij),0); ijdom=nrow(cdiffij); * obtain complete dominance matrices; * undetermined case (all differences are zero); if cdiffij=zero then do; Dc[i,j]=0.5; Dc[j,i]=Dc[i,j]; end; * else, check signs of difference elements; if cdiffij ^= zero then do; nonneg=0; nonpos=0; do k=1 to nrow(cdiffij); if cdiffij[k,]>=0 then nonneg=nonneg+1; if cdiffij[k,]<=0 then nonpos=nonpos+1; end; * dominance case; if nonneg=ijdom then Dc[i,j]=1; else Dc[i,j]=0; if nonpos=ijdom then Dc[j,i]=1; else Dc[j,i]=0; * undetermined case (differences have different signs); if (Dc[i,j]=0 & Dc[j,i]=0) then do; Dc[i,j]=0.5; Dc[j,i]=Dc[i,j]; end; end; /***************************************************/ /***** Conditional dominance ****/ /***************************************************/ * define matrix of constants ("avg") to determine average (within model size) dominance between each pair of predictors; subsets=damat; * subsets is a matrix that consists of p+2 columns (and the null model in the top row): 1/0 for x1 - xp, in, Rsq; avg=J(&p,2**&p-1,0); do c=1 to &p; * for each model size; num=0; * index number of subsets per size; do m=1 to 2**&p-1; * for each model, determine number of models of size is c-1 to which the ith predictor makes an additional contribution; if (subsets[m,&p+1]=c-1 & subsets[m,i]=0) then num=num+1; end; if num=0 then num=1; * for the case of the null model (size=0); do r=1 to 2**&p-1; * determine subsets to contrast; * consider additional contributions for model size (c); if (subsets[r,&p+1]=c-1 | subsets[r,&p+1]=c) then do; * average contribution of models that inlcude i but not j; if (subsets[r,i]=1 & subsets[r,j]=0) then avg[c,r]=1/num; * average contribution of models that inlcude j but not i; if (subsets[r,i]=0 & subsets[r,j]=1) then avg[c,r]=-1/num; end; end; end; *** Determine the average Dij value ***; adiffij=avg*RSQ; zero=J(nrow(adiffij), ncol(adiffij),0); ijdom=nrow(adiffij); * obtain average dominance matrices; * undetermined case (all differences are zero); if adiffij=zero then do; Da[i,j]=0.5; Da[j,i]=Da[i,j]; end; * else, check signs of difference elements; if adiffij ^= zero then do; nonneg=0; nonpos=0; do k=1 to nrow(adiffij); if adiffij[k,]>=0 then nonneg=nonneg+1; if adiffij[k,]<=0 then nonpos=nonpos+1; end; * dominance case; if nonneg=ijdom then Da[i,j]=1; else Da[i,j]=0; if nonpos=ijdom then Da[j,i]=1; else Da[j,i]=0; * undetermined case (differences have different signs); if (Da[i,j]=0 & Da[j,i]=0) then do; Da[i,j]=0.5; Da[j,i]=Da[i,j]; end; end; /***************************************************/ /***** General dominance ****/ /***************************************************/ * define matrix of constants ("glob") to determine global (overall average) dominance between each pair of predictors; glob=J(1,2**&p-1,99); do g=1 to 2**&p-1; glob[,g]=avg[+,g]/&p; end; *** Determine the global Dij value ***; gdiffij=glob*RSQ; *print 'global dominance comparisons'; *print i j gdiffij; zero=J(nrow(gdiffij), ncol(gdiffij),0); ijdom=nrow(gdiffij); * obtain global dominance matrices; * undetermined case (all differences are zero); if gdiffij=zero then do; Dg[i,j]=0.5; Dg[j,i]=Dg[i,j]; end; * else, check signs of difference elements; if gdiffij ^= zero then do; nonneg=0; nonpos=0; do k=1 to nrow(gdiffij); if gdiffij[k,]>=0 then nonneg=nonneg+1; if gdiffij[k,]<=0 then nonpos=nonpos+1; end; * dominance case; if nonneg=ijdom then Dg[i,j]=1; else Dg[i,j]=0; if nonpos=ijdom then Dg[j,i]=1; else Dg[j,i]=0; * undetermined case (differences have different signs); if (Dg[i,j]=0 & Dg[j,i]=0) then do; Dg[i,j]=0.5; Dg[j,i]=Dg[i,j]; end; end; end; *(i loop); end; *(j loop); **** compare current D matrices to sample D matrices ****; if Dc=sampleDc then patcount[,1]=patcount[,1]+1; if Da=sampleDa then patcount[,2]=patcount[,2]+1; if Dg=sampleDg then patcount[,3]=patcount[,3]+1; create patfreq from patcount; append from patcount; close patfreq; thisDc1=J(&p,&p,0); thisDc0=J(&p,&p,0); thisDcn=J(&p,&p,0); thisDa1=J(&p,&p,0); thisDa0=J(&p,&p,0); thisDan=J(&p,&p,0); thisDg1=J(&p,&p,0); thisDg0=J(&p,&p,0); thisDgn=J(&p,&p,0); do i=1 to &p; do j=1 to &p; if i ^=j then do; if Dc[i,j]=1 then thisDc1[i,j]=1; if Dc[i,j]=0 then thisDc0[i,j]=1; if Dc[i,j]=0.5 then thisDcn[i,j]=1; if Da[i,j]=1 then thisDa1[i,j]=1; if Da[i,j]=0 then thisDa0[i,j]=1; if Da[i,j]=0.5 then thisDan[i,j]=1; if Dg[i,j]=1 then thisDg1[i,j]=1; if Dg[i,j]=0 then thisDg0[i,j]=1; if Dg[i,j]=0.5 then thisDgn[i,j]=1; end; end; end; **** add current D matrices to previous D matrices ****; freqDc1 = freqDc1+thisDc1; freqDa1 = freqDa1+thisDa1; freqDg1 = freqDg1+thisDg1; freqDc0 = freqDc0+thisDc0; freqDa0 = freqDa0+thisDa0; freqDg0 = freqDg0+thisDg0; freqDcn = freqDcn+thisDcn; freqDan = freqDan+thisDan; freqDgn = freqDgn+thisDgn; create sumDc1 from freqDc1; append from freqDc1; close sumDc1; create sumDc0 from freqDc0; append from freqDc0; close sumDc0; create sumDcn from freqDcn; append from freqDcn; close sumDcn; create sumDa1 from freqDa1; append from freqDa1; close sumDa1; create sumDa0 from freqDa0; append from freqDa0; close sumDa0; create sumDan from freqDan; append from freqDan; close sumDan; create sumDg1 from freqDg1; append from freqDg1; close sumDg1; create sumDg0 from freqDg0; append from freqDg0; close sumDg0; create sumDgn from freqDgn; append from freqDgn; close sumDgn; *at last run create summary tables containing the following columns: i, j, f=1, f=0, f=0.5 pij pji; if &r=&B then do; row=1; summary_c=J(&p*(&p-1), 8, -9); summary_a=J(&p*(&p-1), 8, -9); summary_g=J(&p*(&p-1), 8, -9); do i=1 to &p; do j=1 to &p; if i^=j then do; summary_c[row,1]=i; summary_c[row,2]=j; summary_c[row,3]=freqDc1[i,j]; summary_c[row,4]=freqDc0[i,j]; summary_c[row,5]=freqDcn[i,j]; summary_c[row,6]=freqDc1[i,j]/&B; summary_c[row,7]=freqDc0[i,j]/&B; summary_c[row,8]=freqDcn[i,j]/&B; summary_a[row,1]=i; summary_a[row,2]=j; summary_a[row,3]=freqDa1[i,j]; summary_a[row,4]=freqDa0[i,j]; summary_a[row,5]=freqDan[i,j]; summary_a[row,6]=freqDa1[i,j]/&B; summary_a[row,7]=freqDa0[i,j]/&B; summary_a[row,8]=freqDan[i,j]/&B; summary_g[row,1]=i; summary_g[row,2]=j; summary_g[row,3]=freqDg1[i,j]; summary_g[row,4]=freqDg0[i,j]; summary_g[row,5]=freqDgn[i,j]; summary_g[row,6]=freqDg1[i,j]/&B; summary_g[row,7]=freqDg0[i,j]/&B; summary_g[row,8]=freqDgn[i,j]/&B; row=row+1; end; end; end; create csummary from summary_c[colname={i j f1 f0 fn Pij Pji pno}]; append from summary_c; close csummary; create asummary from summary_a[colname={i j f1 f0 fn Pij Pji pno}]; append from summary_a; close asummary; create gsummary from summary_g[colname={i j f1 f0 fn Pij Pji pno}]; append from summary_g; close gsummary; create patsum from patcount[colname={complete conditional general}]; append from patcount; close patsum; end; finish; run; quit; %end; ***** end the 1 to B loop ****; /*************************************************************************/ /* PART 3: Probabilities and reproducibilities */ /*************************************************************************/ * input: nine p by p matrices of COUNTS; * stpes: convert to probabilities, compute test statistics / CIs, plots; * output: tables; data average; set asummary; dominance='conditional'; pair=i*10+j; run; data complete; set csummary; dominance='complete'; pair=i*10+j; run; data global; set gsummary; dominance='general'; pair=i*10+j; run; data merged; set complete average global; run; /* proc print data=merged; title 'results'; run; */ data results; set merged; array f[3] f1 f0 fn; do domtype=1 to 3; domfreq = f[domtype]; output; end; run; data results; set results; if domtype=1 then Dij=1; if domtype=2 then Dij=0; if domtype=3 then Dij=0.5; run; proc sort data=results; by dominance pair; run; proc means data=results noprint; var Dij; freq domfreq; by dominance pair; output out=meansout mean=Dbar STD=SE; run; data samplea; set Sample_da; pair=i*10+j; dominance='conditional'; run; data samplec; set Sample_dc; pair=i*10+j; dominance='complete'; run; data sampleg; set Sample_dg; pair=i*10+j; dominance='general'; run; data sample; set samplec samplea sampleg; run; ******* SUMMARY OUTPUT *******; * variables: dominance-type, i, j, p(iDj), p(jDi), p(noD), Dbar, SE, sample-value, pattern-prob; *** merge sample, results and meansout data sets by dominance and pair; proc sort data=sample; by dominance pair; run; data results; set results; if domtype=1; run; proc sort data=results; by dominance pair; run; proc sort data=meansout; by dominance pair; run; data printout; merge results sample meansout; by dominance pair; run; data table; set printout(drop=domtype domfreq _TYPE_ _FREQ_); /* i_dom_j=pij; j_dom_i=pji; no_dom=pno; */ Pijno=pno; Dij_mean = dbar; Dij_se = se; if dij=1 then reprod = pij; if dij=0 then reprod = pji; if dij=0.5 then reprod = pijno; format Dij_se F6.3; run; data table; set table(drop = f1 f0 fn pno dbar); run; proc sort; by dominance pair; run; proc print data=table; title 'summary of results: all pairs'; title2 'Pij, Pji, Pijno are dominance probabilities and'; title3 'reprod are reproducibility values'; var dominance i j Dij Dij_mean dij_SE Pij Pji Pijno reprod; run; data plots; set table; if Pij ge Pji; format Dij_se F6.3; run; proc print data=plots; title 'summary of results: pair arranged by dominant predictor'; title2 'Pij, Pji, Pijno are dominance probabilities and'; title3 'reprod are reproducibility values'; var dominance i j Dij Dij_mean dij_SE Pij Pji Pijno reprod; run; %mend dom; /*********************************************************** The following macros expand a variable list containing hyphens into a list specifying each individual variable. ***********************************************************/ %MACRO EXPAND; %LET LNGTH = %LENGTH(&INDEP); %LET TEMP=; %DO _INDEX_ = 1 %TO &LNGTH; %LET ITEM = %SCAN(&INDEP,&_INDEX_,%QUOTE( )); %IF %LENGTH(&ITEM) EQ 0 %THEN %GOTO DONE; %IF %INDEX(&ITEM,-) > 0 %THEN %EXPANDED; %LET TEMP = &TEMP &ITEM; %END; %DONE: %LET INDEP = &TEMP; %MEND EXPAND; %MACRO EXPANDED; %LET DASH = %INDEX(&ITEM,-); %DO I = %EVAL(&DASH-1) %TO 1 %BY -1; %LET ALPHANUM = %SUBSTR(&ITEM,&I,1); %DO II = 0 %TO 9; %IF &ALPHANUM EQ &II %THEN %GOTO FOUND; %END; %GOTO DONE; %FOUND: %END; %DONE: %LET PREFX = %SUBSTR(&ITEM,1,&I); %LET LOWER = %SUBSTR(&ITEM,%EVAL(&I+1),%EVAL(&DASH-&I-1)); %LET UPPER = %SUBSTR(&ITEM,%EVAL(&DASH+&I+1)); %LET ITEM=; %DO II = &LOWER %TO &UPPER; %LET ITEM = &ITEM &PREFX.&II; %END; %MEND EXPANDED;