/******************************************************************************
 DOMINANCE ANALYSIS MACRO

 This macro executes dominance analysis as described by David Budescu in
 Psychological Bulletin, 1993, Vol. 114, No. 3, pp. 542-551.

 This macro was written by Razia Azen with valuable improvements written by
 Robert Ceurvorst.

 Dominance analysis quantifies the importance of each predictor as its average
 increment to the model r-square, across all possible submodel sizes.

 NOTE: This program is limited to at most 10 predictors!

 ------
 STEPS:
 ------
 1. Save this file, then add the following two lines to the SAS program in
    which you have read the data set to be analyzed, editing the %dom call
    according to the directions below:

      %include 'k:\sasmacro\dom.sas';  *** CHANGE TO PATH WHERE MACRO IS SAVED ***;
      %dom (data=_last_, dep=Y, indep='list of predictors',
            p=n_of_predictors, noprint=0, short=0);

    If defaults are used, then only p= OR indep= is necessary, e.g.,
    "%dom(p=4)" will use the last data set created and operate on variables
    Y and X1-X4.  Either p= OR indep= is required -- not both.  If both are
    specified, p is determined by counting the variables in the indep= list.

    data=      SAS data set to be used.  Default is the last data set created.
    dep=       Name of the dependent variable.  Default is Y.
    indep=     List of predictors in quotes.  Numbered ranges such as x1-x5
               (or x01-x05) are expanded into the individual variable names.
      OR
    p=         Number of predictors IF and ONLY IF they are named X1-Xp, in
               which case indep= is not required.
    noprint=1  Suppresses printing of the input data set.
    short=1    Suppresses listing of each predictor's contributions to
               individual submodels.

 SIDE EFFECTS:
    Creates the WORK data sets ORIGINAL, LABELS, ONEREG, MODELMAT, RSQTABLE,
    AVGCONT and MEANC.  On exit the NOTES option is switched on and _LAST_ is
    reset to the data set that was most recent when the macro was called, so
    repeated calls with data=_last_ keep analysing the same data.
    Including this file switches the SOURCE option off (see below).
******************************************************************************/
run;
option nosource;

%macro dom (data=_last_, dep=Y, p=, indep=, noprint=0, short=0);

  /* Keep every working macro variable out of the caller's symbol tables. */
  %local i dataname with mtitle nmodels dvl
         x1 x2 x3 x4 x5 x6 x7 x8 x9 x10;

  /* Remember the most recently created data set BEFORE anything can jump  */
  /* to the exit label, so that _LAST_ can always be restored on the way   */
  /* out.                                                                  */
  %let dataname=&syslast;

  /* ---- Resolve the predictor list (INDEP, P and X1..Xp) ---------------- */
  /* %length() is used for the blank tests so that hyphens or operator-like */
  /* words inside INDEP are never evaluated as an expression.               */
  %if %length(&indep)=0 and %length(&p)=0 %then %do;
    %put YOU MUST SPECIFY INDEP=list of predictors OR P=no. of predictors (IF THEY ARE NAMED X1-Xp).;
    %goto done;
  %end;
  %else %if %length(&indep)>0 %then %do;
    /* Strip the surrounding quotes, if any. */
    %if %index(&indep,%str(%'))>0 or %index(&indep,%str(%"))>0 %then
      %let indep = %substr(&indep,2,%length(&indep)-2);
    /* Expand numbered ranges (x1-x5) into individual names. */
    %if %index(&indep,-) > 0 %then %expand;
    /* Count the predictors and store the k-th name in macro variable Xk. */
    %let p=1;
    %do %while(%length(%scan(&indep,&p))>0);
      %let x&p = %scan(&indep,&p);
      %let p = %eval(&p+1);
    %end;
    %let p = %eval(&p-1);
  %end;
  %else %if &p > 0 %then %do;
    /* Only P was given: the predictors are X1..Xp by convention. */
    %let indep=;
    %do i=1 %to &p;
      %let indep=&indep x&i;
      %let x&i=x&i;
    %end;
  %end;
  %else %do;
    /* P was given without INDEP but is not a positive count. */
    %put P MUST BE A POSITIVE INTEGER. YOU HAVE &P..;
    %goto done;
  %end;

  %if &p>10 %then %do;
    %put THE MAXIMUM NUMBER OF PREDICTORS IS 10. YOU HAVE &P..;
    %goto done;
  %end;

  option nonotes;

  /* Compare the lines left on a print page with and without titles.  The  */
  /* difference (+2) is stored in MTITLE and used as the TITLEn line for   */
  /* this macro's own titles -- presumably so that titles already set by   */
  /* the caller are left in place.  NMODELS is the number of non-empty     */
  /* predictor subsets, 2**p - 1.                                          */
  DATA _NULL_;
    FILE PRINT LINESLEFT=WITH;
    CALL SYMPUT('WITH',WITH);
  DATA _NULL_;
    FILE PRINT NOTITLES LINESLEFT=WITHOUT;
    CALL SYMPUT ('MTITLE',TRIM(LEFT(WITHOUT-&WITH+2)));
    CALL SYMPUT('NMODELS', 2**&p -1);
  RUN;

  /* Work on a private copy of the input data. */
  data original;
    set &data;
  %if &noprint=0 %then %do;
    proc print;
      title&mtitle 'Input data set (check to make sure it is correct)';
    run;
  %end;

  /* LABELS is a zero-observation data set (STOP runs before the implicit  */
  /* OUTPUT) that keeps only the predictor variables; it is placed first   */
  /* in a later SET statement.  The label of the dependent variable is     */
  /* stored in macro variable DVL.                                         */
  data labels;
    set;
    keep &indep;
    length dvlabel $40;
    call label(&dep,dvlabel);
    call symput('dvl',trim(dvlabel));
    if _n_=1 then stop;
  run;

  /*********************************************************
   PART 1: regression/DA of the original data
  *********************************************************/
  /* The first MODEL statement reports the full model; the second fits     */
  /* every subset model and writes one OUTEST row per subset.              */
  option notes;
  proc reg corr data=original outest=onereg (keep=_in_ _rsq_ &indep);
    model &dep=&indep / stb pcorr2 scorr2 ;
    model &dep=&indep / selection=adjrsq best=&nmodels %if &short=1 %then noprint;;
  run;
  quit;
  option nonotes;

  * order all subset models in lexicographical order;
  /* Each predictor column is recoded to 1 (in the model, i.e. a           */
  /* non-missing estimate) or 0 (not in the model).  Rows with a missing   */
  /* _IN_ are dropped; under SAS version 8 the first row is dropped too.   */
  data modelmat;
    set onereg;
    %if %substr(&sysver,1,1)=8 %then if _n_>1;;
    %do i=1 %to &p;
      &&x&i=(&&x&i>.);
    %end;
    if _IN_=. then delete;
    keep &indep _IN_ _RSQ_;
  proc sort;
    by _IN_ %do i=1 %to &p; descending &&x&i %end;;
  run;

  /***
  %if &short=0 %then %do;
    proc print; id _IN_ _RSQ_; format _RSQ_ 5.4; run;
  %end;
  ***/

  /**********************************************
   DOMINANCE ANALYSIS TABLE
  **********************************************/
  option notes;
  proc IML;
    reset noprint;
    start;
      * read the subset models matrix into DOM;
      use modelmat;
      read all into DOM;
      close modelmat;

      * dom contains, for each subset model, 1/0 values for the predictors,
        the number of predictors in (_IN_), and the model r2 (_RSQ_);
      p=ncol(dom)-2;
      ncol=ncol(dom);

      * dom is rearranged to contain, for each subset model, the number of
        predictors in and the r2 of the model, followed by 1/0 values for
        predictors in/out;
      dom1=dom[,1:p];
      dom2=dom[,(p+1):ncol];
      dom=dom2||dom1;
      free dom1 dom2;

      * generate table of additional contributions;
      null=J(1,ncol,0);
      dom=null//dom;
      nrow=nrow(dom);
      * dom now contains a top row of zeros for the null model;

      full=J(1,p,99);
      fullrsq=J(1,1,99);
      reduced=J(1,p,99);
      redrsq=J(1,1,99);
      contrib=J(nrow,p,0);   * additional contributions matrix;

      do i=1 to nrow;        * for each model;
        do j=1 to p;         * for each predictor;
          if dom(|i,j+2|)=0 then do;   * if predictor is not in subset model;
            reduced=dom[i,3:ncol];     * the 1/0 row represents the reduced model;
            full=reduced;              * the full model is same as reduced model;
            do k=1 to p;
              if k=j then full(|1,k|)=1;   * add the jth predictor to the full model;
            end;
            do r=1 to nrow;            * for each model;
              comp=dom[r,3:ncol];      * comp is the 1/0 row of dom;
              if comp=full then fullrsq=dom[r,2];      * r2 of row is fullrsq, or;
              if comp=reduced then redrsq=dom[r,2];    * r2 of row is redrsq;
            end;
            contrib(|i,j|)=fullrsq-redrsq;   * contrib is r2 difference;
          end;
          else do;
            contrib(|i,j|)=.;   * if predictor is in model, contrib is .;
          end;
        end;
      end;

      contrib=dom||contrib;
      *contrib=contrib[1:nrow-1,];
      cols = {IN RSQ %do i=1 %to &p; &&X&i %end; %do i=1 %to &p; CP&i %end;};
      create rsqtable from contrib[colname=cols];
      append from contrib;
      close rsqtable;
    finish;
    run;
  quit;
  option nonotes;

  %if &short=0 %then %do;
    proc print data=rsqtable;
      id IN RSQ;
      format RSQ CP1-CP&p 5.4;
      title&mtitle 'Dominance Table: Additional Contributions of Predictors Across All Subset Regression Models';
      %UNQUOTE(TITLE%EVAL(&MTITLE+1)) 'CPi indicates the additional contribution of predictor i to the model r-square';
  %end;

  /* The full model has no additional contributions, so drop it. */
  data rsqtable;
    set rsqtable;
    if IN ne &p;
  run;

  /* Average contribution of each predictor within each model size ...     */
  proc summary nway;
    var CP1--CP&p;
    class in;
    output out=avgcont (drop=_type_) mean=&indep;

  /* ... and the mean of those per-size averages (overall contribution).   */
  proc means noprint;
    var &indep;
    output out=meanc mean=;

  /* Stack the overall row (flagged IN=999) above the per-size rows. */
  data avgcont;
    set labels meanc (in=y) avgcont;
    if y then in=999;

  proc print double;
    id IN;
    var &indep;
    format &indep 5.4 IN %if &p<11 %then 1.0; %else 2.0;;
    title&mtitle 'Dominance Analysis: Overall Average Contributions of Predictors (First Row)';
    %UNQUOTE(TITLE%EVAL(&MTITLE+1)) 'And Average Contributions to Models of Each Size (Remaining Rows)';
  run;

  /* One row per predictor, sorted by overall contribution. */
  proc transpose prefix=size out=meanc (rename=(SIZE999=OVERALL _NAME_=VAR));
    id IN;
    var &indep;
  run;

  proc sort;
    by descending overall;
    title&mtitle 'Dominance Analysis: Average Predictor Contributions Overall and to Models of Each Size';
  proc print;
    id _character_;
    format _numeric_ 5.4;
  run;

  /* Clear the titles this macro set. */
  TITLE&MTITLE;

  /* Restore notes and point _LAST_ back at the data set that was current  */
  /* when the macro was called (saved in DATANAME at the top).             */
  %DONE: option notes _last_=&dataname;

%mend dom;

/***********************************************************
 The following macros expand a variable list containing
 hyphens into a list specifying each individual variable.

 EXPAND   reads macro variable INDEP from the calling scope
          and replaces it with the expanded list.
 EXPANDED rewrites one hyphenated item, held in macro
          variable ITEM of the calling EXPAND, e.g.
          x1-x3 becomes x1 x2 x3 and x01-x03 becomes
          x01 x02 x03.  Both ends of the range must carry
          the same prefix.
***********************************************************/
%MACRO EXPAND;
  /* ITEM is declared here so that EXPANDED updates this copy. */
  %LOCAL LNGTH TEMP _INDEX_ ITEM;
  %LET LNGTH = %LENGTH(&INDEP);
  %LET TEMP=;
  /* LNGTH is only an upper bound on the number of blank-separated items;  */
  /* the loop leaves as soon as %SCAN returns an empty item.               */
  %DO _INDEX_ = 1 %TO &LNGTH;
    %LET ITEM = %SCAN(&INDEP,&_INDEX_,%QUOTE( ));
    %IF %LENGTH(&ITEM) EQ 0 %THEN %GOTO DONE;
    %IF %INDEX(&ITEM,-) > 0 %THEN %EXPANDED;
    %LET TEMP = &TEMP &ITEM;
  %END;
  %DONE: %LET INDEP = &TEMP;
%MEND EXPAND;

%MACRO EXPANDED;
  /* ITEM is deliberately NOT local: it belongs to the calling EXPAND. */
  %LOCAL DASH I II ALPHANUM PREFX LOWER UPPER WIDTH PAD;
  %LET DASH = %INDEX(&ITEM,-);
  /* Walk left from the hyphen over the digits of the lower bound; on      */
  /* exit I is the position of the last character of the name prefix.      */
  %DO I = %EVAL(&DASH-1) %TO 1 %BY -1;
    %LET ALPHANUM = %SUBSTR(&ITEM,&I,1);
    %DO II = 0 %TO 9;
      %IF &ALPHANUM EQ &II %THEN %GOTO FOUND;
    %END;
    %GOTO DONE;
    %FOUND: %END;
  %DONE: %LET PREFX = %SUBSTR(&ITEM,1,&I);
  %LET LOWER = %SUBSTR(&ITEM,%EVAL(&I+1),%EVAL(&DASH-&I-1));
  /* The upper bound follows the hyphen and a repeat of the prefix. */
  %LET UPPER = %SUBSTR(&ITEM,%EVAL(&DASH+&I+1));
  /* A lower bound written with leading zeros (x01) means every member of  */
  /* the range is zero-padded to that width.                               */
  %LET WIDTH = %LENGTH(&LOWER);
  %LET PAD = %EVAL(%SUBSTR(&LOWER,1,1) = 0 AND &WIDTH > 1);
  %LET ITEM=;
  %DO II = &LOWER %TO &UPPER;
    %IF &PAD AND (%LENGTH(&II) < &WIDTH) %THEN
      %LET ITEM = &ITEM &PREFX.%SYSFUNC(PUTN(&II,Z&WIDTH..));
    %ELSE
      %LET ITEM = &ITEM &PREFX.&II;
  %END;
%MEND EXPANDED;