* --- from http://ftp.sas.com/techsup/download/stat/AllsubsREG.html ---*; /* USAGE: Before calling the ALLSUBSREG macro, you must first define the macro in your current SAS session. You can do this either by copying this file into the SAS program editor and submitting it, or by using a %INCLUDE statement containing the path and filename of this file on your system. Once the macro is defined, call the macro using the desired options. See the section below for an example. The following parameters are required when using the macro. data= The name of the input data set to be analyzed. If not specified, the last-created data set is used. depvar= The name of the dependent (response) variable. indepvar= The names of your independent variables, separated by spaces, in the model. method= The type of the all subset model selection method to be used in the regression analysis. One of the following three methods can be specified: RSQUARE, ADJRSQ, and CP. sortvar= The name of the sort variable for model statistic table. By default the final table is sorted by the number of variables in the model, _IN_. In addition to the _IN_ the sortvar variable allows one of the model statistics to be specified: _P_ _CP_ _PRESS_ _RMSE_ _MSE_ _RSQ_ _ADJRSQ_ _AIC_ _BIC_ _SBC_ and will be sorted in descending order. printvar= The names of the model statistics to be displayed in a table. By default the final table displays VarsInModel, _IN_, all model statistics, and parameter estimates. The purpose of the printvar parameter is to allow the user the option to select which model statistics to display in the table. _P_ _CP_ _PRESS_ _RMSE_ _MSE_ _RSQ_ _ADJRSQ_ _AIC_ _BIC_ _SBC_ */ %let DATA=; /* THE MACRO VARIABLE DATA CONTAINS THE NAME OF THE SAS DATA SET */ *%let DEPVAR=Y; /* THE MACRO VARIABLE DEPVAR CONTAINS THE DEPENDENT VARIABLE */ *%let INDEPVAR=x1 x2 x3 x4 ; /* THE MACRO VARIABLE INDEPVAR CONTAINS THE REGRESSOR VARIABLES */ %let method=RSQUARE; /* THE MACRO VARIABLE METHOD IDENTIFIES THE METHOD TO SELECT THE MODEL */ *%let sortvar=_PRESS_; /* THE MACRO VARIABLE SORTVAR IDENTIFIES THE MODEL STATISTICS THE TABLE WILL BE SORTED BY */ *%let printvar=_P_ _CP_ _PRESS_ _RMSE_ _MSE_ _RSQ_ _ADJRSQ_ _AIC_ _BIC_ _SBC_; /* THE MACRO VARIABLE PRINTVAR CONTAINS THE MODEL STATISTICS TO BE PRINTED IN THE TABLE*/ ods noresults; %macro allsubsets(list, bnum); %local notesopt; %let notesopt = %sysfunc(getoption(notes)) %sysfunc(getoption(source)); options nosource; * nonotes; %let qlist =; %let n = 1; %do %while(%length(%scan(&list,&n))); %let qlist = &qlist "%scan(&list,&n)"; %let n = %eval(&n+1); %end; %let n = %eval(&n-1); data _tempAll; if (0); run; %do i = 1 %to &n; proc plan; factors comb = nrow ordered index = &i of &n comb / noprint; output out=_temp index cvals=(&qlist); data _temp; set _temp; i = &i; proc transpose data=_temp out=_temp; by comb; var index; ***; data temp&n; set _temp; %do c = &i+1 %to &n; COL&c = ' . '; %end; /* proc print data=temp&n; title "temp file for &i"; */ proc append base=submodels&bnum data=temp&n force; run; data _temp; set _temp; length Subset $ %length(&list); array col{&i}; Subset = trim(left(col{1})); do i = 2 to dim(col); Subset = trim(left(Subset)) || ' ' || trim(left(col{i})); end; keep i comb Subset; data _tempAll; set _tempAll _temp; run; %end; %global nSubs; %let nSubs=%eval(2**&n-1); %do i = 1 %to &nSubs; %global modelRhs&i; data _null_; set _tempAll(obs=&i firstobs=&i); call symput("modelRhs&i",subset); run; %end; options ¬esopt; %mend; %macro ALLSUBSREG(bnum, bdata); %do i = 1 %to &nSubs; /* proc logistic descending data=&data outest=modelparms&i ; model &depvar=&&modelRhs&i/rsq; run; data submodel&i; set modelparms&i; subset = "&&modelRhs&i"; keep subset _STATUS_ Intercept _LNLIKE_; proc print data=submodel&i; proc append base=summary data=submodel&i force; run; */ proc logistic descending data=&bdata; title "output for subset model &&modelRhs&i"; model &depvar=&&modelRhs&i/rsq; output out=modelout&i predicted=ypred; ods output FitStatistics = LogLFits&i; ods output Rsquare = Rsq&i; run; data fits&i; set LogLFits&i; if Criterion = '-2 Log L'; subset = "&&modelRhs&i"; ** to obtain Likelihoods from -2 log Likelihoods; LogL0 = InterceptOnly/-2; LogLM = InterceptAndCovariates/-2; L0 = exp(InterceptOnly/-2); LM = exp(InterceptAndCovariates/-2); sample=&bnum; proc append base=allfits&bnum data=fits&i force; ** correlation fit measure; proc corr data=modelout&i outp=rvalues&i; var &depvar ypred; data corrfit&i; set rvalues&i; if _TYPE_ = 'CORR' and _NAME_ = 'ypred'; subset = "&&modelRhs&i"; run; proc append base=corrfits&bnum data=corrfit&i force; %end; %mend; %macro logistic_da (fit, bootrep); /**************************************************************************/ title; * add x1-xp as 1s and 0s for presence/absence in model; data summary; set summary&bootrep; %do i=1 %to &p; x&i = 0; %do j=1 %to &p; if COL&j = "x&i" then x&i = 1; %end; %end; *proc print data=summary; run; data subsets; set summary; keep &fit x1--x&p; *proc print; run; /********************************************** DOMINANCE ANALYSIS TABLE **********************************************/ option notes; title; proc IML; reset noprint; start; * read the subset models matrix into DOM; use subsets; read all into DOM; close subsets; * dom is rearranged to contain, for each subset model, the number of predictors in and the r2 of the model, followed by 1/0 values for predictors in/out; dom1=dom[,1]; dom2=dom[,2:ncol(dom)]; in = dom2[,+]; dom=in||dom1||dom2; free in dom1 dom2; *print dom; p=&p; ncol=ncol(dom); * generate table of additional contributions (complete dominance); null=J(1,ncol,0); dom=null//dom; nrow=nrow(dom); * dom now contains a top row of zeros for the null model; full=J(1,p,99); fullrsq=J(1,1,99); reduced=J(1,p,99); redrsq=J(1,1,99); contrib=J(nrow,p,0); * additional contributions matrix; do i=1 to nrow; * for each model; do j=1 to p; * for each predictor; if dom(|i,j+2|)=0 then do; * if predictor is not in subset model; reduced=dom[i,3:ncol]; * the 1/0 row represents the reduced model; full=reduced; * the full model is same as reduced model; do k=1 to p; if k=j then full(|1,k|)=1; * add the jth predictor to the full model; end; do r=1 to nrow; * for each model; comp=dom[r,3:ncol]; * comp is the 1/0 row of dom; if comp=full then fullrsq=dom[r,2]; * r2 of row is fullrsq, or; if comp=reduced then redrsq=dom[r,2]; * r2 of row is redrsq; end; contrib(|i,j|)=fullrsq-redrsq; * contrib is r2 difference; end; else do; contrib(|i,j|)=.; * if predictor is in model, contib is .; end; end; end; contrib=dom||contrib; *contrib=contrib[1:nrow-1,]; cols = {IN &fit %do i=1 %to &p; X&i %end; %do i=1 %to &p; CP&i %end;}; create rsqtable from contrib[colname=cols]; append from contrib; close rsqtable; finish; run; quit; *proc print data=rsqtable; *run; %if &bootrep=0 %then %do; proc print data=rsqtable; id IN; format CP1-CP&p 5.4; *title 'Dominance Table: Additional Contributions of Predictors Across All Subset Regression Models'; title 'Dominance Table: CPi indicates the additional contribution of predictor i to the model r-square'; title2 "Results for parent sample using &fit measure"; run; %end; data rsqtable&fit&bootrep(rename=(&fit=fit)); set rsqtable; sample=&bootrep; measure="&fit"; *drop NAGELKERKE; run; /* title "DA table (parent sample) using &fit"; proc print data=rsqtable&fit&bootrep; run; */ * combine resutls from all reps; *proc append base=alldatable data=alldatable&bootrep force; /*data domtable2; set domtable; filename outdat "F:\DA\domtable.txt"; file outdat; put sample measure IN X1 X2 X3 X4 CP1 CP2 CP3 CP4; run; */ * obtain conditional dominance contributions; proc summary nway; var CP1--CP&p; class in; output out=avgcont (drop=_type_) mean=x1-x&p; run; proc means noprint; var x1--x&p; output out=meanc mean=; run; data avgcont; set meanc (in=y) avgcont; if y then in=999; %if &bootrep=0 %then %do; proc print data=avgcont; id IN; var x1-x&p; format x1-x&p 5.4 IN %if &p<11 %then 1.0; %else 2.0;; title 'Dominance Analysis: Overall Average Contributions of Predictors (First Row)'; title2 'With Average Contributions to Models of Each Size (Remaining Rows)'; title3 "Results for parent sample using &fit measure"; run; %end; data avgcont&fit&bootrep; set avgcont; sample=&bootrep; measure="&fit"; run; * combine resutls from all fit measures and reps; proc append base=alldatable data=rsqtable&fit&bootrep force; run; * combine across fit measures and reps; proc append base=allda data=avgcont&fit&bootrep force; run; * combine across reps; *proc append base=allda data=allda&bootrep force; *run; /* data newtable2; set newtable; filename outdat "F:\DA\fits.txt"; file outdat; put sample measure IN x1 x2 x3 x4; run; */ *data avgcont2; *set avgcont; *sample=0; *filename outdat "C:\Documents and Settings\HP_Owner\My Documents\My SAS Files\9.1\DA\DA&fit&r...txt"; *file outdat; *put sample IN x1 x2 x3 x4; *run; /* proc transpose prefix=size out=meanc (rename=(SIZE999=OVERALL _NAME_=VAR)); id IN; var x1-x&p; run; proc sort; by descending overall; title 'Dominance Analysis: Average Predictor Contributions Overall and to Models of Each Size'; title2 'Ordered by general dominance measures'; title3 "Results for &fit measure"; data meanc; set meanc; drop size&p; proc print; id _character_; format _numeric_ 5.4; run; */ title; ********* Determine dominance ordering (Dij) *********; *-- complete dominance --*; data compda; set rsqtable&fit&bootrep; %do i=1 %to &p-1; %do j=&i+1 %to &p; if cp&i ^= . and cp&j ^= . then do; comp&i&j = 0; if cp&i > cp&j then comp&i&j = 1; if cp&i < cp&j then comp&i&j = -1; end; %end; %end; *proc print data=compda; run; proc means data=compda noprint; output out=dacomp; run; data dacomp&fit&bootrep; set dacomp; if _STAT_='MEAN'; measure = "&fit"; drop _TYPE_ _FREQ_ IN &fit x1--x&p cp1--cp&p; sample=&bootrep; run; %if &bootrep=0 %then %do; proc print data=dacomp&fit&bootrep; title 'Complete dominance: xi dominates xj if comp_ij is 1'; title2 'and xj dominates xi if comp_ij is -1 (otherwise no complete dominance)'; title3 "Results for parent sample using &fit measure"; run; %end; *-- conditional dominance --*; data condda; set avgcont&fit&bootrep; if in < &p; %do i=1 %to &p-1; %do j=&i+1 %to &p; cond&i&j = 0; if x&i > x&j then cond&i&j = 1; if x&i < x&j then cond&i&j = -1; %end; %end; *proc print data=condda; proc means data=condda noprint; output out=dacond; run; *proc print data=dacond; data dacond&fit&bootrep; set dacond; if _STAT_='MEAN'; measure = "&fit"; drop _TYPE_ _FREQ_ IN x1--x&p; sample=&bootrep; run; %if &bootrep=0 %then %do; proc print data=dacond&fit&bootrep; title 'Conditional dominance: xi dominates xj if cond_ij is 1'; title2 'and xj dominates xi if cond_ij is -1 (otherwise no conditional dominance)'; title3 "Results for parent sample using &fit measure"; run; %end; *-- general dominance --*; data genda&fit&bootrep; set avgcont&fit&bootrep; if in = 999; measure = "&fit"; drop _TYPE_ _FREQ_ IN; sample=&bootrep; %do i=1 %to &p-1; %do j=&i+1 %to &p; gen&i&j = 0; if x&i > x&j then gen&i&j = 1; if x&i < x&j then gen&i&j = -1; %end; %end; %if &bootrep=0 %then %do; proc print data=genda&fit&bootrep; title 'General dominance: xi dominates xj if gen_ij is 1'; title2 'and xj dominates xi if gen_ij is -1 (otherwise no general dominance)'; title3 "Results for parent sample using &fit measure"; run; %end; * combine resutls from all fit measures and reps; proc append base=allcomp data=dacomp&fit&bootrep force; proc append base=allcond data=dacond&fit&bootrep force; proc append base=allgen data=genda&fit&bootrep force; run; * combine resutls from all reps; /* proc append base=allcomp data=allcomp&bootrep force; proc append base=allcond data=allcond&bootrep force; proc append base=allgen data=allgen&bootrep force; */ %mend;