A SAS Macro for Scorecard Performance Evaluation – Yet Another Blog in Statistical Computing
Sat Jan 15 2022 16:49:38 GMT+0000 (Coordinated Universal Time)
Saved by @IrvineChangsha #sas #woe
%macro separation(data = , score = , y = );
  /*----------------------------------------------------------------------
   * Evaluate the separation power of a scorecard.
   *
   * PARAMETERS
   *   DATA  : input dataset
   *   SCORE : scorecard variable
   *   Y     : response variable coded as 0 / 1
   *
   * OUTPUT
   *   <SCORE>_SEPARATION_REPORT.TXT (SCORE upper-cased), a text report
   *   routed through PROC PRINTTO that lists GROUPS=&grp score bands and
   *   shows KS, AUC, Gini and divergence in the report header.
   *
   * SIDE EFFECTS (unchanged from the original implementation)
   *   - the OPTIONS statement below changes session options and they are
   *     not restored afterwards
   *   - work datasets _TMP1, _TMP2, _WX, _KS, _COR and several
   *     automatically named DATAn datasets are left behind
   *
   * CONTACT: WENSUI.LIU@53.COM
   *---------------------------------------------------------------------*/

  /* Keep every working macro variable local so the macro cannot
     overwrite a same-named macro variable that belongs to the caller. */
  %local grp auc gini ks ks_score divergence desc bcnt;

  /* Fail early with a readable message instead of generating invalid
     SAS statements from empty parameters. */
  %if %length(%superq(data)) = 0 or %length(%superq(score)) = 0
      or %length(%superq(y)) = 0 %then %do;
    %put ERROR: SEPARATION macro requires DATA=, SCORE= and Y= to be specified.;
    %return;
  %end;

  /* The original listed LINESIZE twice (160, then LS = 150); the later
     value won, so only the effective setting is kept. */
  options nonumber nodate orientation = landscape nocenter
          formchar = "|----|+|---+=|-/\<>*" formdlim = ' ' linesize = 150;

  /* Default group number for the report. */
  %let grp = 10;

  /* Keep only rows with a valid 0/1 response and a non-missing score. */
  data _tmp1 (keep = &y &score);
    set &data;
    where &y in (1, 0) and &score ~= .;
  run;

  /* Route the listing output to a temporary file; it is parsed further
     down to find the score point at which KS is attained. */
  filename lst_out temp;

  proc printto new print = lst_out;
  run;

  /* Non-parametric tests: Wilcoxon scores and EDF (KS) statistics. */
  ods output wilcoxonscores = _wx;
  ods output kolsmir2stats = _ks;

  proc npar1way wilcoxon edf data = _tmp1;
    class &y;
    var &score;
  run;

  proc printto;
  run;

  proc sort data = _wx;
    by class;
  run;

  /* AUC and Gini from the Wilcoxon rank sums of the two classes. */
  data _null_;
    set _wx end = eof;
    by class;
    array a{2, 3} _temporary_;
    if _n_ = 1 then do;
      a[1, 1] = n;
      a[1, 2] = sumofscores;
      a[1, 3] = expectedsum;
    end;
    else do;
      a[2, 1] = n;
    end;
    if eof then do;
      auc = (a[1, 2] - a[1, 3]) / (a[1, 1] * a[2, 1]) + 0.5;
      /* Report AUC on the >= 0.5 side regardless of score direction. */
      if auc <= 0.5 then auc = 1 - auc;
      gini = 2 * (auc - 0.5);
      call symputx('auc', put(auc, 10.4));
      call symputx('gini', put(gini, 10.4));
    end;
  run;

  /* KS is taken from NVALUE2 on the first row of the KolSmir2Stats ODS
     table and scaled by 100. */
  data _null_;
    set _ks;
    if _n_ = 1 then do;
      ks = nvalue2 * 100;
      call symputx('ks', put(ks, 10.4));
    end;
  run;

  /* Score point for the maximum KS: read the number that follows the
     text " at Maximum = " in the captured listing, then stop. */
  data _null_;
    infile lst_out;
    input @" at Maximum = " ks_score;
    call symputx('ks_score', put(ks_score, 10.4));
    stop;
  run;

  /* Mean and variance of the score within each response class. */
  proc summary data = _tmp1 nway;
    class &y;
    output out = _data_ (drop = _type_ _freq_)
      mean(&score.) = mean
      var(&score.)  = variance;
  run;

  /* Divergence = squared mean difference over the average variance. */
  data _null_;
    set _last_ end = eof;
    array a{2, 2} _temporary_;
    if _n_ = 1 then do;
      a[1, 1] = mean;
      a[1, 2] = variance;
    end;
    else do;
      a[2, 1] = mean;
      a[2, 2] = variance;
    end;
    if eof then do;
      divergence = (a[1, 1] - a[2, 1]) ** 2 / ((a[1, 2] + a[2, 2]) / 2);
      call symputx('divergence', put(divergence, 10.4));
    end;
  run;

  /* Direction of the score: the sign of the Spearman correlation between
     response and score decides the sort order of the report rows. */
  ods listing close;
  ods output spearmancorr = _cor;

  proc corr data = _tmp1 spearman;
    var &y;
    with &score;
  run;

  ods listing;

  data _null_;
    set _cor;
    if &y >= 0 then do;
      call symput('desc', 'descending');
    end;
    else do;
      call symput('desc', ' ');
    end;
  run;

  %put &desc;

  /* Cut the score into &grp rank groups and summarise each group. */
  proc rank data = _tmp1 out = _tmp2 groups = &grp ties = low;
    var &score;
    ranks rank;
  run;

  proc summary data = _last_ nway;
    class rank;
    output out = _data_ (drop = _type_ rename = (_freq_ = freq))
      min(&score) = min_score
      max(&score) = max_score
      sum(&y)     = bads;
  run;

  /* Total number of bads, the denominator of the capture rate. The
     original also selected a total of goods into a macro variable that
     was never used; that query is removed. */
  proc sql noprint;
    select sum(bads) into :bcnt from _last_;
  quit;

  proc sort data = _last_ (drop = rank);
    by &desc min_score;
  run;

  data _data_;
    set _last_;
    by &desc min_score;
    i + 1;
    percent = i / 100;
    good = freq - bads;
    /* A band without bads has undefined odds: set missing explicitly
       rather than triggering a division-by-zero error in the log. */
    if bads > 0 then odds = good / bads;
    else odds = .;
    hit_rate = bads / freq;
    retain cum_bads cum_freq;
    cum_bads + bads;
    cum_freq + freq;
    cum_hit_rate = cum_bads / cum_freq;
    /* Same guard for a sample that contains no bads at all. */
    if &bcnt > 0 then cat_rate = bads / &bcnt;
    else cat_rate = .;
    retain cum_cat_rate;
    cum_cat_rate + cat_rate;
    format symbol $4.;
    /* Left-hand marker column: BAD at the first row, a V on the
       next-to-last row, GOOD at the last row, a bar in between. */
    if i = 1 then symbol = 'BAD';
    else if i = &grp - 1 then symbol = 'V';
    else if i = &grp then symbol = 'GOOD';
    else symbol = '|';
  run;

  /* Write the report to <SCORE>_SEPARATION_REPORT.TXT. */
  proc printto print = "%upcase(%trim(&score))_SEPARATION_REPORT.TXT" new;
  run;

  proc report data = _last_ spacing = 1 split = "/" headline nowd;
    column("GOOD BAD SEPARATION REPORT FOR %upcase(%trim(&score)) IN DATA %upcase(%trim(&data))/ MAXIMUM KS = %trim(&ks) AT SCORE POINT %trim(&ks_score)/ ( AUC STATISTICS = %trim(&auc), GINI COEFFICIENT = %trim(&gini), DIVERGENCE = %trim(&divergence) )/ /"
           percent symbol min_score max_score good bads freq odds hit_rate
           cum_hit_rate cat_rate cum_cat_rate);
    define percent      / noprint order order = data;
    define symbol       / "" center width = 5 center;
    define min_score    / "MIN/SCORE" width = 10 format = 9.4 analysis min center;
    define max_score    / "MAX/SCORE" width = 10 format = 9.4 analysis max center;
    define good         / "GOOD/#" width = 10 format = comma9. analysis sum;
    define bads         / "BAD/#" width = 10 format = comma9. analysis sum;
    define freq         / "TOTAL/#" width = 10 format = comma9. analysis sum;
    define odds         / "ODDS" width = 10 format = 8.2 order;
    define hit_rate     / "BAD/RATE" width = 10 format = percent9.2 order center;
    define cum_hit_rate / "CUMULATIVE/BAD RATE" width = 10 format = percent9.2 order;
    define cat_rate     / "BAD/PERCENT" width = 10 format = percent9.2 order center;
    define cum_cat_rate / "CUMU. BAD/PERCENT" width = 10 format = percent9.2 order;
    rbreak after / summarize dol skip;
  run;

  /* Restore the default print destination. */
  proc printto;
  run;
%mend separation;

libname data 'C:\Documents and Settings\liuwensui\Desktop\fraction_models\test';

%separation(data = data.accepts, score = bureau_score, y = bad);
Scorecard
https://statcompute.wordpress.com/2012/09/28/a-sas-macro-for-scorecard-performance-evaluation/
Comments