A SAS Macro for Scorecard Performance Evaluation – Yet Another Blog in Statistical Computing

PHOTO EMBED

Sat Jan 15 2022 16:49:38 GMT+0000 (Coordinated Universal Time)

Saved by @IrvineChangsha #sas #woe

%macro separation(data = , score = , y = );
***********************************************************;
* THE MACRO IS TO EVALUATE THE SEPARATION POWER OF A      *;
* SCORECARD                                               *; 
* ------------------------------------------------------- *;
* PARAMETERS:                                             *;
*  DATA : INPUT DATASET                                   *;
*  SCORE: SCORECARD VARIABLE                              *;
*  Y    : RESPONSE VARIABLE IN (0, 1)                     *;
* ------------------------------------------------------- *;
* OUTPUTS:                                                *;
*  SEPARATION_REPORT.TXT                                  *;
*  A SEPARATION SUMMARY REPORT IN TXT FORMAT              *;
*  NAMED AS THE ABOVE WITH PREDICTIVE MEASURES INCLUDING  *;
*  KS, AUC, GINI, AND DIVERGENCE                          *;
* ------------------------------------------------------- *;
* CONTACT:                                                *;
*  WENSUI.LIU@53.COM                                      *;
***********************************************************;

options nonumber nodate orientation = landscape linesize = 160 nocenter 
        formchar = "|----|+|---+=|-/\<>*" formdlim=' ' ls = 150; 

*** DEFAULT GROUP NUMBER FOR REPORT ***;
%let grp = 10;

data _tmp1 (keep = &y &score);
  set &data;
  where &y in (1, 0)  and &score ~= .;
run;

filename lst_out temp;

proc printto new print = lst_out;
run;

*** CONDUCT NON-PARAMETRIC TESTS ***; 
ods output wilcoxonscores = _wx;
ods output kolsmir2stats = _ks;
proc npar1way wilcoxon edf data = _tmp1;
  class &y;
  var &score;
run;

proc printto;
run;

proc sort data = _wx;
  by class;
run;

*** CALCULATE ROC AND GINI ***;
data _null_;
  set _wx end = eof;
  by class;

  array a{2, 3} _temporary_;
  if _n_ = 1 then do;
    a[1, 1] = n;
    a[1, 2] = sumofscores;
    a[1, 3] = expectedsum;
  end;
  else do;
    a[2, 1] = n;
  end;
  if eof then do;
    auc  = (a[1, 2] - a[1, 3]) / (a[1, 1] * a[2, 1])  + 0.5;
    if auc <= 0.5 then auc = 1 - auc;
    gini = 2 * (auc - 0.5);  
    call execute('%let auc = '||put(auc, 10.4)||';');
    call execute('%let gini = '||put(gini, 10.4)||';');
  end;
run;

*** CALCULATE KS ***;
data _null_;
  set _ks;

  if _n_ = 1 then do;
    ks = nvalue2 * 100;
    call execute('%let ks = '||put(ks, 10.4)||';');
  end;
run;

*** CAPTURE SCORE POINT FOR MAX KS ***;
data _null_;
  infile lst_out;
  input @" at Maximum = " ks_score;
  output;
  call execute('%let ks_score = '||put(ks_score, 10.4)||';');
  stop;
run;

proc summary data = _tmp1 nway;
  class &y;
  output out = _data_ (drop = _type_ _freq_)
  mean(&score.) = mean var(&score.) = variance;
run;

*** CALCULATE DIVERGENCE ***;
data _null_;
  set _last_ end = eof;
  array a{2, 2} _temporary_;
  if _n_ = 1 then do;
    a[1, 1] = mean;
    a[1, 2] = variance;
  end;
  else do;
    a[2, 1] = mean;
    a[2, 2] = variance;
  end;
  if eof then do;
    divergence = (a[1, 1] - a[2, 1]) ** 2 / ((a[1, 2] + a[2, 2]) / 2);
    call execute('%let divergence = '||put(divergence, 10.4)||';'); 
  end; 
run;

*** CAPTURE THE DIRECTION OF SCORE ***;
ods listing close;
ods output spearmancorr = _cor;
proc corr data = _tmp1 spearman;
  var &y;
  with &score;
run;
ods listing;
 
data _null_;
  set _cor;
  if &y >= 0 then do;
    call symput('desc', 'descending');
  end;
  else do;
    call symput('desc', ' ');
  end;
run;
%put &desc;

proc rank data = _tmp1 out = _tmp2 groups = &grp ties = low;
  var &score;
  ranks rank;
run;

proc summary data = _last_ nway;
  class rank;
  output out = _data_ (drop = _type_ rename = (_freq_ = freq))
  min(&score) = min_score max(&score) = max_score
  sum(&y) = bads;
run;

proc sql noprint;
  select sum(bads) into :bcnt from _last_;
  select sum(freq) - sum(bads) into :gcnt from _last_;
quit;

proc sort data = _last_ (drop = rank);
  by &desc min_score;
run;

data _data_;
  set _last_;
  by &desc min_score;

  i + 1; 
  percent = i / 100; 
  good  = freq - bads;
  odds  = good / bads;

  hit_rate = bads / freq;
  retain cum_bads cum_freq;
  cum_bads + bads;
  cum_freq + freq;
  cum_hit_rate = cum_bads / cum_freq;

  cat_rate = bads / &bcnt;
  retain cum_cat_rate;
  cum_cat_rate + cat_rate; 

  format symbol $4.;
  if i = 1 then symbol = 'BAD';
  else if i = &grp - 1 then symbol = 'V';
  else if i = &grp then symbol = 'GOOD';
  else symbol = '|';
run;

proc printto print = "%upcase(%trim(&score))_SEPARATION_REPORT.TXT" new;
run;

proc report data = _last_ spacing = 1 split = "/" headline nowd;
  column("GOOD BAD SEPARATION REPORT FOR %upcase(%trim(&score)) IN DATA %upcase(%trim(&data))/
          MAXIMUM KS = %trim(&ks) AT SCORE POINT %trim(&ks_score)/   
          ( AUC STATISTICS = %trim(&auc), GINI COEFFICIENT = %trim(&gini), DIVERGENCE = %trim(&divergence) )/ /"
         percent symbol min_score max_score good bads freq odds hit_rate cum_hit_rate cat_rate cum_cat_rate);

  define percent      / noprint order order = data;
  define symbol       / "" center               width = 5 center;
  define min_score    / "MIN/SCORE"             width = 10 format = 9.4        analysis min center;
  define max_score    / "MAX/SCORE"             width = 10 format = 9.4        analysis max center;
  define good         / "GOOD/#"                width = 10 format = comma9.    analysis sum;
  define bads         / "BAD/#"                 width = 10 format = comma9.    analysis sum;
  define freq         / "TOTAL/#"               width = 10 format = comma9.    analysis sum;
  define odds         / "ODDS"                  width = 10 format = 8.2        order;
  define hit_rate     / "BAD/RATE"              width = 10 format = percent9.2 order center;
  define cum_hit_rate / "CUMULATIVE/BAD RATE"   width = 10 format = percent9.2 order;
  define cat_rate     / "BAD/PERCENT"           width = 10 format = percent9.2 order center;
  define cum_cat_rate / "CUMU. BAD/PERCENT"     width = 10 format = percent9.2 order; 

  rbreak after / summarize dol skip;
run; 

proc printto;
run;

***********************************************************;
*                     END OF THE MACRO                    *;
***********************************************************; 
%mend separation;

libname data 'C:\Documents and Settings\liuwensui\Desktop\fraction_models\test';

%separation(data = data.accepts, score = bureau_score, y = bad);
content_copyCOPY

Scorecard

https://statcompute.wordpress.com/2012/09/28/a-sas-macro-for-scorecard-performance-evaluation/