评分卡建模—单变量分析

本文介绍在逐步回归(stepwise)筛选出特征之后,如何对这些特征做单变量分析:先用 PROC RANK 把每个特征按取值排序分成 10 组,再按组统计好坏样本数、WOE、IV 分量(pre_iv)和坏样本率。以下是代码:

代码很简单,看一遍就能明白。

/* Session options used by the rest of the program. */
options compress=yes;       /* compress the data sets created below */
options mlogic mprint;      /* trace macro logic and echo macro-generated code in the log */
options validvarname=any;   /* accept variable names outside the V7 naming rules */

 

/* Run the stepwise logistic regression (that step is not part of this listing). */
/* Bin the selected features.                                                     */
/* NOTE(review): parmas1 is presumably the parameter table produced by the        */
/* stepwise step, with one row per selected variable in _name_ - confirm.         */

/*
 * Build the macro variables that drive %rank:
 *   varall              - every _name_ in parmas1, blank separated
 *   n                   - number of names ending in the literal suffix "_2"
 *   varname             - those names, blank separated
 *   varname1..varname&n - the same names, one per macro variable
 *
 * The LIKE pattern is single-quoted so the macro processor does not try to
 * resolve %_2 as a macro call, and the underscore is escaped because a bare
 * "_" in LIKE matches any single character.
 */
proc sql noprint;
    select _name_
        into :varall separated by ' '
        from parmas1;

    select count(*)
        into :n
        from parmas1
        where _name_ like '%^_2' escape '^';

    select _name_
        into :varname separated by ' '
        from parmas1
        where _name_ like '%^_2' escape '^';

    /* %left strips the leading blanks PROC SQL puts in front of &n. */
    select _name_
        into :varname1 - :varname%left(&n.)
        from parmas1
        where _name_ like '%^_2' escape '^';
quit;

%put &varname.;
%put &n.;

/*
 * %rank - bin every selected feature into 10 rank groups.
 *
 * Reads : macro variables n and varname1..varname&n; data set train1.
 * Writes: work.rank = train1 plus one column r_<feature> per feature holding
 *         the PROC RANK group (GROUPS=10, DESCENDING); a missing rank is
 *         recoded to -99 so that missing values form their own bin.
 *
 * The per-feature rank tables are combined with a MERGE without BY, which
 * relies on PROC RANK keeping the row order of train1.
 */
%macro rank;
    %local i j;

    /* Nothing to do when no feature was selected. */
    %if &n. < 1 %then %do;
        %put WARNING: rank: no variables to bin (n=&n.).;
        %return;
    %end;

    %do i = 1 %to &n.;
        %if &i. = 1 %then %do;
            /* First feature: keep every column of train1 as the base table. */
            proc rank data=train1 out=rank descending groups=10;
                var &&varname&i.;
                ranks r_&&varname&i.;
            run;

            data rank;
                set rank;
                if missing(r_&&varname&i.) then r_&&varname&i. = -99;
            run;
        %end;
        %else %do;
            /* Later features: keep only the new rank column, then attach it. */
            proc rank data=train1 out=rank&i.(keep=r_&&varname&i.) descending groups=10;
                var &&varname&i.;
                ranks r_&&varname&i.;
            run;

            data rank&i.;
                set rank&i.;
                if missing(r_&&varname&i.) then r_&&varname&i. = -99;
            run;

            data rank;
                merge rank rank&i.;
            run;
        %end;
    %end;

    /* Drop exactly the temporary tables created above (rank2..rank&n). */
    %if &n. >= 2 %then %do;
        proc datasets lib=work nolist;
            delete %do j = 2 %to &n.; rank&j. %end; / memtype=data;
        quit;
    %end;
%mend rank;

%rank;

/* Write the variable list of work.rank (name, label, type) to rank_var
   without printing the PROC CONTENTS report. */
proc contents data=rank noprint out=rank_var(keep=name label type);
run;

/*
 * Build the macro variables that drive %woeall:
 *   tot_bad / tot_good / tot_both - rows in rank with default=1, default=0, all rows
 *   n                             - number of variables whose name starts with "r_"
 *   rankname                      - those names, blank separated
 *   rankname1..rankname&n         - the same names, one per macro variable
 *
 * The underscore in the LIKE pattern is escaped because a bare "_" matches
 * any single character and would also pick up names such as "rate".
 */
proc sql noprint;
    select
        sum(case when default = 1 then 1 else 0 end),
        sum(case when default = 0 then 1 else 0 end),
        count(*)
        into :tot_bad, :tot_good, :tot_both
        from rank;

    select count(*)
        into :n
        from rank_var
        where name like 'r^_%' escape '^';

    select name
        into :rankname separated by ' '
        from rank_var
        where name like 'r^_%' escape '^';

    /* %left strips the leading blanks PROC SQL puts in front of &n. */
    select name
        into :rankname1 - :rankname%left(&n.)
        from rank_var
        where name like 'r^_%' escape '^';
quit;

%put &n.;
%put &rankname.;

/*
 * %woeall - per-bin good/bad statistics, WOE and IV contribution for every
 *           rank variable.
 *
 * Reads : macro variables n, rankname1..rankname&n, tot_bad, tot_good,
 *         tot_both; data set rank (with the 0/1 target column "default").
 * Writes: work.woe, one row per (rank variable, bin) with
 *           varname   - name of the rank variable
 *           tier      - bin value
 *           cnt       - rows in the bin;  cnt_pct = cnt / tot_both
 *           sum_good  - rows with default=0;  dist_good = sum_good / tot_good
 *           sum_bad   - rows with default=1;  dist_bad  = sum_bad  / tot_bad
 *           woe       - log(dist_bad / dist_good)
 *           pre_iv    - (dist_bad - dist_good) * woe
 *           huai_rate - sum_bad / cnt (bad rate of the bin)
 *
 * A bin with no goods or no bads makes woe and pre_iv missing (division by
 * zero / log of zero); those rows are kept as they are.
 */
%macro woeall;
    %local i j bad_cnt good_cnt;

    /* Nothing to do when there is no rank variable. */
    %if &n. < 1 %then %do;
        %put WARNING: woeall: no rank variables found (n=&n.).;
        %return;
    %end;

    /* Shared SQL fragments: number of bad / good rows in the current group. */
    %let bad_cnt  = sum(case when default = 1 then 1 else 0 end);
    %let good_cnt = sum(case when default = 0 then 1 else 0 end);

    %do i = 1 %to &n.;
        proc sql;
            create table woe&i. as
            select
                /* fixed length so that stacking the tables never truncates a name */
                "&&rankname&i." as varname length=32,
                &&rankname&i. as tier,
                count(*) as cnt,
                count(*) / &tot_both. as cnt_pct,
                &good_cnt. as sum_good,
                &good_cnt. / &tot_good. as dist_good,
                &bad_cnt. as sum_bad,
                &bad_cnt. / &tot_bad. as dist_bad,
                log((&bad_cnt. / &tot_bad.) / (&good_cnt. / &tot_good.)) as woe,
                ((&bad_cnt. / &tot_bad.) - (&good_cnt. / &tot_good.))
                    * log((&bad_cnt. / &tot_bad.) / (&good_cnt. / &tot_good.)) as pre_iv,
                &bad_cnt. / count(*) as huai_rate
            from rank
            group by &&rankname&i.
            order by tier;
        quit;
    %end;

    /* Stack all per-variable tables in one pass. */
    data woe;
        set %do j = 1 %to &n.; woe&j. %end; ;
    run;

    /* Drop exactly the temporary tables created above (woe1..woe&n). */
    proc datasets lib=work nolist;
        delete %do j = 1 %to &n.; woe&j. %end; / memtype=data;
    quit;
%mend woeall;

%woeall;

 

评分卡建模—单变量分析