Cox PH 模型在倖存分析(survival analysis)被廣泛的使用。一篇發表在SAS GLOBAL FORUM 2009的技術文件解說了在 SAS 底下如何用 Cox PH 模型來計算一些重要估計量的方法,並且提供一個方便的巨集程式來簡化繁複的程式寫作和運行。
C-index
C-index原本是在ROC curve裡面用來解釋預測鑒別能力的測量指標,但後來在Cox PH model裡也可以應用。在倖存分析裡,C-index可以解釋為一個機率,是從某個事件(如:死亡)來的一個病人,他會比非該事件內的任何一個病人都擁有較高的機會會發生該事件。以下列程式來說明如何用 SAS 算出 C-index:
proc phreg data=sample;
id idn;
model combdays*combfv(0)=mihx diabhx lowef;
output out=obs survival=surv;
run;
這個PROC PHREG程序並沒有什麼特別,使用者唯一需要額外做的就是利用一個ODS OUTPUT指令把個體的ID, 觀測到的倖存時間以及倖存函數估計值另存一個新檔(ods)。接著,用下面兩到程序去整理出一個concordance data。
proc sql;
create table allset as
select idn_j, y_j, x_j, idn as idn_i, surv as y_i, combdays as x_i
from evtset, obs
where idn_j<>idn;
quit;
data concord;
set allset;
if (x_iy_j) or (x_i>x_j and y_i
然後用下面這一串程式直接把C-index及其95%信賴區間該算出來:
data _null_;
set concord end=eof;
retain nch ndh;
if _N_=1 then do;
nch=0;
ndh=0;
end;
if concord=1 then nch+1;
if concord=0 then ndh+1;
if eof=1 then do;
call symput('ch',trim(left(nch)));
call symput('dh',trim(left(ndh)));
call symput('uspairs',trim(left(_n_)));
end;
run;
data _null_;
set sample end=eof;
if eof=1 then call symput('totobs',trim(left(_n_)));
run;
%put &ch &dh &uspairs &totobs;
data calculat;
ch=input("&ch",12.0);
dh=input("&dh",12.0);
uspairs=input("&uspairs",12.0);
totobs=input("&totobs",10.0);
pc=ch/(totobs*(totobs-1));
pd=dh/(totobs*(totobs-1));
c_hat=pc/(pc+pd);
w=(2*1.96**2)/(totobs*(pc+pd));
low_ci_w=((w+2*c_hat)/(2*(1+w)))-(sqrt((w**2+4*w*c_hat*(1-c_hat))/(2*(1+w))));
upper_ci_w=((w+2*c_hat)/(2*(1+w)))+(sqrt((w**2+4*w*c_hat*(1-c_hat))/(2*(1+w))));
run;
結果如下圖所示:
Calculating adjusted survival rates using corrected group prognosis method
作者自製了一段巨集程式 %CGPM_adj 可將倖存機率以及倖存曲線圖直接輸出。其原始碼如下:
*****************************************************************************;
* Program: CGPM_adj.sas;
* Purpose: Calculate the unadjusted and adjusted survival rates and plot the
*survival curves using corrected group prognosis method;
*****************************************************************************;
*----------------------------------------------------------------------------;
* 1. All variables used in the model have to be binary variable.
*Categorical and numeric variables need to be re-coded as dummy
*variable to meet this requirement;
* 2. The maximum number of variables in the model is limited to 20;
*(not including the control variable). But this number can be
*easily expanded as needed.;
* 3. The analysis dataset for modeling was assign to be in SAS
*default library WORK. ;
*----------------------------------------------------------------------------;
%macro CGPM_adj(outputpath, figname, tbname, dsn, covars, ctrlvar, outcom, cnsrval=0, timevar, rlalpha=0.05);
*----------------------------------------------------------------------------;
* Calculate unadjusted survival rate;
*----------------------------------------------------------------------------;
*create dataset for controlled variable;
proc sql;
create table ctrlset as
select distinct &ctrlvar
from &dsn
order by &ctrlvar;
quit;
*get the unadjusted survival rates for controlled variable;
proc phreg data=&dsn noprint;
model &timevar * &outcom (&cnsrval) = &ctrlvar;
baseline covariates=ctrlset out=unadjset survival=survival / nomean;
run;
*----------------------------------------------------------------------------;
* Calculate adjusted survival rates using corrected group prognosis method;
*----------------------------------------------------------------------------;
*create identifier set for all possible combinations of covariates;
data _null_;
cnt=1+count("&covars",' ');
call symputx('varcnt', cnt);
run;
%put &varcnt;
%macro idx;
%let i=1;
%do %until(%scan(&covars,&i,' ')=%str());
%local varnam;
%let varnam=%scan(&covars,&i,' ');
xp=&i+1;
if &varnam=1 then substr(idx,xp,1)=1;
%let i=%eval(&i+1);
%end;
%mend;
%macro idxrv;
%let i=1;
%do %until(%scan(&covars,&i,' ')=%str());
%local varnam;
%let varnam=%scan(&covars,&i,' ');
xp=&i+1;
if substr(idx,xp,1)='1' then &varnam=1; else &varnam=0;
%let i=%eval(&i+1);
%end;
%mend;
data modelset;
set &dsn;
idx=left(put(10**(input("&varcnt",2.0)),21.));
%idx;
keep idx &ctrlvar &covars &timevar &outcom;
run;
proc freq data=modelset noprint;
tables idx / out=idx;
run;
data popset (drop=percent xp);
if _n_=1 then do until(last);
set idx end=last;
&ctrlvar=0;
%idxrv;
output;
end;
set idx;
&ctrlvar=1;
%idxrv;
output;
run;
ods output censoredsummary=censum parameterEstimates=parasum;
proc phreg data=modelset;
id idx;
model &timevar * &outcom (&cnsrval) = &ctrlvar &covars / rl alpha=&rlalpha;
baseline covariates=popset out=adjset0 survival=survival / nomean;
run;
ods output close;
proc sort data=popset out=count (keep=idx count);
by idx;
run;
proc sort data=adjset0;
by idx;
run;
data adjset1;
merge adjset0 count;
by idx;
run;
proc sort data=adjset1;
by &ctrlvar &timevar;
run;
data adjset;
set adjset1;
by &ctrlvar &timevar;
if first.&timevar then frqsum=0;
frqsum+count;
if first.&timevar then sursum=0;
sursum+survival*count;
if last.&timevar;
adjsurv=sursum/frqsum;
keep &ctrlvar &timevar adjsurv;
run;
data allsurv;
length group $22;
set adjset (in=a rename=(adjsurv=survival)) unadjset (in=b);
if a=1 and &ctrlvar=0 then group='Adjusted'||' '||"&ctrlvar"||'=0';
else if a=1 and &ctrlvar=1 then group='Adjusted'||' '||"&ctrlvar"||'=1';
else if b=1 and &ctrlvar=0 then group='Unadjusted'||'
'||"&ctrlvar"||'=0';
else if b=1 and &ctrlvar=1 then group='Unadjusted'||'
'||"&ctrlvar"||'=1';
eventrate=1-survival;
run;
data summary;
set allsurv;
by group;
if last.group;
run;
*----------------------------------------------------------------------------;
* You can change the code below to modify the title and graph axis scale etc.;
*----------------------------------------------------------------------------;
goption reset=all device=emf gsfname=grfa hsize=6.0 vsize=4 ftext='Arial/bo' htext=11pt display;
filename grfa "&outputpath.\&figname..emf";
symbol1 i=steplj v=none l=1 ci=green w=1;
symbol2 i=steplj v=none l=1 ci=black w=1;
symbol3 i=steplj v=none l=3 ci=green w=1;
symbol4 i=steplj v=none l=3 ci=black w=1;
axis1 offset=(0.5 cm,0.5 cm);
axis2 label=(angle=90 j=c 'Survival') order=(0.8 to 1 by 0.05) offset=(0.0cm,0.0 cm);
legend1 across=2
mode=share
position=(bottom left inside)
label=none
value=(j=l h=10pt)
offset=(0.5cm, 0cm);
proc gplot data=allsurv;
plot survival * &timevar = group / legend=legend1 haxis=axis1 vaxis=axis2;
label eventrate='Survival Rate' &timevar='Days After Randomization';
run;
quit;
title;
footnote;
ods rtf style=journal file="&outputpath.\&tbname..rtf" bodytitle
startpage=no;
proc print data=parasum noobs;
title "Analysis of Maximum Likelihood Estimates (alpha=&rlalpha for CI)";
run;
proc print data=summary noobs;
var group survival eventrate;
title 'Estimated survival and event rates at the time of last event observed';
run;
ods rtf close;
%mend;
巨集參數定義如下:
- outputpath=報表與圖形輸出的路徑
- figname=圖形檔名
- tbname=報表檔名
- dsn=存放在SAS裡面的資料檔名
- covars=所有預測變數的名稱,不包含控制變數的名稱
- ctrlvar=控制變數的名稱
- outcom=輸出變數(事件)的名稱
- cnsrval=設限的代碼,預設值為0
- timevar=時間變數名稱
- rlalpha=顯著水準alpha,預設值為0.05
承上面的範例,巨集程式執行的寫法如下:
%CGPM_adj(outputpath=L:\SGF2009\Examples,
figname=Adjdiab_fig,
tbname=adjdiab_summary,
dsn=sample,
covars=age65 lowef chfhx,
ctrlvar=diabhx,
outcom=deathfv,
cnsrval=0,
timevar=deathdays,
rlalpha=0.05);
結果如下:
Presenting the effect of a continuous covariate on estimated survival
當模型內包含連續型變數時,需要額外多兩個步驟來計算n-year倖存率。首先,用PROC PHREG程序把基底函數(baseline function)等一干相關的估計量另存成兩個新檔 yrout & sample(紅色部分):
proc phreg data=sample;
model combdays*combfv(0)=age male;
baseline out=yrout lower=low_ci upper=up_ci survival=surv covariates=sample/alpha=0.05 nomean;
run;
再依照想要分類的變數(male),去和倖存時間變數去排序(combdays)後再另存新檔(maxday):
proc sort data=sample out=maxday (where=(combfv=1) keep=idn male combfv combdays);
by male combdays;
run;
然後利用下面兩個資料步驟程序去計算男人和女人的5-year倖存率:
data _null_;
set maxday;
by male;
if male=0 and last.male then call symput('lastday_f',combdays);
if male=1 and last.male then call symput('lastday_m',combdays);
run;
%put &lastday_f &lastday_m;
data yrest;
set yrout;
evtrate=(1-surv)*100;
evtlow=(1-low_ci)*100;
evtup=(1-up_ci)*100;
if combdays=&lastday_m and male=1 then output;
if combdays=&lastday_f and male=0 then output;
run;
最後畫出的圖形如下所示:
(PS) 原文內並沒有附此圖的繪製程式。
Contact information
Lea Liu, Maryland Medical Research Institute
600 Wyndhurst Ave. Baltimore, MD 21210
(410) 435-4200, Email: lliu@mmri.org, Web: www.mmri.org
沒有留言:
張貼留言
要問問題的人請在文章下方的intensedebate欄位留言,請勿使用blogger預設的意見表單。今後用blogger意見表單留言的人我就不回應了。