R语言系列6:生存分析中多重时间依赖性ROC曲线绘制 timeROC
上一篇文章,我们讲到,以及
今天主要围绕生存分析中,预测模型验证部分,如何将多条time-depend ent ROC 曲线绘制在一个图里,进行示范操作
模型构建(同前)
有关模型构建,前面已述,代码与之前重复
输入文件:dev.csv
模型构建:
dev<-read.csv("dev.csv")
head(dev)
library(rms)
library(foreign)
library(survival)
#将time变为num数字形式
dev$time<-as.numeric(dev$time)
#将其余变量转为factor并进行标注
dev$age<-factor(dev$age,labels=c('<50','50-59','60-69','70-79', '>=80'))
dev$sex<-factor(dev$sex,labels=c('Male','Female'))
dev$race<-factor(dev$race,labels=c('White','Black','Other'))
dev$marry<-factor(dev$marry,labels=c('Married','Unmarried','Divorced or Seperated','Widowed'))
dev$site<-factor(dev$site,labels=c('cecum','ascending colon','hepatic','transverse', 'splenic', 'descending', 'sigmoid'))
dev$size<-factor(dev$size,labels=c('<3','<4','<5', '<6', '<7', '<8', '>=8'))
dev$grade<-factor(dev$grade,labels=c('I','II','III', 'IV'))
#打包数据
ddist <- datadist(dev)
options(datadist='ddist')
#time单位是:月
units(dev$time) <- "Month"
fcox <- cph(Surv(dev$time,dev$os==1) ~ age + race+ size+marry+grade,surv=T,x=T, y=T,data=dev)
#画图
surv <- Survival(fcox)
nom <- nomogram(fcox, fun=list(function(x) surv(36, x),
function(x) surv(60, x)),
funlabel=c("3-years Survival Probability",
"5-years Survival Probability"),lp=F)
plot(nom)
Calibration curve绘制:
#计算三年生存率的标准曲线
fcox3 <- cph(Surv(dev$time,dev$os==1) ~ age + race+ size+marry+grade,surv=T,x=T, y=T,time.inc = 36,data=dev)
cal3 <- calibrate(fcox3, cmethod="KM", method="boot", u=36, m=1200, B=500)
plot(cal3)
计算模型预测值
基于多因素Cox回归系数,计算每个样本风险值 PI,使用if循环
agen<- as.numeric(dev$age)
dev$agepoint<- ifelse(agen==1,0,ifelse(agen==2, 0.22246,ifelse(agen==3,0.38178,ifelse(agen==4,0.71654,1.39447))))
marryn<-as.numeric(dev$marry)
dev$marrypoint<-ifelse(marryn==1,0,ifelse(marryn==2,0.38655,ifelse(marryn==3, 0.22522,0.18693)))
graden<-as.numeric(dev$grade)
dev$gradepoint<-ifelse(graden==1,0,ifelse(graden==2,0.24516,ifelse(graden==3,0.41496,0.57645)))
racen<-as.numeric(dev$race)
dev$racepoint<-ifelse(racen==0,0,ifelse(racen==1,0.46380,1.04854))
sizen<-as.numeric(dev$size)
dev$sizepoint<-ifelse(sizen==1,0,ifelse(sizen==2,-0.08669,ifelse(sizen==3,-0.02557,ifelse(sizen==4,0.04862,ifelse(sizen==5,-0.24576,ifelse(sizen==6,-0.10212,-0.23057))))))
#PI等于各项之和
dev$PI <- dev$points <- rowSums(dev[,c("agepoint","marrypoint","sizepoint","gradepoint","racepoint")])
summary(dev$PI)
依据PI,进行ROC绘制,可作为模型评估的一种方法,分别绘制1,3,5年
在此以3年为例,进行示范
#用"KM"法拟合PI指标的3年ROC曲线
library(survivalROC) # 载入程序包
nobs<- NROW(dev) # 定义数据集的行数
cutoff1<-12 # 设定为1年生存时间。
cutoff2<- 36 # 设定为3年生存时间。
cutoff3<- 60 # 设定为5年生存时间。
#以3年为例,进行绘制
SROC3= survivalROC(Stime = dev$time, status = dev$os, marker = dev$PI, predict.time =cutoff2, method= "KM" ) #构建生存函数
cut.op3= SROC3$cut.values[which.max(SROC3$TP-SROC3$FP)] # 计算最佳截断值
cut.op3 # 输出最佳截断值
plot(SROC3$FP,SROC3$TP, type="l", xlim=c(0,1), ylim=c(0,1),
xlab = paste( "FP","\n", "AUC = ",round(SROC3$AUC,3)),
ylab = "TP", col="blue")
abline(0,1)
legend("bottomright",c("3-year OS"),col="blue",lty=c(1,1))
绘制多个时间点timeROC
这里有两种常用方法,
1.ggplot
2.timeROC package (需要安装对应版本Rtools,略麻烦,后续介绍)
推荐第一种,该方法参考自果子学生信,果子老师的博客,进行适当修改
survivalROC_helper <- function(t) {
survivalROC(Stime=dev$time, status=dev$os, marker = dev$PI,
predict.time =t, method="KM")}
#install.packages("tidyverse")
library(tidyverse)
library(survivalROC)
survivalROC_data <- data_frame(t = c(1,3,5)) %>%
mutate(survivalROC = map(t, survivalROC_helper),
## Extract scalar AUC
auc = map_dbl(survivalROC, magrittr::extract2, "AUC"),
## Put cut off dependent values in a data_frame
df_survivalROC = map(survivalROC, function(obj) {
as_data_frame(obj[c("cut.values","TP","FP")])
})) %>%
dplyr::select(-survivalROC) %>%
unnest() %>%
arrange(t, FP, TP)
survivalROC_data1 <- survivalROC_data %>%
mutate(auc =sprintf("%.3f",auc))%>%
unite(year, t,auc,sep = " year AUC: ")
AUC =factor(survivalROC_data1$year)
survivalROC_data1 %>%
ggplot(mapping = aes(x = FP, y = TP)) +
geom_path(aes(color= AUC))+
geom_abline(intercept = 0, slope = 1, linetype = "dashed")+
theme_bw() +
theme(legend.position = c(0.8,0.2))
注:该文是基于Cox回归系数计算个体预测值,作为连续变量,进行ROC曲线绘制,该方法应用相对广泛
也可基于列线图预模型,计算个体total points,作为连续变量,进行分析,将会在之后的操作中进行演示。如何计算列线图模型各个变量对应的具体point,以及据此计算total points,欢迎关注。
演示文件,这里不再重复设置。
好不容易看完啦,点个在看再走吧