我在使用 R 的 survminer 包中的 ggsurvplot 函数时遇到了问题。当我把带有 newdata 参数的 survfit 结果传给 ggsurvplot 来绘制生存曲线时，该函数错误地把观测数当成了因子的水平数，从而报错。如果不使用 newdata 参数，该函数又无法正确识别因子变量的水平，从而导致另一个错误。
观察到的行为：使用 newdata 参数时：
# Attempt 1: build the survival curves with `newdata` set to the full data
# set, then hand the result to ggsurvplot().
# NOTE(review): Surv()/coxph()/survfit(), ggsurvplot() and theme_minimal() are
# used without any library() call in this snippet — presumably survival,
# survminer and ggplot2 were attached earlier; confirm.

# Fix the RNG seed so the simulated data are reproducible.
set.seed(123)
# Simulated data, 100 rows: exponential follow-up times (rate 0.1), a 0/1
# status indicator, and a three-valued genotype label.
d.dat.tot <- data.frame(
DONOR_SURVIVAL_TIME = rexp(100, 0.1),
DEAD_OR_ALIVE = sample(0:1, 100, replace = TRUE),
GENOTYPE.111758446 = sample(c("A/A", "G/A", "G/G"), 100, replace = TRUE)
)
# Data preparation
# Keep rows whose status is below 2 (the simulated status is only 0 or 1, so
# no row is dropped here), then turn the genotype into a factor.
d.dat.tot.clean <- d.dat.tot[d.dat.tot$DEAD_OR_ALIVE < 2, ]
d.dat.tot.clean$GENOTYPE.111758446 <- as.factor(d.dat.tot.clean$GENOTYPE.111758446)
# Cox proportional hazards model with genotype as the only covariate.
cox_model_single_111758446 <- coxph(
Surv(DONOR_SURVIVAL_TIME, DEAD_OR_ALIVE) ~ GENOTYPE.111758446,
data = d.dat.tot.clean
)
# `newdata` is the whole cleaned data set, i.e. one row per observation.
# NOTE(review): survfit() on a coxph fit presumably returns one curve per row
# of `newdata`, which would explain why ggsurvplot() then expects as many
# legend labels as there are observations — confirm against the survival docs.
cox_plot_single_111758446 <- ggsurvplot(
survfit(cox_model_single_111758446, newdata = d.dat.tot.clean),
data = d.dat.tot.clean,
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
legend.title = "Genotype",
# One label per genotype factor level.
legend.labs = levels(d.dat.tot.clean$GENOTYPE.111758446),
xlab = "Time to Last Follow-up, mo",
ylab = "Cumulative Survival, %",
ggtheme = theme_minimal(),
palette = "set2"
)
这会导致错误:
Error in ggsurvplot_df(d, fun = fun, color = color, palette = palette, :
The length of legend.labs should be 236.
（236 是我实际数据中的观测数。）
不使用 newdata 参数时：该函数无法正确识别因子变量的水平，并返回：
Error in ggsurvplot_df(d, fun = fun, color = color, palette = palette, :
The length of legend.labs should be 1.
# Sample reproducible data
# Attempt 2: same model, but survfit() is called without `newdata`.
# NOTE(review): no library() call is shown in this snippet — presumably
# survival, survminer and ggplot2 are already attached; confirm.
# Fix the RNG seed so the simulated data are reproducible.
set.seed(123)
# Simulated data, 100 rows: exponential follow-up times (rate 0.1), a 0/1
# status indicator, and a three-valued genotype label.
d.dat.tot <- data.frame(
DONOR_SURVIVAL_TIME = rexp(100, 0.1),
DEAD_OR_ALIVE = sample(0:1, 100, replace = TRUE),
GENOTYPE.111758446 = sample(c("A/A", "G/A", "G/G"), 100, replace = TRUE)
)
# Data preparation
# Keep rows whose status is below 2 (no row is dropped for this simulated
# 0/1 status), then turn the genotype into a factor.
d.dat.tot.clean <- d.dat.tot[d.dat.tot$DEAD_OR_ALIVE < 2, ]
d.dat.tot.clean$GENOTYPE.111758446 <- as.factor(d.dat.tot.clean$GENOTYPE.111758446)
# Cox model fitting
cox_model_single_111758446 <- coxph(Surv(DONOR_SURVIVAL_TIME, DEAD_OR_ALIVE) ~ GENOTYPE.111758446, data = d.dat.tot.clean)
# Survival curves creation
# NOTE(review): without `newdata`, survfit() on a coxph fit presumably yields
# a single curve rather than one per genotype, which would match the
# "length of legend.labs should be 1" error quoted above — confirm against
# the survival docs.
surv_fit <- survfit(cox_model_single_111758446)
# Plotting survival curves
cox_plot_single_111758446 <- ggsurvplot(
surv_fit,
data = d.dat.tot.clean,
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
legend.title = "Genotype",
# One label per genotype factor level, although `surv_fit` was built without
# any per-genotype `newdata`.
legend.labs = levels(d.dat.tot.clean$GENOTYPE.111758446),
xlab = "Time to Last Follow-up, mo",
ylab = "Cumulative Survival, %",
ggtheme = theme_minimal(),
palette = "set2"
)
# Render the plot object.
print(cox_plot_single_111758446)
我没有完整的答案。我认为您需要在 newdata 中为每个组各提供 1 行；这样做可以画出生存曲线，但风险表（risk table）仍然无法生成。
# Partial workaround: give survfit() a `newdata` with exactly one row per
# genotype level, so the number of requested curves equals the number of
# legend labels supplied to ggsurvplot() below.
# Relies on `cox_model_single_111758446` and `d.dat.tot.clean` created by the
# earlier snippets.
# NOTE(review): the `newdata` column is a character vector of the level names,
# not a factor — presumably survfit() maps it onto the model's factor levels;
# confirm.
sfit <- survfit(
cox_model_single_111758446,
newdata = data.frame(GENOTYPE.111758446 = levels(d.dat.tot.clean$GENOTYPE.111758446))
)
# According to the accompanying note, this call draws the curves, but the
# risk table (risk.table = TRUE) still does not work.
cox_plot_single_111758446 <- ggsurvplot(
sfit,
data = d.dat.tot.clean,
pval = TRUE,
conf.int = TRUE,
risk.table = TRUE,
legend.title = "Genotype",
# One label per genotype factor level, matching the rows of `newdata` above.
legend.labs = levels(d.dat.tot.clean$GENOTYPE.111758446),
xlab = "Time to Last Follow-up, mo",
ylab = "Cumulative Survival, %",
ggtheme = theme_minimal(),
palette = "set2"
)