我编写了一些虚拟数据,其结构与我们无法共享的内部数据非常相似。我想知道如何把这些数据整理成适合 Cox 回归的形式。
您能帮我将虚拟数据预处理成适合 Cox 回归的结构吗?生成虚拟数据的代码如下。请注意,有些“吸烟者”是在死亡前几年才开始吸烟的,这一信息必须体现在传递给 Cox 回归的数据中。
/* Build the dummy longitudinal data set: one row per person per year of age.
   Columns (in this order): personID, age, dead ('N'/'Y'), no_cigars
   (# of cigarettes smoked per day; missing on every row where dead='Y'). */
data Cox_data;
   /* Declare the columns up front so their order and types are explicit. */
   length personID age 8 dead $1 no_cigars 8;

   /* Per-person settings, indexed by personID. Temporary arrays add no
      columns to the output data set.
        final_age      - last age for which a row is written
        last_alive_age - last age with dead='N'
        last_clean_age - last age with no_cigars=0                          */
   array final_age[6]      _temporary_ (10 10 15 18 13 11);
   array last_alive_age[6] _temporary_ ( 8  5 12 16 15 15);
   array last_clean_age[6] _temporary_ ( 6  3  7 20  6 15);

   /* 1: smoker who died at the age of 9
      2: smoker who died at the age of 6
      3: smoker who died at the age of 13
      4: non-smoker who died at the age of 17
      5: smoker who did not die during the measured time
      6: non-smoker who did not die during the measured time */
   do personID = 1 to 6;
      do age = 1 to final_age[personID];
         if age > last_alive_age[personID] then dead = 'Y';
         else dead = 'N';

         if dead = 'Y' then no_cigars = .;          /* no exposure once dead */
         else if age <= last_clean_age[personID] then no_cigars = 0;
         else do;
            /* Person-specific consumption once smoking has started. */
            select (personID);
               when (1, 4) no_cigars = age / 2;
               when (2)    no_cigars = age / 2.2;
               when (3)    no_cigars = age / 3;
               otherwise   no_cigars = 1;
            end;
         end;

         output;
      end;
   end;
run;
如果您能展示打算使用的模型以及它所需的数据结构,会很有帮助。下面的代码只是我的猜测。
/* Prepare COX_DATA for a Cox regression with a time-varying exposure.

   Averaging no_cigars over a person's whole follow-up would hide when the
   person started smoking, and would let exposure recorded later in life
   influence risk sets evaluated earlier. Instead, TTE is written in
   counting-process form: one row per person per at-risk interval.

     start, stop - the person is at risk on the interval (start, stop];
                   both are ages taken from consecutive COX_DATA records
     no_cigars   - cigarettes per day recorded at the START of the interval
                   (the latest value known before the interval's outcome)
     censor      - 0 = the person died at age STOP,
                   1 = the interval ended without death

   Assumes the records of each person are contiguous and in ascending age
   order, as produced by the COX_DATA step. */
data tte;
   set cox_data;
   by personID notsorted;

   /* LAG must run on every row, so call it outside any condition. */
   prev_age    = lag(age);
   prev_dead   = lag(dead);
   prev_cigars = lag(no_cigars);

   /* An interval needs an earlier record of the same person on which the
      person was still alive. This skips each person's first record and all
      records after the first dead='Y' record. */
   if not first.personID and prev_dead eq 'N' then do;
      start     = prev_age;
      stop      = age;
      censor    = (dead ne 'Y');
      no_cigars = prev_cigars;
      output;
   end;

   keep personID start stop censor no_cigars;
run;

/* Cox model on counting-process input: each row contributes to the risk set
   of every event time inside (start, stop] with that row's no_cigars. */
proc phreg data=tte;
   model (start, stop)*censor(1) = no_cigars;
run;