# Example input: one row per id. sis_num is the number of sisters; the
# sis1_* / sis2_* columns hold the values recorded for the first two sisters.
df <- data.frame(
  id               = c(1, 2, 3, 4, 5),
  age              = c(30, 45, 50, 60, 35),
  sis_num          = c(0, 1, 2, 3, 4),
  sis1_bca_status  = c(0, 1, 0, 1, 0),
  sis2_bca_status  = c(0, 0, 1, 0, 1),
  sis1_bca_age     = c(0, 40, 50, 60, 70),
  sis2_bca_age     = c(0, 45, 55, 65, 75),
  menopause_status = c(0, 1, 2, 0, 1),
  meno_age         = c(55, 60, 50, 55, 52),
  hrt_status       = c(0, 1, 2, 1, 0)
)
我需要解析该数据框(df)的每一行并生成一个新的数据框。新数据框会复制每个 id 的所有值,然后根据 sis_num(姐妹数量)为每位姐妹在该 id 的输出中追加七个值:如果 sis_num = 0,则不追加任何姐妹的值;如果 sis_num = 1,则追加 7 个额外的值;如果 sis_num = 2,则追加 14 个额外的值,依此类推。因此,输出中每一行的列数各不相同(我需要把该输出保存为 TXT 格式,以便输入到另一个程序中做进一步计算,这是该程序唯一接受的输入格式)。为此,我到目前为止所做的是:
# Build the output record for one row of the input data frame.
#
# `row` is one row of the data frame as a vector, in the column order
# id, age, sis_num, sis1_bca_status, sis2_bca_status, sis1_bca_age,
# sis2_bca_age, menopause_status, meno_age, hrt_status (this is what
# apply(df, 1, create_row) passes). A field is looked up by column name when
# `row` carries that name, and by its fixed position otherwise.
#
# Returns id, age, sis_num, then seven values per sister, then
# menopause_status, meno_age, hrt_status, so the result has
# 6 + 7 * sis_num elements. Sisters 1 and 2 contribute
# (status, 0, 0, age, -99, -99, 0); every further sister contributes the
# static filler (0, 0, 0, -99, -99, -99, 0).
#
# Stops with an error when sis_num is missing, negative or not a whole
# number, because such a count cannot be turned into a well-formed record.
create_row <- function(row) {
  # Prefer the column name so a reordered input still works; fall back to the
  # original position for unnamed vectors.
  field <- function(name, pos) {
    if (name %in% names(row)) row[name] else row[pos]
  }

  sis_num <- field("sis_num", 3)
  # as.numeric() also copes with rows that apply() coerced to character.
  n_sis <- suppressWarnings(as.numeric(sis_num[[1]]))
  if (length(n_sis) != 1L || !is.finite(n_sis) ||
        n_sis < 0 || n_sis != floor(n_sis)) {
    stop("sis_num must be a non-negative whole number", call. = FALSE)
  }

  # Seven values for a sister whose status and age are recorded in `row`.
  recorded_sister <- function(status, age) {
    c(status, 0, 0, age, -99, -99, 0)
  }
  # Seven static values for each sister beyond the second.
  filler_sister <- c(0, 0, 0, -99, -99, -99, 0)

  # An `if` without `else` yields NULL, which c() drops, so absent sisters
  # simply contribute nothing.
  c(
    field("id", 1),
    field("age", 2),
    sis_num,
    if (n_sis >= 1) {
      recorded_sister(field("sis1_bca_status", 4), field("sis1_bca_age", 6))
    },
    if (n_sis >= 2) {
      recorded_sister(field("sis2_bca_status", 5), field("sis2_bca_age", 7))
    },
    rep(filler_sister, max(n_sis - 2, 0)),
    field("menopause_status", 8),
    field("meno_age", 9),
    field("hrt_status", 10)
  )
}
# Run create_row over every row of df (MARGIN = 1) and transpose whatever
# apply() hands back.
output_df <- t(apply(X = df, MARGIN = 1, FUN = create_row))
# Row-bind the pieces of output_df and convert the result to a data frame.
# NOTE(review): create_row returns vectors of different lengths, so rbind()
# presumably recycles the shorter ones here -- confirm this is what is wanted.
output_df2 <- as.data.frame(do.call(what = rbind, args = output_df))
print(output_df2)
我为output_df2获得的输出是:
output_df2
我希望把此输出写入 TXT 文件,并且不带 V1、V2、V3 这些标签。每个 id 占一行并包含其对应的值,每行(即每个 id)的列数随 sis_num 动态变化。为了解决列表(list)的问题,我尝试了之前别人分享的解决方案:output_df2 <- as.data.frame(output_df2); print(output_df2)
V1 V1 1, 30, 0, 1, 55, 0 V2 2, 45, 1, 1, 0, 0, 40, -99, -99, 0, 1, 60, 1 V3 3, 50, 2, 0, 0, 0, 50, -99, -99, 0, 1, 0, 0, 55, -99, -99, 0, 2, 50, 2 V4 4, 60, 3, 1, 0, 0, 60, -99, -99, 0, 0, 0, 0, 65, -99, -99, 0, 0, 0, 0, -99, -99, -99, 0, 0, 55, 1 V5 5, 35, 4, 0, 0, 0, 70, -99, -99, 0, 1, 0, 0, 75, -99, -99, 0, 0, 0, 0, -99, -99, -99, 0, 0, 0, 0, -99, -99, -99, 0, 1, 52, 0 > write.table(output_df2, file = "output_df.txt", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE) Error in write.table(output_df2, file = "output_df.txt", sep = "\t", row.names = FALSE, : unimplemented type 'list' in 'EncodeElement'
> output_df2 <- apply(output_df,2,as.character)
write.csv(output_df2,file = "output_df.csv")
输出在CSV中看起来像这样,但这不是我想要的:
(此处原有一张图片)我该如何解决这个问题?
实际上,我的输入文件中有 80 个变量。有没有办法更快地把它们复制到新的数据框中,而不必逐个索引每个值?
我不太理解您的函数。以下是我所理解的内容:
# Build the output record for one named row vector `r`.
#
# `r` must carry the names id, age, sis_num, sis1_bca_status,
# sis2_bca_status, sis1_bca_age, sis2_bca_age, menopause_status, meno_age
# and hrt_status.
#
# Returns id, age, sis_num, then seven values per sister, then
# menopause_status, meno_age, hrt_status (6 + 7 * sis_num elements).
# Sisters 1 and 2 contribute (status, 0, 0, age, -99, -99, 0); every further
# sister contributes the static filler (0, 0, 0, -99, -99, -99, 0).
#
# Stops with an error when sis_num is missing, negative or not a whole
# number.
f <- function(r) {
  # as.numeric() also copes with rows that apply() coerced to character.
  n_sis <- suppressWarnings(as.numeric(r[["sis_num"]]))
  if (length(n_sis) != 1L || !is.finite(n_sis) ||
        n_sis < 0 || n_sis != floor(n_sis)) {
    stop("sis_num must be a non-negative whole number", call. = FALSE)
  }

  leading <- r[c("id", "age", "sis_num")]
  trailing <- r[c("menopause_status", "meno_age", "hrt_status")]

  # Seven values for sister k, whose status and age are recorded in `r`.
  recorded_sister <- function(k) {
    c(
      r[paste0("sis", k, "_bca_status")], 0, 0,
      r[paste0("sis", k, "_bca_age")], -99, -99, 0
    )
  }
  # Seven static values for each sister beyond the second.
  filler_sister <- c(0, 0, 0, -99, -99, -99, 0)

  # An `if` without `else` yields NULL, which c() drops, so absent sisters
  # simply contribute nothing.
  c(
    leading,
    if (n_sis >= 1) recorded_sister(1),
    if (n_sis >= 2) recorded_sister(2),
    rep(filler_sister, max(n_sis - 2, 0)),
    trailing
  )
}
如果您认可这个输出(output),我们可以再重写这段代码。
# One comma-separated string per row of df: re-attach the column names to the
# row values, pass them through f, and collapse the result with toString().
l <- apply(df, 1, function(x) {
  named_row <- setNames(as.vector(x), colnames(df))
  toString(f(named_row))
})
其中 l 是一个字符向量(每个 id 对应一个字符串):
> l
[1] "1, 30, 0, 0, 55, 0"
[2] "2, 45, 1, 1, 0, 0, 40, -99, -99, 0, 1, 60, 1"
[3] "3, 50, 2, 0, 0, 0, 50, -99, -99, 0, 1, 0, 0, 55, -99, 0, 2, 50, 2"
[4] "4, 60, 3, 1, 0, 0, 60, -99, -99, 0, 0, 0, 0, 65, -99, 0, 0, 55, 1"
[5] "5, 35, 4, 0, 0, 0, 70, -99, -99, 0, 1, 0, 0, 75, -99, 0, 1, 52, 0"
我们可以使用 cat() 把 l 存储为工作目录中的 csv 文件,例如:
cat(l, file = "l.csv", sep = "\n")