我有这个空数据框
new_df<-structure(list(id = c("R_88j7lG37gLfxk22", "R_88j7lG37gLfxk22",
"R_88j7lG37gLfxk22", "R_88j7lG37gLfxk22", "R_88j7lG37gLfxk22",
"R_88j7lG37gLfxk22", "R_88j7lG37gLfxk22", "R_88j7lG37gLfxk22",
"R_88j7lG37gLfxk22", "R_88j7lG37gLfxk22", "R_88j7lG37gLfxk22",
"R_88j7lG37gLfxk22", "R_6DK8lERVf8lSQf4", "R_6DK8lERVf8lSQf4",
"R_6DK8lERVf8lSQf4", "R_6DK8lERVf8lSQf4", "R_6DK8lERVf8lSQf4",
"R_6DK8lERVf8lSQf4", "R_6DK8lERVf8lSQf4", "R_6DK8lERVf8lSQf4",
"R_6DK8lERVf8lSQf4", "R_6DK8lERVf8lSQf4", "R_6DK8lERVf8lSQf4",
"R_6DK8lERVf8lSQf4"), choice = c(0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1), low_env = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), mid_env = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), high_env = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), low_eth = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), mid_eth = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), high_eth = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
`low_pri($25)` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
`mid_pri($75)` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
`high_pri($125)` = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)), row.names = c(NA, 24L), class = "data.frame")
并且使用上面的数据和代码,我根据
long
数据集的第一行填充了其中的一半。如何使用 for()
循环将此方法应用于 long
数据集的其余行(将填充另一半)?
long<-structure(list(id = c("R_88j7lG37gLfxk22", "R_6DK8lERVf8lSQf4"
), t1_choice = c("2", "3"), t2_choice = c("1", "3"), t3_choice = c("1",
"2"), t4_choice = c("2", "1"), t1_p1_env = c("high_env", "mid_env"
), t1_p1_eth = c("low_eth", "mid_eth"), t1_p1_pri = c("$125",
"$25"), t1_p2_env = c("mid_env", "high_env"), t1_p2_eth = c("high_eth",
"low_eth"), t1_p2_pri = c("$25", "$75"), t1_p3_env = c("low_env",
"low_env"), t1_p3_eth = c("mid_eth", "low_eth"), t1_p3_pri = c("$75",
"$75"), t2_p1_env = c("high_env", "mid_env"), t2_p1_eth = c("low_eth",
"high_eth"), t2_p1_pri = c("$75", "$125"), t2_p2_env = c("mid_env",
"low_env"), t2_p2_eth = c("mid_eth", "low_eth"), t2_p2_pri = c("$125",
"$75"), t2_p3_env = c("mid_env", "high_env"), t2_p3_eth = c("mid_eth",
"high_eth"), t2_p3_pri = c("$75", "$75"), t3_p1_env = c("high_env",
"mid_env"), t3_p1_eth = c("high_eth", "mid_eth"), t3_p1_pri = c("$125",
"$125"), t3_p2_env = c("mid_env", "high_env"), t3_p2_eth = c("low_eth",
"low_eth"), t3_p2_pri = c("$25", "$25"), t3_p3_env = c("low_env",
"low_env"), t3_p3_eth = c("high_eth", "high_eth"), t3_p3_pri = c("$25",
"$75"), t4_p1_env = c("low_env", "high_env"), t4_p1_eth = c("low_eth",
"low_eth"), t4_p1_pri = c("$75", "$125"), t4_p2_env = c("high_env",
"mid_env"), t4_p2_eth = c("mid_eth", "mid_eth"), t4_p2_pri = c("$125",
"$25"), t4_p3_env = c("low_env", "low_env"), t4_p3_eth = c("high_eth",
"mid_eth"), t4_p3_pri = c("$25", "$125")), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"))
#working
# Loop through the first three rows of new_df
for (i in 1:3) {
# Extracting the required values from long1 for each row
env <- long[1,][paste0("t1_p", i, "_env")][1]
eth <- long[1,][paste0("t1_p", i, "_eth")][1]
pri <- long[1,][paste0("t1_p", i, "_pri")][1]
# Matching values from long[1,] to new_df columns in the corresponding row
new_df[i, "low_env"] <- as.numeric(env == "low_env")
new_df[i, "mid_env"] <- as.numeric(env == "mid_env")
new_df[i, "high_env"] <- as.numeric(env == "high_env")
new_df[i, "low_eth"] <- as.numeric(eth == "low_eth")
new_df[i, "mid_eth"] <- as.numeric(eth == "mid_eth")
new_df[i, "high_eth"] <- as.numeric(eth == "high_eth")
new_df[i, "low_pri($25)"] <- as.numeric(pri == "$25")
new_df[i, "mid_pri($75)"] <- as.numeric(pri == "$75")
new_df[i, "high_pri($125)"] <- as.numeric(pri == "$125")
}
# Loop through the second three rows of new_df
for (i in 1:3) {
# Extracting the required values from long[1,] for each row
env <- long[1,][paste0("t2_p", i, "_env")][1]
eth <- long[1,][paste0("t2_p", i, "_eth")][1]
pri <- long[1,][paste0("t2_p", i, "_pri")][1]
# Matching values from long[1,] to new_df columns in the corresponding row
new_df[i + 3, "low_env"] <- as.numeric(env == "low_env")
new_df[i + 3, "mid_env"] <- as.numeric(env == "mid_env")
new_df[i + 3, "high_env"] <- as.numeric(env == "high_env")
new_df[i + 3, "low_eth"] <- as.numeric(eth == "low_eth")
new_df[i + 3, "mid_eth"] <- as.numeric(eth == "mid_eth")
new_df[i + 3, "high_eth"] <- as.numeric(eth == "high_eth")
new_df[i + 3, "low_pri($25)"] <- as.numeric(pri == "$25")
new_df[i + 3, "mid_pri($75)"] <- as.numeric(pri == "$75")
new_df[i + 3, "high_pri($125)"] <- as.numeric(pri == "$125")
# Adjusting the choice column
new_df[i + 3, "choice"] <- as.numeric(long[1,][paste0("t2_choice")][1] == i)
}
# Loop through the second three rows of new_df
for (i in 1:3) {
# Extracting the required values from long[1,] for each row
env <- long[1,][paste0("t3_p", i, "_env")][1]
eth <- long[1,][paste0("t3_p", i, "_eth")][1]
pri <- long[1,][paste0("t3_p", i, "_pri")][1]
# Matching values from long[1,] to new_df columns in the corresponding row
new_df[i + 6, "low_env"] <- as.numeric(env == "low_env")
new_df[i + 6, "mid_env"] <- as.numeric(env == "mid_env")
new_df[i + 6, "high_env"] <- as.numeric(env == "high_env")
new_df[i + 6, "low_eth"] <- as.numeric(eth == "low_eth")
new_df[i + 6, "mid_eth"] <- as.numeric(eth == "mid_eth")
new_df[i + 6, "high_eth"] <- as.numeric(eth == "high_eth")
new_df[i + 6, "low_pri($25)"] <- as.numeric(pri == "$25")
new_df[i + 6, "mid_pri($75)"] <- as.numeric(pri == "$75")
new_df[i + 6, "high_pri($125)"] <- as.numeric(pri == "$125")
# Adjusting the choice column
new_df[i + 6, "choice"] <- as.numeric(long[1,][paste0("t3_choice")][1] == i)
}
for (i in 1:3) {
# Extracting the required values from long[1,] for each row
env <- long[1,][paste0("t4_p", i, "_env")][1]
eth <- long[1,][paste0("t4_p", i, "_eth")][1]
pri <- long[1,][paste0("t4_p", i, "_pri")][1]
# Matching values from long[1,] to new_df columns in the corresponding row
new_df[i + 9, "low_env"] <- as.numeric(env == "low_env")
new_df[i + 9, "mid_env"] <- as.numeric(env == "mid_env")
new_df[i + 9, "high_env"] <- as.numeric(env == "high_env")
new_df[i + 9, "low_eth"] <- as.numeric(eth == "low_eth")
new_df[i + 9, "mid_eth"] <- as.numeric(eth == "mid_eth")
new_df[i + 9, "high_eth"] <- as.numeric(eth == "high_eth")
new_df[i + 9, "low_pri($25)"] <- as.numeric(pri == "$25")
new_df[i + 9, "mid_pri($75)"] <- as.numeric(pri == "$75")
new_df[i + 9, "high_pri($125)"] <- as.numeric(pri == "$125")
# Adjusting the choice column
new_df[i + 9, "choice"] <- as.numeric(long[1,][paste0("t4_choice")][1] == i)
}
你的方法看起来很乏味。相反,您可以将更有利的
names
然后 reshape
数据两次指定为所需的长格式,并使用 model.matrix
获得 one-hot 编码。不确定如何获得 choice 变量,因为您已经对其进行了硬编码,但您会知道它。
> names(long)[-1] <-
+ strsplit(names(long)[-1], '_') |>
+ sapply(\(x) paste(paste(c(rev(x[-1])), collapse='_'), x[1], sep='.'))
> res <- reshape(as.data.frame(long), varying=-1, direction='l', new.row.names=1:1e9) |>
+ reshape(direction='l', varying=-(1:3), new.row.names=1:1e9, sep='_')
> cbind(res[1:3], model.matrix(~ 0 + env + eth + pri, res))
id time choice envhigh_env envlow_env envmid_env ethlow_eth ethmid_eth pri$25 pri$75
1 R_88j7lG37gLfxk22 p1 2 1 0 0 1 0 0 0
2 R_6DK8lERVf8lSQf4 p1 3 0 0 1 0 1 1 0
3 R_88j7lG37gLfxk22 p1 1 1 0 0 1 0 0 1
4 R_6DK8lERVf8lSQf4 p1 3 0 0 1 0 0 0 0
5 R_88j7lG37gLfxk22 p1 1 1 0 0 0 0 0 0
6 R_6DK8lERVf8lSQf4 p1 2 0 0 1 0 1 0 0
7 R_88j7lG37gLfxk22 p1 2 0 1 0 1 0 0 1
8 R_6DK8lERVf8lSQf4 p1 1 1 0 0 1 0 0 0
9 R_88j7lG37gLfxk22 p2 2 0 0 1 0 0 1 0
10 R_6DK8lERVf8lSQf4 p2 3 1 0 0 1 0 0 1
11 R_88j7lG37gLfxk22 p2 1 0 0 1 0 1 0 0
12 R_6DK8lERVf8lSQf4 p2 3 0 1 0 1 0 0 1
13 R_88j7lG37gLfxk22 p2 1 0 0 1 1 0 1 0
14 R_6DK8lERVf8lSQf4 p2 2 1 0 0 1 0 1 0
15 R_88j7lG37gLfxk22 p2 2 1 0 0 0 1 0 0
16 R_6DK8lERVf8lSQf4 p2 1 0 0 1 0 1 1 0
17 R_88j7lG37gLfxk22 p3 2 0 1 0 0 1 0 1
18 R_6DK8lERVf8lSQf4 p3 3 0 1 0 1 0 0 1
19 R_88j7lG37gLfxk22 p3 1 0 0 1 0 1 0 1
20 R_6DK8lERVf8lSQf4 p3 3 1 0 0 0 0 0 1
21 R_88j7lG37gLfxk22 p3 1 0 1 0 0 0 1 0
22 R_6DK8lERVf8lSQf4 p3 2 0 1 0 0 0 0 1
23 R_88j7lG37gLfxk22 p3 2 0 1 0 0 0 1 0
24 R_6DK8lERVf8lSQf4 p3 1 0 1 0 0 1 0 0