使用 for() 循环将数据处理应用于数据集的所有行

Question

我有这个空数据框

# Template data frame with 24 rows: each of the two respondent ids appears
# 12 times. `choice` is pre-filled; the nine indicator columns start as NA.
new_df <- structure(
  list(
    id = rep(c("R_88j7lG37gLfxk22", "R_6DK8lERVf8lSQf4"), each = 12),
    choice = c(
      0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
      1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1
    ),
    low_env = rep(NA, 24),
    mid_env = rep(NA, 24),
    high_env = rep(NA, 24),
    low_eth = rep(NA, 24),
    mid_eth = rep(NA, 24),
    high_eth = rep(NA, 24),
    `low_pri($25)` = rep(NA, 24),
    `mid_pri($75)` = rep(NA, 24),
    `high_pri($125)` = rep(NA, 24)
  ),
  row.names = c(NA, 24L),
  class = "data.frame"
)

并且使用下面的数据和代码，我根据

long

数据集的第一行填充了其中的一半。如何使用

for()

循环将此方法应用于

long

数据集的其余行（将填充另一半）？

# Wide-format input: one row per respondent. Columns are named
# t<task>_choice and t<task>_p<product>_<env|eth|pri> for tasks 1-4 and
# products 1-3; every value is stored as a character string.
long <- structure(
  list(
    id = c("R_88j7lG37gLfxk22", "R_6DK8lERVf8lSQf4"),
    # Choice recorded for each task
    t1_choice = c("2", "3"),
    t2_choice = c("1", "3"),
    t3_choice = c("1", "2"),
    t4_choice = c("2", "1"),
    # Task 1
    t1_p1_env = c("high_env", "mid_env"),
    t1_p1_eth = c("low_eth", "mid_eth"),
    t1_p1_pri = c("$125", "$25"),
    t1_p2_env = c("mid_env", "high_env"),
    t1_p2_eth = c("high_eth", "low_eth"),
    t1_p2_pri = c("$25", "$75"),
    t1_p3_env = c("low_env", "low_env"),
    t1_p3_eth = c("mid_eth", "low_eth"),
    t1_p3_pri = c("$75", "$75"),
    # Task 2
    t2_p1_env = c("high_env", "mid_env"),
    t2_p1_eth = c("low_eth", "high_eth"),
    t2_p1_pri = c("$75", "$125"),
    t2_p2_env = c("mid_env", "low_env"),
    t2_p2_eth = c("mid_eth", "low_eth"),
    t2_p2_pri = c("$125", "$75"),
    t2_p3_env = c("mid_env", "high_env"),
    t2_p3_eth = c("mid_eth", "high_eth"),
    t2_p3_pri = c("$75", "$75"),
    # Task 3
    t3_p1_env = c("high_env", "mid_env"),
    t3_p1_eth = c("high_eth", "mid_eth"),
    t3_p1_pri = c("$125", "$125"),
    t3_p2_env = c("mid_env", "high_env"),
    t3_p2_eth = c("low_eth", "low_eth"),
    t3_p2_pri = c("$25", "$25"),
    t3_p3_env = c("low_env", "low_env"),
    t3_p3_eth = c("high_eth", "high_eth"),
    t3_p3_pri = c("$25", "$75"),
    # Task 4
    t4_p1_env = c("low_env", "high_env"),
    t4_p1_eth = c("low_eth", "low_eth"),
    t4_p1_pri = c("$75", "$125"),
    t4_p2_env = c("high_env", "mid_env"),
    t4_p2_eth = c("mid_eth", "mid_eth"),
    t4_p2_pri = c("$125", "$25"),
    t4_p3_env = c("low_env", "low_env"),
    t4_p3_eth = c("high_eth", "mid_eth"),
    t4_p3_pri = c("$25", "$125")
  ),
  row.names = c(NA, -2L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Working example: dummy-code the first respondent (row 1 of `long`) into
# rows 1-12 of new_df.
#
# `long` stores each task/product attribute in a column named
# t<task>_p<product>_<env|eth|pri> and the chosen product in t<task>_choice.
# Each of the four tasks contributes three consecutive rows to new_df (one
# per product), so product `i` of task `task` lands in row (task - 1) * 3 + i.
for (task in 1:4) {
  # Product picked in this task (stored as a string, e.g. "2")
  chosen <- long[[paste0("t", task, "_choice")]][1]

  for (i in 1:3) {
    row_idx <- (task - 1) * 3 + i
    prefix <- paste0("t", task, "_p", i)

    # Attribute levels shown for product i in this task
    env <- long[[paste0(prefix, "_env")]][1]
    eth <- long[[paste0(prefix, "_eth")]][1]
    pri <- long[[paste0(prefix, "_pri")]][1]

    # One 0/1 indicator per attribute level
    new_df[row_idx, "low_env"] <- as.numeric(env == "low_env")
    new_df[row_idx, "mid_env"] <- as.numeric(env == "mid_env")
    new_df[row_idx, "high_env"] <- as.numeric(env == "high_env")
    new_df[row_idx, "low_eth"] <- as.numeric(eth == "low_eth")
    new_df[row_idx, "mid_eth"] <- as.numeric(eth == "mid_eth")
    new_df[row_idx, "high_eth"] <- as.numeric(eth == "high_eth")
    new_df[row_idx, "low_pri($25)"] <- as.numeric(pri == "$25")
    new_df[row_idx, "mid_pri($75)"] <- as.numeric(pri == "$75")
    new_df[row_idx, "high_pri($125)"] <- as.numeric(pri == "$125")

    # 1 when product i is the one chosen in this task, 0 otherwise. This is
    # set for every task (including t1) so all rows are derived the same way.
    new_df[row_idx, "choice"] <- as.numeric(chosen == i)
  }
}

Answer 1

你的方法看起来很繁琐。相反，您可以先指定更合适的

names

，然后

reshape

两次，将数据转换为所需的长格式，并使用

model.matrix

获得 one-hot 编码。我不确定如何得到 choice 变量，因为您已经把它硬编码了，不过您自己应该清楚。

# Rename every column except the first so the task prefix moves to the end
# after a dot, e.g. "t1_p1_env" -> "env_p1.t1" and "t1_choice" -> "choice.t1".
> names(long)[-1] <- 
+   strsplit(names(long)[-1], '_') |>
+   sapply(\(x) paste(paste(c(rev(x[-1])), collapse='_'), x[1], sep='.'))
# Reshape to long format twice: the first call treats every column except the
# first as varying; the second treats every column except the first three as
# varying and splits their names on '_'.
# NOTE(review): new.row.names=1:1e9 presumably just supplies enough unique row
# names for the stacked rows -- confirm against ?reshape.
> res <- reshape(as.data.frame(long), varying=-1, direction='l', new.row.names=1:1e9) |>
+   reshape(direction='l', varying=-(1:3), new.row.names=1:1e9, sep='_')
# Build the no-intercept model matrix (~ 0 + ...) for env, eth and pri and
# bind it to the first three columns of res.
> cbind(res[1:3], model.matrix(~ 0 + env + eth + pri, res))
                  id time choice envhigh_env envlow_env envmid_env ethlow_eth ethmid_eth pri$25 pri$75
1  R_88j7lG37gLfxk22   p1      2           1          0          0          1          0      0      0
2  R_6DK8lERVf8lSQf4   p1      3           0          0          1          0          1      1      0
3  R_88j7lG37gLfxk22   p1      1           1          0          0          1          0      0      1
4  R_6DK8lERVf8lSQf4   p1      3           0          0          1          0          0      0      0
5  R_88j7lG37gLfxk22   p1      1           1          0          0          0          0      0      0
6  R_6DK8lERVf8lSQf4   p1      2           0          0          1          0          1      0      0
7  R_88j7lG37gLfxk22   p1      2           0          1          0          1          0      0      1
8  R_6DK8lERVf8lSQf4   p1      1           1          0          0          1          0      0      0
9  R_88j7lG37gLfxk22   p2      2           0          0          1          0          0      1      0
10 R_6DK8lERVf8lSQf4   p2      3           1          0          0          1          0      0      1
11 R_88j7lG37gLfxk22   p2      1           0          0          1          0          1      0      0
12 R_6DK8lERVf8lSQf4   p2      3           0          1          0          1          0      0      1
13 R_88j7lG37gLfxk22   p2      1           0          0          1          1          0      1      0
14 R_6DK8lERVf8lSQf4   p2      2           1          0          0          1          0      1      0
15 R_88j7lG37gLfxk22   p2      2           1          0          0          0          1      0      0
16 R_6DK8lERVf8lSQf4   p2      1           0          0          1          0          1      1      0
17 R_88j7lG37gLfxk22   p3      2           0          1          0          0          1      0      1
18 R_6DK8lERVf8lSQf4   p3      3           0          1          0          1          0      0      1
19 R_88j7lG37gLfxk22   p3      1           0          0          1          0          1      0      1
20 R_6DK8lERVf8lSQf4   p3      3           1          0          0          0          0      0      1
21 R_88j7lG37gLfxk22   p3      1           0          1          0          0          0      1      0
22 R_6DK8lERVf8lSQf4   p3      2           0          1          0          0          0      0      1
23 R_88j7lG37gLfxk22   p3      2           0          1          0          0          0      1      0
24 R_6DK8lERVf8lSQf4   p3      1           0          1          0          0          1      0      0

使用 for() 循环将数据处理应用于数据集的所有行

问题描述投票：0回答：1

1个回答

最新问题

使用 for() 循环将数据处理应用于数据集的所有行

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1