这是我的初始数据
# Example input: one row per respondent id, one character column per task
# (.choice.t1 to .choice.t4) holding the recorded choice.
long5 <- structure(
  list(
    id = c(
      "R_88j7lG37gLfxk22", "R_6DK8lERVf8lSQf4", "R_eG8g4wMm8JsqNlI",
      "R_9TCgsW0sLA4xHOm", "R_6J5Obu2AvpCeu9w"
    ),
    .choice.t1 = c("2", "3", "3", "2", "1"),
    .choice.t2 = c("1", "3", "1", "3", "1"),
    .choice.t3 = c("1", "2", "2", "2", "3"),
    .choice.t4 = c("2", "1", "3", "2", "1")
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)
它的第一行是这样的:
我想将其转换为:
正如您所看到的,对于每个 id,我会检查 t1 到 t4 的选择(取值为 1 到 3),然后创建一个名为 `choice` 的新列:如果选择为 1,则每种颜色的第 1 行为 1;如果选择为 2,则每种颜色的第 2 行为 1;如果选择为 3,则每种颜色的第 3 行为 1,其余行为 0。我的实际数据集有更多行。
我尝试这样做,但它不起作用:
# Rename every column except the first: split each name on "_", reverse the
# pieces after the first one, rejoin them with "_", then append the first
# piece after a ".".
# NOTE(review): the long5 column names (".choice.t1", ...) contain no "_", so
# each name is a single piece here — confirm this renaming fits this data.
names(long5)[-1] <-
strsplit(names(long5)[-1], '_') |>
sapply(\(x) paste(paste(c(rev(x[-1])), collapse='_'), x[1], sep='.'))
# Reshape twice with direction='l' (presumably abbreviating "long"), then let
# type.convert() re-infer column types; as.is=FALSE turns character columns
# into factors.
res <- reshape(as.data.frame(long5), varying=-1, direction='l') |>
reshape(direction='l', varying=-(1:3), new.row.names=1:1e9, sep='_') |>
type.convert(as.is=FALSE)
# Bind the first three columns of res to a design matrix built without an
# intercept (~ 0 + ...); contrasts=FALSE asks for a full set of indicator
# columns for eth and pri.
# NOTE(review): env, eth and pri are not columns of long5 as defined in this
# file — presumably carried over from a different data set; verify.
all<-cbind(res[1:3], model.matrix(~ 0 + env + eth + pri, res,
contrasts.arg = list(eth=contrasts(res$eth, contrasts=FALSE),
pri=contrasts(res$pri, contrasts=FALSE))))
我建议保留 `.choice.t#` 列名,并在每个 `c(id, name)` 分组内添加一个 1 到 3 的序号列(`choices`),这样即使数据被重新排序,每一行对应哪个选项也仍然是明确的。
library(dplyr)
library(tidyr) # pivot_longer
# Stack the choice columns into name/value pairs, then emit one row per
# alternative (1 to 3) for every id/name pair; `choice` is 1 on the row whose
# alternative equals the recorded value and 0 on the others.
long5 |>
  pivot_longer(cols = -id) |>
  reframe(
    choices = 1:3,
    choice = as.integer(choices == value),
    .by = c("id", "name")
  )
# # A tibble: 60 × 4
# id name choices choice
# <chr> <chr> <int> <int>
# 1 R_88j7lG37gLfxk22 .choice.t1 1 0
# 2 R_88j7lG37gLfxk22 .choice.t1 2 1
# 3 R_88j7lG37gLfxk22 .choice.t1 3 0
# 4 R_88j7lG37gLfxk22 .choice.t2 1 1
# 5 R_88j7lG37gLfxk22 .choice.t2 2 0
# 6 R_88j7lG37gLfxk22 .choice.t2 3 0
# 7 R_88j7lG37gLfxk22 .choice.t3 1 1
# 8 R_88j7lG37gLfxk22 .choice.t3 2 0
# 9 R_88j7lG37gLfxk22 .choice.t3 3 0
# 10 R_88j7lG37gLfxk22 .choice.t4 1 0
# # ℹ 50 more rows
# # ℹ Use `print(n = ...)` to see more rows
# Melt to one row per id/variable pair, then replicate every row three times
# (once per alternative 1 to 3) in a single vectorised step. This avoids
# building one small data frame per row and rbind-ing them, and it is safe
# for zero-row input, where 1:nrow(out) would iterate over c(1, 0).
out <- reshape2::melt(long5, id.vars = "id")
out <- out[rep(seq_len(nrow(out)), each = 3L), ]
# Alternatives cycle 1, 2, 3 within each replicated id/variable pair.
out$choices <- rep(1:3, length.out = nrow(out))
# 1 where the alternative equals the recorded value, 0 otherwise.
out$choice <- +(out$choices == out$value)
# Drop the "1", "1.1", "1.2", ... row names left by the replication.
rownames(out) <- NULL
head(out)
# id variable value choices choice
# 1 R_88j7lG37gLfxk22 .choice.t1 2 1 0
# 2 R_88j7lG37gLfxk22 .choice.t1 2 2 1
# 3 R_88j7lG37gLfxk22 .choice.t1 2 3 0
# 4 R_6DK8lERVf8lSQf4 .choice.t1 3 1 0
# 5 R_6DK8lERVf8lSQf4 .choice.t1 3 2 0
# 6 R_6DK8lERVf8lSQf4 .choice.t1 3 3 1
library(tidyverse)
# define dummy data: a single respondent with one recorded choice per task
# column
df <- structure(
  list(
    id = "R_88j7lG37gLfxk22",
    t1_choice = "2",
    t2_choice = "1",
    t3_choice = "1",
    t4_choice = "2"
  ),
  row.names = c(NA, -1L),
  class = c("tbl_df", "tbl", "data.frame")
)
# reshape longer: one row per task column, keeping the digits that follow the
# leading "t" of each column name as the task label
df_long <- pivot_longer(
  df,
  cols = !id,
  names_to = "task",
  names_pattern = "t(\\d+).*",
  values_to = "choice"
)
# expand dummies

#' Expand choice codes into a 0/1 indicator vector
#'
#' @param x Choice code(s), numeric or character (e.g. "2"); converted with
#'   `as.integer()`.
#' @param n_levels Number of alternatives, i.e. the length of the result.
#'   Defaults to 3.
#' @return A double vector of length `n_levels` holding 1 at every position
#'   named in `x` and 0 elsewhere.
expand_dummy <- function(x, n_levels = 3L) {
  idx <- as.integer(x)
  out <- numeric(n_levels)
  # Ignore NA and out-of-range codes: an index above n_levels would silently
  # lengthen the vector (leaving NA gaps), and a negative index would set
  # every other position to 1.
  in_range <- !is.na(idx) & idx >= 1L & idx <= n_levels
  out[idx[in_range]] <- 1
  out
}
# For every id/task pair, replace the single recorded choice with the
# indicator vector returned by expand_dummy() (one row per alternative).
reframe(
  group_by(df_long, id, task),
  choice = expand_dummy(choice)
)
#> # A tibble: 12 × 3
#> id task choice
#> <chr> <chr> <dbl>
#> 1 R_88j7lG37gLfxk22 1 0
#> 2 R_88j7lG37gLfxk22 1 1
#> 3 R_88j7lG37gLfxk22 1 0
#> 4 R_88j7lG37gLfxk22 2 1
#> 5 R_88j7lG37gLfxk22 2 0
#> 6 R_88j7lG37gLfxk22 2 0
#> 7 R_88j7lG37gLfxk22 3 1
#> 8 R_88j7lG37gLfxk22 3 0
#> 9 R_88j7lG37gLfxk22 3 0
#> 10 R_88j7lG37gLfxk22 4 0
#> 11 R_88j7lG37gLfxk22 4 1
#> 12 R_88j7lG37gLfxk22 4 0
创建于 2024-01-08,使用 reprex v2.0.2