我想修改 data.table 中的一列:在每个 Team 和 Year 分组内,把字符串替换为它的最后一个有效标记(以“|”分隔);如果某一行缺少最后一个标记,则保留 NA。
# Example input: 15 rows, one per Team/Player/Year, where Str is a
# "|"-separated list of tokens. Within a Team/Year group every row shares the
# same leading tokens; some rows carry one extra trailing token.
input_DT <- structure(
  list(
    Team = rep(c("T1", "T2"), times = c(10L, 5L)),
    Player = c(
      paste0("T1.P", 1:5),
      paste0("T1.P", 1:5),
      paste0("T2.P", 1:5)
    ),
    Year = rep(c(2011L, 2012L, 2011L), each = 5L),
    Str = c(
      # T1 / 2011: all five rows have a trailing token
      paste0(
        "a bin|a 3 2|a north|an|a c d|",
        c("a bin", "a 3 2", "a north", "an", "a c d")
      ),
      # T1 / 2012: rows 2 and 5 have no trailing token
      paste0("a bin|a north|an", c("|a bin", "", "|a north", "|an", "")),
      # T2 / 2011: row 2 has no trailing token
      paste0("an|bin|4eight|12", c("|an", "", "|bin", "|4eight", "|12"))
    )
  ),
  row.names = c(NA, -15L),
  class = c("data.table", "data.frame")
)
> print(input_DT)
Team Player Year Str
<char> <char> <int> <char>
1: T1 T1.P1 2011 a bin|a 3 2|a north|an|a c d|a bin
2: T1 T1.P2 2011 a bin|a 3 2|a north|an|a c d|a 3 2
3: T1 T1.P3 2011 a bin|a 3 2|a north|an|a c d|a north
4: T1 T1.P4 2011 a bin|a 3 2|a north|an|a c d|an
5: T1 T1.P5 2011 a bin|a 3 2|a north|an|a c d|a c d
6: T1 T1.P1 2012 a bin|a north|an|a bin
7: T1 T1.P2 2012 a bin|a north|an
8: T1 T1.P3 2012 a bin|a north|an|a north
9: T1 T1.P4 2012 a bin|a north|an|an
10: T1 T1.P5 2012 a bin|a north|an
11: T2 T2.P1 2011 an|bin|4eight|12|an
12: T2 T2.P2 2011 an|bin|4eight|12
13: T2 T2.P3 2011 an|bin|4eight|12|bin
14: T2 T2.P4 2011 an|bin|4eight|12|4eight
15: T2 T2.P5 2011 an|bin|4eight|12|12
# Expected result: Var holds the trailing "|"-separated token of the matching
# input row, or NA when that row has no trailing token.
# Missing entries are encoded as real NA values (not empty strings) so the
# object agrees with the expected table printed below.
output_DT <- structure(
  list(
    Team = c(
      "T1", "T1", "T1", "T1", "T1", "T1", "T1", "T1", "T1", "T1",
      "T2", "T2", "T2", "T2", "T2"
    ),
    Player = c(
      "T1.P1", "T1.P2", "T1.P3", "T1.P4", "T1.P5",
      "T1.P1", "T1.P2", "T1.P3", "T1.P4", "T1.P5",
      "T2.P1", "T2.P2", "T2.P3", "T2.P4", "T2.P5"
    ),
    Year = c(
      2011L, 2011L, 2011L, 2011L, 2011L,
      2012L, 2012L, 2012L, 2012L, 2012L,
      2011L, 2011L, 2011L, 2011L, 2011L
    ),
    Var = c(
      "a bin", "a 3 2", "a north", "an", "a c d",
      "a bin", NA_character_, "a north", "an", NA_character_,
      "an", NA_character_, "bin", "4eight", "12"
    )
  ),
  row.names = c(NA, -15L),
  class = c("data.table", "data.frame")
)
> print(output_DT)
Team Player Year Var
<char> <char> <int> <char>
1: T1 T1.P1 2011 a bin
2: T1 T1.P2 2011 a 3 2
3: T1 T1.P3 2011 a north
4: T1 T1.P4 2011 an
5: T1 T1.P5 2011 a c d
6: T1 T1.P1 2012 a bin
7: T1 T1.P2 2012 NA
8: T1 T1.P3 2012 a north
9: T1 T1.P4 2012 an
10: T1 T1.P5 2012 NA
11: T2 T2.P1 2011 an
12: T2 T2.P2 2011 NA
13: T2 T2.P3 2011 bin
14: T2 T2.P4 2011 4eight
15: T2 T2.P5 2011 12
# Attempt: keep the last "|"-separated token of Str for rows whose separator
# count equals the maximum, and set every other row to NA.
# Note: the maximum here is taken over the whole Str column (no grouping).
output_DT$Var <- input_DT[, {
  n_sep <- stringr::str_count(Str, "\\|")
  has_max_sep <- n_sep == max(n_sep)
  last_token <- stringr::word(Str, -1, sep = stringr::fixed("|"))
  data.table::fifelse(has_max_sep, last_token, NA_character_)
}]
> print(output_DT)
Team Player Year Var
1: T1 T1.P1 2011 a bin
2: T1 T1.P2 2011 a 3 2
3: T1 T1.P3 2011 a north
4: T1 T1.P4 2011 an
5: T1 T1.P5 2011 a c d
6: T1 T1.P1 2012 <NA>
7: T1 T1.P2 2012 <NA>
8: T1 T1.P3 2012 <NA>
9: T1 T1.P4 2012 <NA>
10: T1 T1.P5 2012 <NA>
11: T2 T2.P1 2011 <NA>
12: T2 T2.P2 2011 <NA>
13: T2 T2.P3 2011 <NA>
14: T2 T2.P4 2011 <NA>
15: T2 T2.P5 2011 <NA>
按我尝试的方式,最大字符串“长度”(即字符串中标记数量的最大值)是根据“Str”列中的所有字符串计算的,而不是按每个团队(Team)和年份(Year)分组分别计算的。
提前致谢。
包含多个步骤的解决方案可以接受吗?
library(data.table)
library(tidyverse)
# Example input: 15 rows, one per Team/Player/Year; Str is a "|"-separated
# token list. Rows in the same Team/Year group share their leading tokens and
# differ only in the optional trailing token.
input_DT <- structure(
  list(
    Team = rep(c("T1", "T2"), times = c(10L, 5L)),
    Player = c(
      paste0("T1.P", 1:5),
      paste0("T1.P", 1:5),
      paste0("T2.P", 1:5)
    ),
    Year = rep(c(2011L, 2012L, 2011L), each = 5L),
    Str = c(
      # T1 / 2011: every row ends with an extra token
      paste0(
        "a bin|a 3 2|a north|an|a c d|",
        c("a bin", "a 3 2", "a north", "an", "a c d")
      ),
      # T1 / 2012: rows 2 and 5 lack the extra token
      paste0("a bin|a north|an", c("|a bin", "", "|a north", "|an", "")),
      # T2 / 2011: row 2 lacks the extra token
      paste0("an|bin|4eight|12", c("|an", "", "|bin", "|4eight", "|12"))
    )
  ),
  row.names = c(NA, -15L),
  class = c("data.table", "data.frame")
)
# Step 1: for each Team/Year group, find the largest number of "|" separators
# in Str ("|" is escaped because it means alternation in a regex).
CountPipes <- input_DT[
  ,
  .(maxPipes = max(str_count(Str, "\\|"))),
  by = c("Team", "Year")
]

# Step 2: attach the group maximum to every row. The join columns are spelled
# out so the merge does not silently depend on which column names happen to be
# shared by the two tables.
input_DT2 <- merge(input_DT, CountPipes, by = c("Team", "Year"))

# Step 3: count the separators of each individual row.
input_DT2[, Pipes := str_count(Str, "\\|")]

# Step 4: rows that reach the group maximum keep their last token (everything
# after the final "|"); shorter rows get a missing value. NA_character_ keeps
# the result type explicitly character.
input_DT2[
  ,
  Var := fifelse(
    Pipes == maxPipes,
    str_extract(Str, "[^|]+$"),
    NA_character_
  )
]
input_DT2
#> Key: <Team, Year>
#> Team Year Player Str maxPipes Pipes
#> <char> <int> <char> <char> <int> <int>
#> 1: T1 2011 T1.P1 a bin|a 3 2|a north|an|a c d|a bin 5 5
#> 2: T1 2011 T1.P2 a bin|a 3 2|a north|an|a c d|a 3 2 5 5
#> 3: T1 2011 T1.P3 a bin|a 3 2|a north|an|a c d|a north 5 5
#> 4: T1 2011 T1.P4 a bin|a 3 2|a north|an|a c d|an 5 5
#> 5: T1 2011 T1.P5 a bin|a 3 2|a north|an|a c d|a c d 5 5
#> 6: T1 2012 T1.P1 a bin|a north|an|a bin 3 3
#> 7: T1 2012 T1.P2 a bin|a north|an 3 2
#> 8: T1 2012 T1.P3 a bin|a north|an|a north 3 3
#> 9: T1 2012 T1.P4 a bin|a north|an|an 3 3
#> 10: T1 2012 T1.P5 a bin|a north|an 3 2
#> 11: T2 2011 T2.P1 an|bin|4eight|12|an 4 4
#> 12: T2 2011 T2.P2 an|bin|4eight|12 4 3
#> 13: T2 2011 T2.P3 an|bin|4eight|12|bin 4 4
#> 14: T2 2011 T2.P4 an|bin|4eight|12|4eight 4 4
#> 15: T2 2011 T2.P5 an|bin|4eight|12|12 4 4
#> Var
#> <char>
#> 1: a bin
#> 2: a 3 2
#> 3: a north
#> 4: an
#> 5: a c d
#> 6: a bin
#> 7: <NA>
#> 8: a north
#> 9: an
#> 10: <NA>
#> 11: an
#> 12: <NA>
#> 13: bin
#> 14: 4eight
#> 15: 12
创建于 2024 年 10 月 17 日,使用 reprex v2.1.1