我有两个数据框 df 和 df2。df 中有一列 regular_ex,其中存放了用于匹配 df2$col 中各变量的正则表达式。
我想借助 df 中的 regular_ex 列,把 df2 中匹配到的行(col、city、rev)连接到 df 上。
# Lookup table: each row holds the regular expression (regular_ex) used to
# find the corresponding entry in df2$col.
df <- data.frame(
  var = c("D52", "D31", "D32", "D33", "D34_1"),
  regular_ex = c("^D52_", "^D31_\\d_1$", "^D32_", "^D33_", "^D34_\\d_1$"),
  dt = c("STA", "MP", "ST", "LI", "DA")
)

# Table whose rows are attached to df when df2$col matches df$regular_ex.
df2 <- data.frame(
  col = c("D52_1", "D3_1_1", "D32_1", "D33_1", "D34_1"),
  city = c("Munich", "Ceylon", "Dhaka", "London", "NY"),
  rev = c(1653, 1432, 7642, 9864, 6522)
)

# Typed NA placeholders so the target columns have the right type even when
# a pattern matches nothing.
df$col <- NA_character_
df$city <- NA_character_
df$rev <- NA_real_

# For every pattern in df, copy the first matching row of df2 into df.
# seq_len() keeps the loop from running when df has zero rows.
for (i in seq_len(nrow(df))) {
  # Row indices of df2 whose `col` matches the i-th regular expression.
  hits <- grep(df$regular_ex[i], df2$col)
  if (length(hits) > 0) {
    # If several rows match, keep only the first one.
    first_hit <- hits[1]
    # Values must be read from df2 (the source table), not from df itself.
    df$col[i] <- df2$col[first_hit]
    df$city[i] <- df2$city[first_hit]
    df$rev[i] <- df2$rev[first_hit]
  }
}
如果可以使用 fuzzyjoin 和 dplyr,可以选择 regex_inner_join();再配合 slice_head(),在存在多个匹配时只保留第一个匹配项。
library(fuzzyjoin)
library(dplyr, warn.conflicts = FALSE)

# Lookup table: one regular expression per variable.
df <- data.frame(
  var = c("D52", "D31", "D32", "D33", "D34_1"),
  regular_ex = c("^D52_", "^D31_\\d_1$", "^D32_", "^D33_", "^D34_\\d_1$"),
  dt = c("STA", "MP", "ST", "LI", "DA")
)

# Table whose `col` values are matched against the patterns in df.
df2 <- data.frame(
  col = c("D52_1", "D3_1_1", "D32_1", "D33_1", "D34_1"),
  city = c("Munich", "Ceylon", "Dhaka", "London", "NY"),
  rev = c(1653, 1432, 7642, 9864, 6522)
)

# regex_inner_join() joins a string column of the first table by a regex
# column of the second table, so df (which holds the patterns) is passed
# as the second table.
regex_inner_join(df2, df, by = c(col = "regular_ex")) |>
  # Put the columns of df first, followed by the columns of df2.
  select(all_of(names(df)), all_of(names(df2))) |>
  # When one pattern matched several rows, keep only the first of them.
  slice_head(n = 1, by = all_of(names(df)))
#> var regular_ex dt col city rev
#> 1 D52 ^D52_ STA D52_1 Munich 1653
#> 2 D32 ^D32_ ST D32_1 Dhaka 7642
#> 3 D33 ^D33_ LI D33_1 London 9864
创建于 2024-07-06,使用 reprex v2.1.0