根据正则表达式获取列

问题描述 投票:0回答:1

我有两个数据框 df 和 df2。df 中有一个 regular_ex 列,其中存放了与 df2$col 中各变量对应的正则表达式。

我想借助 df 中的 regular_ex 列来连接 df2 中的所有变量。

# Example data: `df` holds one regular expression per variable (`regular_ex`);
# `df2` holds the candidate names (`col`) with their `city` and `rev` values.
df <- data.frame(
  var = c("D52", "D31", "D32", "D33", "D34_1"),
  regular_ex = c("^D52_", "^D31_\\d_1$", "^D32_", "^D33_", "^D34_\\d_1$"),
  dt = c("STA", "MP", "ST", "LI", "DA")
)

df2 <- data.frame(
  col = c("D52_1", "D3_1_1", "D32_1", "D33_1", "D34_1"),
  city = c("Munich", "Ceylon", "Dhaka", "London", "NY"),
  rev = c(1653, 1432, 7642, 9864, 6522)
)

# For every regex in `df`, find the row index of the first `df2$col` entry it
# matches; NA_integer_ when the regex matches nothing.
first_match <- vapply(
  df$regular_ex,
  function(pattern) {
    hits <- grep(pattern, df2$col)
    if (length(hits) > 0) hits[1] else NA_integer_
  },
  integer(1),
  USE.NAMES = FALSE
)

# Copy the matched values from `df2` (not from `df`, whose new columns would
# only contain NA). Indexing with NA yields NA, so rows of `df` without a
# match stay NA in all three columns.
df$col <- df2$col[first_match]
df$city <- df2$city[first_match]
df$rev <- df2$rev[first_match]

r
1个回答
0
投票

如果可以使用 fuzzyjoin 和 dplyr,你可以选择 regex_inner_join();再使用 slice_head(),就可以只保留第一个匹配项。

library(fuzzyjoin)
library(dplyr, warn.conflicts = FALSE)

# Lookup table: one regular expression per variable.
df <- data.frame(
  var = c("D52", "D31", "D32", "D33", "D34_1"),
  regular_ex = c("^D52_", "^D31_\\d_1$", "^D32_", "^D33_", "^D34_\\d_1$"),
  dt = c("STA", "MP", "ST", "LI", "DA")
)

# Values to attach: `col` is the string tested against each regex.
df2 <- data.frame(
  col = c("D52_1", "D3_1_1", "D32_1", "D33_1", "D34_1"),
  city = c("Munich", "Ceylon", "Dhaka", "London", "NY"),
  rev = c(1653, 1432, 7642, 9864, 6522)
)

# Join the string column of the first table against the regex column of the
# second one, so the table holding the regexes (`df`) is passed as `y`.
joined <- regex_inner_join(df2, df, by = c(col = "regular_ex"))
# Reorder so that the columns of `df` come first, followed by those of `df2`.
reordered <- select(joined, names(df), names(df2))
# When one regex matched several rows, keep only the first match per `df` row.
slice_head(reordered, n = 1, by = names(df))
#>   var regular_ex  dt   col   city  rev
#> 1 D52      ^D52_ STA D52_1 Munich 1653
#> 2 D32      ^D32_  ST D32_1  Dhaka 7642
#> 3 D33      ^D33_  LI D33_1 London 9864

创建于 2024-07-06,使用 reprex v2.1.0

© www.soinside.com 2019 - 2024. All rights reserved.