基于原始列和第二个dataFrame

问题描述 投票:0回答:1
# Example data from the question: seven monthly rows with a code column (RT)
# and the same code carrying an "a" suffix (RTa).
df1 <- data.frame(
  Date = as.Date(c(
    "2024-01-01", "2024-02-01", "2024-03-01", "2024-04-01",
    "2024-05-01", "2024-06-01", "2024-07-01"
  )),
  RT = c(
    "150.5-40", "150.5-40", "150.5-40", "150.5-40",
    "150.5-140", "150.5-80", "150.5-80"
  ),
  # Fixed: the fifth value was "150.5- 140a" (stray space after the hyphen),
  # which did not match the other rows.
  RTa = c(
    "150.5-40a", "150.5-40a", "150.5-40a", "150.5-40a",
    "150.5-140a", "150.5-80a", "150.5-80a"
  )
)

df1

Date        RT         RTa
2024-01-01  150.5-40   150.5-40a
2024-02-01  150.5-40   150.5-40a
2024-03-01  150.5-40   150.5-40a
2024-04-01  150.5-40   150.5-40a
2024-05-01  150.5-140  150.5-140a
2024-06-01  150.5-80   150.5-80a
2024-07-01  150.5-80   150.5-80a

# Second data frame from the question: a Date column plus two code columns
# (RT and MRT), two rows.
df2 <- data.frame(
  Date = as.Date(c("2024-03-01", "2024-04-01")),
  RT = c("150.5-40", "150.5-10"),
  MRT = c("150.5-140", "150.5-110")
)


df2

Date        RT        MRT
2024-03-01  150.5-40  150.5-140
2024-04-01  150.5-10  150.5-110

期望的结果:

Date        RT         RTa         RT_F
2024-01-01  150.5-40   150.5-40a   150.5-40
2024-02-01  150.5-40   150.5-40a   150.5-40
2024-03-01  150.5-40   150.5-40a   150.5-40a
2024-04-01  150.5-40   150.5-40a   150.5-40a
2024-05-01  150.5-140  150.5-140a  150.5-140a
2024-06-01  150.5-80   150.5-80a   150.5-80
2024-07-01  150.5-80   150.5-80a   150.5-80
我尝试使用 dplyr::mutate,如下所示:

# Attempt from the question: set RT_F to RTa when the condition holds, else RT.
# `RT == df2$RT` compares df1's 7-row column element-wise against df2's 2-row
# column (the df2 values are recycled); it is not a per-row lookup in df2.
# `&` binds tighter than `|`, so the date test only guards the MRT comparison.
df1 %>% dplyr::mutate(RT_F = if_else(RT == df2$RT | RT == df2$MRT & Date >= df2$Date, RTa, RT))

产生以下不准确的结果:

Date        RT         RTa         RT_F
2024-01-01  150.5-40   150.5-40a   150.5-40a
2024-02-01  150.5-40   150.5-40a   150.5-40
2024-03-01  150.5-40   150.5-40a   150.5-40a
2024-04-01  150.5-40   150.5-40a   150.5-40
2024-05-01  150.5-140  150.5-140a  150.5-140a
2024-06-01  150.5-80   150.5-80a   150.5-80
2024-07-01  150.5-80   150.5-80a   150.5-80
我也尝试了:

# Loop attempt from the question, with its line breaks restored (collapsed
# onto one line, everything after the first `#` was a comment).
for (i in 1:nrow(df1)) { # For every row in df1
  # Select if df1 RT = df2 RT
  if (df1[i, "RT"] %in% df2$"RT") { # If RT is in df2
    # NOTE: `df` is not a data frame defined in this snippet (the data frames
    # are df1 and df2), so R resolves the name to a function; this assignment
    # is what raises "object of type 'closure' is not subsettable".
    df[i, "RTF"] <- df2$RTa # Add RTa to the RTF column
  }
}

产生以下错误:

Error in df[i, "RTF"] <- df2$RTa : object of type 'closure' is not subsettable

我也尝试了以下内容,但没有成功:

df1 %>% dplyr::mutate(RTF = case_when( RT == df2$RT | RT == df2$MRT & Date >= df2$Date ~ RT, TRUE ~ RTa ))
    

基于您在 dplyr 代码中的逻辑,实现所需输出的一种方法是使用 dplyr::left_join() 合并 df1 和 df2,然后应用日期条件。
具体做法之一是在连接之前先使用 tidyr::pivot_longer() 将 df2 对象转换为长格式。
edit mutate
1个回答
0
投票
首先,加载所需的包和数据(示例数据中存在一处错误,已修正):

library(dplyr)
library(tidyr)

# Seven rows dated the first of each month, January to July 2024, with a code
# column RT; RTa is the same code with an "a" appended.
df1 <- data.frame(
  Date = as.Date(sprintf("2024-%02d-01", 1:7)),
  RT = rep(c("150.5-40", "150.5-140", "150.5-80"), times = c(4, 1, 2))
)
df1$RTa <- paste0(df1$RT, "a")

# Two-row data frame with a Date column and two code columns (RT, MRT); every
# code shares the "150.5-" prefix.
df2 <- data.frame(
  Date = as.Date(c("2024-03-01", "2024-04-01")),
  RT = paste0("150.5-", c("40", "10")),
  MRT = paste0("150.5-", c("140", "110"))
)
然后使用 pivot_longer() 将 df2 转换为长格式:

# Reshape df2 to long form: every column except Date is stacked, with the
# original column name stored in Type and its value in Code.
df3 <- df2 |>
  pivot_longer(-Date, names_to = "Type", values_to = "Code")

df3
# # A tibble: 4 × 3
#   Date       Type  Code
#   <date>     <chr> <chr>
# 1 2024-03-01 RT    150.5-40
# 2 2024-03-01 MRT   150.5-140
# 3 2024-04-01 RT    150.5-10
# 4 2024-04-01 MRT   150.5-110

最后,连接(join)df1 和 df3,并创建一个应用了日期条件的新列:

# Match each df1 row to df3 on the code (df1$RT against df3$Code). Both inputs
# have a Date column, so after the join they are referred to as Date.x (df1)
# and Date.y (df3).
df4 <- df1 |>
  left_join(df3, by = join_by(RT == Code)) |>
  # df1 rows with no matching Code get Date.y = NA, so the comparison, and
  # therefore RTF, is NA for them (rows 6 and 7 below).
  mutate(RTF = if_else(Date.x >= Date.y, RTa, RT)) |>
  select(Date = Date.x, RT, RTa, RTF)

df4
#         Date        RT        RTa        RTF
# 1 2024-01-01  150.5-40  150.5-40a   150.5-40
# 2 2024-02-01  150.5-40  150.5-40a   150.5-40
# 3 2024-03-01  150.5-40  150.5-40a  150.5-40a
# 4 2024-04-01  150.5-40  150.5-40a  150.5-40a
# 5 2024-05-01 150.5-140 150.5-140a 150.5-140a
# 6 2024-06-01  150.5-80  150.5-80a       <NA>
# 7 2024-07-01  150.5-80  150.5-80a       <NA>

注意:RTF 列中的 NA 值与您期望的输出不同,这是因为示例 df2 中没有与 RT "150.5-80" 匹配的值。如果希望在没有匹配时保留 RT,可以在 if_else() 中加上 missing = RT。
	

最新问题
© www.soinside.com 2019 - 2025. All rights reserved.