分组数据的分组条件[R]

问题描述 投票:0回答:1

我有这个数据集:

# Example data set for the question: 74 rows with the columns
# Dist, Alt, Profil (values 3, 4 and 5) and Remark (NA, "HB", "TH" or "THB").
# The last 8 rows are the ones whose Remark is "THB".
# The class attribute marks the object as a tibble (tbl_df).
Profile <- structure(list(Dist = c(0, 1.4, 3.09, 3.64, 4.24, 4.85, 5.38, 
5.85, 6.39, 7.07, 7.69, 8.44, 9.19, 9.94, 10.65, 11.16, 11.77, 
12.07, 12.87, 13.42, 14.23, 15.44, 0, 1.4, 3.09, 3.64, 4.24, 
4.85, 5.38, 5.85, 6.39, 7.07, 7.69, 8.44, 9.19, 9.94, 10.65, 
11.16, 11.77, 12.07, 12.87, 13.42, 14.23, 15.44, 0, 1.4, 3.09, 
3.64, 4.24, 4.85, 5.38, 5.85, 6.39, 7.07, 7.69, 8.44, 9.19, 9.94, 
10.65, 11.16, 11.77, 12.07, 12.87, 13.42, 14.23, 15.44, 1.95409836065574, 
13.42, 13.42, 11.1106451612903, 2.89052459016393, 14.0610429447853, 
12.87, 2.17573770491803), Alt = c(170.82, 170.69, 167.64, 167.62, 
167.61, 167.57, 167.39, 167.38, 167.35, 167.41, 167.47, 167.45, 
167.39, 167.41, 167.44, 168.06, 168.19, 169.06, 169.29, 169.69, 
169.63, 169.65, 171.82, 171.69, 168.64, 168.62, 168.61, 168.57, 
168.39, 168.38, 168.35, 168.41, 168.47, 168.45, 168.39, 168.41, 
168.44, 169.06, 169.19, 170.06, 170.29, 169, 170.63, 170.65, 
169.82, 169.69, 166.64, 166.62, 166.61, 166.57, 166.39, 166.38, 
166.35, 166.41, 166.47, 166.45, 166.39, 166.41, 166.44, 167.06, 
167.19, 168.06, 168.29, 167, 168.63, 168.65, 169.69, 169.69, 
169, 169, 169, 168.29, 168.29, 168.29), Profil = c(3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 
3, 4, 4, 4, 5, 5, 5), Remark = c(NA, "HB", NA, NA, NA, NA, 
NA, NA, "TH", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "HB", NA, 
NA, NA, "HB", NA, NA, NA, NA, NA, NA, "TH", NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, "HB", NA, NA, NA, "HB", NA, NA, NA, NA, NA, 
NA, "TH", NA, NA, NA, NA, NA, NA, NA, NA, NA, "HB", NA, NA, NA, 
"THB", "THB", "THB", "THB", "THB", "THB", "THB", "THB")), row.names = c(NA, 
-74L), class = c("tbl_df", "tbl", "data.frame"))

我想对每个 Profil 分组应用一个布尔条件:如果 Dist <= 该分组中 Remark == "THB" 的观测值里最小的 Dist,则为 TRUE,否则为 FALSE。 没有分组的话应该是这样的:

# First attempt, kept exactly as posted (the author reports it does not work).
# NOTE(review): the pipeline starts from `Profil`, which is a column of the
#   `Profile` data set, not the data set itself.
# NOTE(review): `filter()` is piped the bare `Dist` vector instead of a data
#   frame; subsetting the vector directly would be Dist[Remark %in% "THB"].
# NOTE(review): the stated goal is `<=` the minimum "THB" distance, whereas
#   this line uses `>=` together with `max()`.
Profil %>% group_by(Profil) %>% mutate(P = ifelse(Dist >=   max(Dist %>% filter(Remark == "THB")), TRUE, FALSE))

然而,这并没有奏效。

我还尝试先提取 Remark 字段等于 "THB" 的观测值,再应用下面的代码:

# The 8 rows whose Remark is "THB", with columns Profil, Remark, Dist and Alt.
# NOTE(review): the result of this structure() call is not assigned to a name
#   here; the pipeline below reads `Int$Dist`, so presumably this object was
#   meant to be stored as `Int` - confirm.
structure(list(Profil = c(3, 3, 4, 4, 4, 5, 5, 5), Remark = c("THB", 
"THB", "THB", "THB", "THB", "THB", "THB", "THB"), Dist = c(1.95409836065574, 
13.42, 13.42, 11.1106451612903, 2.89052459016393, 14.0610429447853, 
12.87, 2.17573770491803), Alt = c(169.69, 169.69, 169, 169, 169, 
168.29, 168.29, 168.29)), row.names = c(NA, -8L), class = c("tbl_df", 
"tbl", "data.frame"))

# Second attempt, kept exactly as posted (the author reports it does not work).
# NOTE(review): `max(Int$Dist)` is a single value taken over all of `Int`, so
#   it is the same for every Profil group; the pipeline also starts from the
#   column name `Profil` rather than from the `Profile` data set.
Profil %>% group_by(Profil) %>% mutate(P = ifelse(Dist <=  max(Int$Dist), TRUE, FALSE))

但这也不起作用

我不知道如何继续。

r conditional-statements boolean
1个回答
0
投票

这是一种 data.table 方法:

library(data.table)

# Convert Profile to a data.table by reference so `:=` can add a column in place.
setDT(Profile)

# Lookup table: for each Profil, the smallest Dist among rows with Remark == "THB".
lookup <- Profile[Remark == "THB", .(val = min(Dist)), by = .(Profil)]
#    Profil      val
#     <num>    <num>
# 1:      3 1.954098
# 2:      4 2.890525
# 3:      5 2.175738

# Update join on Profil: `i.val` is the lookup value matched to each row.
# The comparison is already a logical vector, so it is assigned directly
# instead of being wrapped in ifelse(..., TRUE, FALSE).
Profile[lookup, cond := Dist <= i.val, on = .(Profil)]
#     Dist    Alt Profil Remark   cond
#    <num>  <num>  <num> <char> <lgcl>
# 1:  0.00 170.82      3   <NA>   TRUE
# 2:  1.40 170.69      3     HB   TRUE
# 3:  3.09 167.64      3   <NA>  FALSE
# 4:  3.64 167.62      3   <NA>  FALSE
# 5:  4.24 167.61      3   <NA>  FALSE
# 6:  4.85 167.57      3   <NA>  FALSE
# ...
© www.soinside.com 2019 - 2024. All rights reserved.