这可能是一个关于 R 中 `dplyr` 的 `summarise` 的问题。
我有一个 `data.frame`,其中以 5 分钟的时间间隔记录了各受试者的值。它包含以下三列:`id`(受试者 ID)、`value`(该时间点的记录值)以及 `cum_time`(每个 `id` 的累积时间,单位为分钟):
library(dplyr)
# Reproducible toy data: three subjects with 100 readings each, values drawn
# uniformly from [10, 20].
set.seed(1)
df <- data.frame(
  id = rep(c("id1", "id2", "id3"), each = 100),
  value = runif(n = 300, min = 10, max = 20)
) %>%
  dplyr::group_by(id) %>%
  # row_number() is evaluated within each id group, so cum_time restarts at 0
  # for every subject and advances in 5-minute steps (0, 5, ..., 495).
  dplyr::mutate(cum_time = (dplyr::row_number() - 1) * 5)
我想按 60 分钟的时间间隔,用 `value` 的中位数(`median`)对这个 `data.frame` 进行汇总,得到如下结果 `data.frame`:
# Build the expected result by hand: for every subject, the median of `value`
# inside each 60-minute window, labelled by the window's upper bound.
# The first window is [0, 60]; later windows start 5 minutes after the
# previous upper bound (65-120, 125-180, ...), so no reading is counted twice.
do.call(
  rbind,
  lapply(c("id1", "id2", "id3"), function(subject) {
    window_start <- c(0, 65, 125, 185, 245)
    window_end <- c(60, 120, 180, 240, 300)
    subject_rows <- dplyr::filter(df, id == subject)
    data.frame(
      id = subject,
      median_value = vapply(
        seq_along(window_end),
        function(k) {
          in_window <- subject_rows$cum_time >= window_start[k] &
            subject_rows$cum_time <= window_end[k]
          median(subject_rows$value[in_window])
        },
        numeric(1)
      ),
      cum_time = window_end
    )
  })
)
id median_value cum_time
1 id1 15.72853 60
2 id1 15.74687 120
3 id1 14.87811 180
4 id1 16.00048 240
5 id1 14.57858 300
6 id2 15.98761 60
7 id2 14.65317 120
8 id2 15.36035 180
9 id2 15.16835 240
10 id2 13.90954 300
11 id3 12.68951 60
12 id3 15.79852 120
13 id3 14.03968 180
14 id3 14.29187 240
15 id3 15.11250 300
也许这种方法适合你?
# Median of `value` per subject and 60-minute window.
# Readings after minute 300 are dropped first, so max(cum_time) is 300 and the
# breaks are 0, 60, ..., 300.
df %>%
  filter(cum_time <= 300) %>%
  group_by(
    id,
    # cut() builds right-closed bins (a, b]; include.lowest = TRUE also closes
    # the first bin on the left so cum_time == 0 falls in [0,60] instead of NA.
    # Spelled TRUE rather than T because T is an ordinary, reassignable variable.
    grp = cut(cum_time, seq(0, max(cum_time), by = 60), include.lowest = TRUE)
  ) %>%
  summarize(median_value = median(value), .groups = "drop")
输出:
id grp median_value
1 id1 [0,60] 15.72853
2 id1 (60,120] 15.74687
3 id1 (120,180] 14.87811
4 id1 (180,240] 16.00048
5 id1 (240,300] 14.57858
6 id2 [0,60] 15.98761
7 id2 (60,120] 14.65317
8 id2 (120,180] 15.36035
9 id2 (180,240] 15.16835
10 id2 (240,300] 13.90954
11 id3 [0,60] 12.68951
12 id3 (60,120] 15.79852
13 id3 (120,180] 14.03968
14 id3 (180,240] 14.29187
15 id3 (240,300] 15.11250