我使用了比应有的更多的嵌套循环,但现在我希望创建更有效的代码来与同事共享,特别是现在我们正在使用更大的数据集。
我经常做的一个操作是逐行处理数据:对每一行,我 1) 找出同一分组中位于该行之前某个时间窗口内的行,2) 统计其中有多少行满足某个条件,3) 据此为该行分配一个类别。
我正在努力弄清楚如何以 dplyr 或矢量化方式完成任务 1) 和 2)。
我附上了我正在使用的嵌套循环的简化版本。在这个特定的示例中,我 1) 逐行查看该特定海狸在过去 100 秒内的测量结果,2) 统计其中有多少行的温度高于中位数,以及 3) 据此为该行分配一个类别。
# Example data: stack the two beaver datasets, tagging each row with the
# animal it came from so that `beaver` can serve as the grouping column.
beaver_data <- filter(
  bind_rows(
    mutate(beaver1, beaver = "1"),
    mutate(beaver2, beaver = "2")
  ),
  # Drop the second recorded day of each beaver to keep the example simple.
  !(beaver == "1" & day == 347),
  !(beaver == "2" & day == 308)
)
# Placeholder for the category that the classification step fills in.
beaver_data[["class"]] <- NA
# Row-by-row classification: for each row, take the same beaver's readings
# whose `time` falls in [time - 100, time] (the current row included),
# compute the share of those readings with temp above 37, and store a
# category in `class`.
# NOTE(review): the original comments call the window "100 seconds" and the
# threshold 37 "the median"; the sample rows suggest `time` is HHMM clock
# time, which would make 100 units one hour -- confirm against the data docs.
# seq_len() is used instead of 1:nrow() so that an empty data frame yields
# zero iterations rather than looping over c(1, 0).
for (i in seq_len(nrow(beaver_data))) {
  beaver_data_loop <- beaver_data %>%
    # Only rows from the same beaver as row i.
    filter(beaver == beaver_data$beaver[i]) %>%
    # Only rows inside the trailing window that ends at row i.
    filter(between(time, beaver_data$time[i] - 100, beaver_data$time[i]))

  # Fraction of readings in the window that exceed the threshold.
  ratio_pass_offset_loop <-
    sum(beaver_data_loop$temp > 37) / nrow(beaver_data_loop)

  if (ratio_pass_offset_loop < 0.5) {
    # Fewer than half of the readings are above the threshold.
    beaver_data$class[i] <- "Low"
  } else if (ratio_pass_offset_loop < 1) {
    # At least half, but not all, of the readings are above the threshold.
    beaver_data$class[i] <- "Medium"
  } else {
    # Every reading in the window is above the threshold.
    beaver_data$class[i] <- "High"
  }
}
我在 dplyr 中也成功运行了任务 1),但我无法让它进入下一步并计算有多少也高于中位数。
# For every row, count the rows of the same beaver whose `time` lies in the
# trailing window [time - 100, time] (the row itself included).
# vapply() returns one number per row directly. The earlier sapply() +
# rowSums() form built a logical matrix and failed for a group with a single
# row, where sapply() returns a plain vector instead of a matrix.
beaver_data %>%
  group_by(beaver) %>%
  mutate(
    CountWithin100Seconds = vapply(
      time,
      function(x) sum(x - time >= 0 & x - time <= 100),
      numeric(1)
    )
  ) %>%
  # Drop the grouping so later verbs are not silently applied per beaver.
  ungroup()
这是一种实现:
# Per beaver: for each row, compute the share of the preceding readings
# (within the trailing 100 time units) whose temp exceeds 37, then bin that
# share into Low / Medium / High.
beaver_data %>%
  mutate(
    # vapply() (rather than sapply()) guarantees a numeric vector, including
    # for empty input, where sapply() would return list().
    ratio = vapply(time, function(time1) {
      # `time` is the whole group's vector; `time1` is one row's value.
      # Subtracting 1e-9 from the upper bound leaves the current row out.
      ind <- between(time, time1 - 100, time1 - 1e-9)
      sum(temp[ind] > 37) / sum(ind)
    }, numeric(1)),
    # Rows with no earlier reading in the window give 0 / 0 = NaN; use 0.
    ratio = coalesce(ratio, 0),
    # right = FALSE makes the bins [-Inf, 0.5), [0.5, 1), [1, Inf), so a ratio
    # of exactly 0.5 is "Medium" and exactly 1 is "High", matching the
    # `< 0.5` / `< 1` / otherwise thresholds of the loop version. With the
    # default right = TRUE, "High" was never assigned.
    # NOTE: the sample output printed below this block was produced before
    # this fix and still shows 0.5 as "Low" and 1 as "Medium".
    class = cut(ratio, c(-Inf, 0.5, 1, Inf),
                labels = c("Low", "Medium", "High"), right = FALSE),
    .by = beaver)
# day time temp activ beaver ratio class
# 1 346 840 36.33 0 1 0.00000 Low
# 2 346 850 36.34 0 1 0.00000 Low
# 3 346 900 36.35 0 1 0.00000 Low
# 4 346 910 36.42 0 1 0.00000 Low
# 5 346 920 36.55 0 1 0.00000 Low
# 6 346 930 36.69 0 1 0.00000 Low
# 7 346 940 36.71 0 1 0.00000 Low
# 8 346 950 36.75 0 1 0.00000 Low
# 9 346 1000 36.81 0 1 0.00000 Low
# 10 346 1010 36.88 0 1 0.00000 Low
# 11 346 1020 36.89 0 1 0.00000 Low
# 12 346 1030 36.91 0 1 0.00000 Low
# 13 346 1040 36.85 0 1 0.00000 Low
# 14 346 1050 36.89 0 1 0.00000 Low
# 15 346 1100 36.89 0 1 0.00000 Low
# 16 346 1110 36.67 0 1 0.00000 Low
# 17 346 1120 36.50 0 1 0.00000 Low
# 18 346 1130 36.74 0 1 0.00000 Low
# 19 346 1140 36.77 0 1 0.00000 Low
# 20 346 1150 36.76 0 1 0.00000 Low
# 21 346 1200 36.78 0 1 0.00000 Low
# 22 346 1210 36.82 0 1 0.00000 Low
# 23 346 1220 36.89 0 1 0.00000 Low
# 24 346 1230 36.99 0 1 0.00000 Low
# 25 346 1240 36.92 0 1 0.00000 Low
# 26 346 1250 36.99 0 1 0.00000 Low
# 27 346 1300 36.89 0 1 0.00000 Low
# 28 346 1310 36.94 0 1 0.00000 Low
# 29 346 1320 36.92 0 1 0.00000 Low
# 30 346 1330 36.97 0 1 0.00000 Low
# 31 346 1340 36.91 0 1 0.00000 Low
# 32 346 1350 36.79 0 1 0.00000 Low
# 33 346 1400 36.77 0 1 0.00000 Low
# 34 346 1410 36.69 0 1 0.00000 Low
# 35 346 1420 36.62 0 1 0.00000 Low
# 36 346 1430 36.54 0 1 0.00000 Low
# 37 346 1440 36.55 0 1 0.00000 Low
# 38 346 1450 36.67 0 1 0.00000 Low
# 39 346 1500 36.69 0 1 0.00000 Low
# 40 346 1510 36.62 0 1 0.00000 Low
# 41 346 1520 36.64 0 1 0.00000 Low
# 42 346 1530 36.59 0 1 0.00000 Low
# 43 346 1540 36.65 0 1 0.00000 Low
# 44 346 1550 36.75 0 1 0.00000 Low
# 45 346 1600 36.80 0 1 0.00000 Low
# 46 346 1610 36.81 0 1 0.00000 Low
# 47 346 1620 36.87 0 1 0.00000 Low
# 48 346 1630 36.87 0 1 0.00000 Low
# 49 346 1640 36.89 0 1 0.00000 Low
# 50 346 1650 36.94 0 1 0.00000 Low
# 51 346 1700 36.98 0 1 0.00000 Low
# 52 346 1710 36.95 0 1 0.00000 Low
# 53 346 1720 37.00 0 1 0.00000 Low
# 54 346 1730 37.07 1 1 0.00000 Low
# 55 346 1740 37.05 0 1 0.16667 Low
# 56 346 1750 37.00 0 1 0.33333 Low
# 57 346 1800 36.95 0 1 0.33333 Low
# 58 346 1810 37.00 0 1 0.33333 Low
# 59 346 1820 36.94 0 1 0.33333 Low
# 60 346 1830 36.88 0 1 0.33333 Low
# 61 346 1840 36.93 0 1 0.16667 Low
# 62 346 1850 36.98 0 1 0.00000 Low
# 63 346 1900 36.97 0 1 0.00000 Low
# 64 346 1910 36.85 0 1 0.00000 Low
# 65 346 1920 36.92 0 1 0.00000 Low
# 66 346 1930 36.99 0 1 0.00000 Low
# 67 346 1940 37.01 0 1 0.00000 Low
# 68 346 1950 37.10 1 1 0.16667 Low
# 69 346 2000 37.09 0 1 0.33333 Low
# 70 346 2010 37.02 0 1 0.50000 Low
# 71 346 2020 36.96 0 1 0.66667 Medium
# 72 346 2030 36.84 0 1 0.66667 Medium
# 73 346 2040 36.87 0 1 0.66667 Medium
# 74 346 2050 36.85 0 1 0.50000 Low
# 75 346 2100 36.85 0 1 0.33333 Low
# 76 346 2110 36.87 0 1 0.16667 Low
# 77 346 2120 36.89 0 1 0.00000 Low
# 78 346 2130 36.86 0 1 0.00000 Low
# 79 346 2140 36.91 0 1 0.00000 Low
# 80 346 2150 37.53 1 1 0.00000 Low
# 81 346 2200 37.23 0 1 0.16667 Low
# 82 346 2210 37.20 0 1 0.33333 Low
# 83 346 2230 37.25 1 1 0.60000 Medium
# 84 346 2240 37.20 0 1 0.80000 Medium
# 85 346 2250 37.21 0 1 1.00000 Medium
# 86 346 2300 37.24 1 1 1.00000 Medium
# 87 346 2310 37.10 0 1 1.00000 Medium
# 88 346 2320 37.20 0 1 1.00000 Medium
# 89 346 2330 37.18 0 1 1.00000 Medium
# 90 346 2340 36.93 0 1 1.00000 Medium
# 91 346 2350 36.83 0 1 0.83333 Medium
# 92 307 930 36.58 0 2 0.00000 Low
# 93 307 940 36.73 0 2 0.00000 Low
# 94 307 950 36.93 0 2 0.00000 Low
# 95 307 1000 37.15 0 2 0.00000 Low
# 96 307 1010 37.23 0 2 0.25000 Low
# 97 307 1020 37.24 0 2 0.40000 Low
# 98 307 1030 37.24 0 2 0.50000 Low
# 99 307 1040 36.90 0 2 0.66667 Medium
# 100 307 1050 36.95 0 2 0.66667 Medium
# 101 307 1100 36.89 0 2 0.66667 Medium
# 102 307 1110 36.95 0 2 0.50000 Low
# 103 307 1120 37.00 0 2 0.33333 Low
# 104 307 1130 36.90 0 2 0.16667 Low
# 105 307 1140 36.99 0 2 0.00000 Low
# 106 307 1150 36.99 0 2 0.00000 Low
# 107 307 1200 37.01 0 2 0.00000 Low
# 108 307 1210 37.04 0 2 0.16667 Low
# 109 307 1220 37.04 0 2 0.33333 Low
# 110 307 1230 37.14 0 2 0.50000 Low
# 111 307 1240 37.07 0 2 0.66667 Medium
# 112 307 1250 36.98 0 2 0.83333 Medium
# 113 307 1300 37.01 0 2 0.83333 Medium
# 114 307 1310 36.97 0 2 0.83333 Medium
# 115 307 1320 36.97 0 2 0.66667 Medium
# 116 307 1330 37.12 0 2 0.50000 Low
# 117 307 1340 37.13 0 2 0.50000 Low
# 118 307 1350 37.14 0 2 0.50000 Low
# 119 307 1400 37.15 0 2 0.66667 Medium
# 120 307 1410 37.17 0 2 0.66667 Medium
# 121 307 1420 37.12 0 2 0.83333 Medium
# 122 307 1430 37.12 0 2 1.00000 Medium
# 123 307 1440 37.17 0 2 1.00000 Medium
# 124 307 1450 37.28 0 2 1.00000 Medium
# 125 307 1500 37.28 0 2 1.00000 Medium
# 126 307 1510 37.44 0 2 1.00000 Medium
# 127 307 1520 37.51 0 2 1.00000 Medium
# 128 307 1530 37.64 0 2 1.00000 Medium
# 129 307 1540 37.51 0 2 1.00000 Medium
# 130 307 1550 37.98 1 2 1.00000 Medium
# 131 307 1600 38.02 1 2 1.00000 Medium
# 132 307 1610 38.00 1 2 1.00000 Medium
# 133 307 1620 38.24 1 2 1.00000 Medium
# 134 307 1630 38.10 1 2 1.00000 Medium
# 135 307 1640 38.24 1 2 1.00000 Medium
# 136 307 1650 38.11 1 2 1.00000 Medium
# 137 307 1700 38.02 1 2 1.00000 Medium
# 138 307 1710 38.11 1 2 1.00000 Medium
# 139 307 1720 38.01 1 2 1.00000 Medium
# 140 307 1730 37.91 1 2 1.00000 Medium
# 141 307 1740 37.96 1 2 1.00000 Medium
# 142 307 1750 38.03 1 2 1.00000 Medium
# 143 307 1800 38.17 1 2 1.00000 Medium
# 144 307 1810 38.19 1 2 1.00000 Medium
# 145 307 1820 38.18 1 2 1.00000 Medium
# 146 307 1830 38.15 1 2 1.00000 Medium
# 147 307 1840 38.04 1 2 1.00000 Medium
# 148 307 1850 37.96 1 2 1.00000 Medium
# 149 307 1900 37.84 1 2 1.00000 Medium
# 150 307 1910 37.83 1 2 1.00000 Medium
# 151 307 1920 37.84 1 2 1.00000 Medium
# 152 307 1930 37.74 1 2 1.00000 Medium
# 153 307 1940 37.76 1 2 1.00000 Medium
# 154 307 1950 37.76 1 2 1.00000 Medium
# 155 307 2000 37.64 1 2 1.00000 Medium
# 156 307 2010 37.63 1 2 1.00000 Medium
# 157 307 2020 38.06 1 2 1.00000 Medium
# 158 307 2030 38.19 1 2 1.00000 Medium
# 159 307 2040 38.35 1 2 1.00000 Medium
# 160 307 2050 38.25 1 2 1.00000 Medium
# 161 307 2100 37.86 1 2 1.00000 Medium
# 162 307 2110 37.95 1 2 1.00000 Medium
# 163 307 2120 37.95 1 2 1.00000 Medium
# 164 307 2130 37.76 1 2 1.00000 Medium
# 165 307 2140 37.60 1 2 1.00000 Medium
# 166 307 2150 37.89 1 2 1.00000 Medium
# 167 307 2200 37.86 1 2 1.00000 Medium
# 168 307 2210 37.71 1 2 1.00000 Medium
# 169 307 2220 37.78 1 2 1.00000 Medium
# 170 307 2230 37.82 1 2 1.00000 Medium
# 171 307 2240 37.76 1 2 1.00000 Medium
# 172 307 2250 37.81 1 2 1.00000 Medium
# 173 307 2300 37.84 1 2 1.00000 Medium
# 174 307 2310 38.01 1 2 1.00000 Medium
# 175 307 2320 38.10 1 2 1.00000 Medium
# 176 307 2330 38.15 1 2 1.00000 Medium
# 177 307 2340 37.92 1 2 1.00000 Medium
# 178 307 2350 37.64 1 2 1.00000 Medium
备注:
- `class` 在这里是一个 `factor`,在顺序有意义的地方(使用 `sort`、`arrange` 等,包括 `dplyr` 和 `ggplot2`)会遵循其水平顺序。如果您需要严格的字符串,请使用 `as.character(..)`。
- `sapply` 中的匿名函数首先找出(组内)哪些行落在指定的时间范围内,然后有条件地对这些行对应的 `temp` 进行求和。请注意,`time` 是该组的整个 `time` 向量,而 `time1` 只是某一行的值。
- 我从 `time1` 中减去 `1e-9` 作为时间范围的上限,因为我不想在求和和计数时匹配当前这一行。如果您想在计算中包含这一行,只需删除此减法即可。