我拥有遥测数据,其中包含多个站点的每个个体 (ID) 的大量检测。我想计算我的研究中每个ID行驶的总距离。
每个站点都与其坐标(经度和纬度)相关联,我将其转换为 UTM 数据。
我创建了一个与我的数据相似的数据,具有相同的数据类型(data_detections)。
#generate a random data
generate_random_datetime <- function(start_date, end_date, n) {
seq(start_date, end_date, by = "min")[sample(1:(as.integer(difftime(end_date, start_date, units = "mins")) + 1), n)]
}
set.seed(123)
n <- 100
data_detections <- data.frame(
Date.and.Time..UTC. = generate_random_datetime(ymd_hms("2024-01-01 00:00:00"), ymd_hms("2024-01-20 23:59:59"), n),
Receiver = sample(1:10, n, replace = TRUE),
Latitude = runif(n, 52.0, 53.0),
Longitude = runif(n, 3.0, 4.0) ,
ID = as.character(sample(1:10, n, replace = TRUE))
)
我这样计算了我的总距离,但是当我将其与我的“真实”值进行比较时,这些值不正确
#convert to UTM
coord <- SpatialPoints(data_detections[, c("Longitude", "Latitude")],
proj4string = CRS("+proj=longlat +datum=WGS84"))
coord.t <- spTransform(coord, CRS("+proj=utm +datum=WGS84 +zone=43"))
data_detections[, c("Longitude_UTM", "Latitude_UTM")] <- coordinates(coord.t)
head(data_detections)
#caclulate the total distance
total_distance <- function(data) {
coords <- as.matrix(data[, c("Longitude_UTM", "Latitude_UTM")])
distances <- sqrt(rowSums((coords[-1, ] - coords[-nrow(coords), ])^2))
distance <- sum(distances, na.rm = TRUE)
return(data.frame(distance = distance))
}
total_distances <- data_detections %>%
group_by(ID) %>%
group_modify(~ total_distance(.x))
print(total_distances)
不是一个
sp
解决方案,但作为比较结果的方法可能很有用。此示例使用 sf
,它取代了现已弃用的 sp
包。
它涉及根据 ID 和日期创建线串,然后使用
st_length()
返回以米为单位的距离。我已经包含了一个图来显示从最早到最晚的日期正确生成的线串。
library(lubridate)
library(sf)
library(dplyr)
library(ggplot2)
# Generate a random dataset
generate_random_datetime <- function(start_date, end_date, n) {
seq(start_date, end_date, by = "min")[sample(1:(as.integer(difftime(end_date, start_date, units = "mins")) + 1), n)]
}
set.seed(123)
n <- 100
data_detections <- data.frame(
Date.and.Time..UTC. = generate_random_datetime(ymd_hms("2024-01-01 00:00:00"), ymd_hms("2024-01-20 23:59:59"), n),
Receiver = sample(1:10, n, replace = TRUE),
Latitude = runif(n, 52.0, 53.0),
Longitude = runif(n, 3.0, 4.0) ,
ID = as.character(sample(1:10, n, replace = TRUE))
)
# Create WGS84/EPSG:4326 points sf, project to UTM 43/EPSG:32643,
# convert to linestring, calculate distance in metres
total_distances <- data_detections %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>%
st_transform(32643) %>%
arrange(ID, Date.and.Time..UTC.) %>%
summarise(geometry = st_combine(geometry), .by = ID) %>%
st_cast("LINESTRING") %>%
mutate(distance_m = st_length(.))
total_distances
# Simple feature collection with 10 features and 2 fields
# Geometry type: LINESTRING
# Dimension: XY
# Bounding box: xmin: -3781279 ymin: 8417353 xmax: -3632521 ymax: 8527546
# Projected CRS: WGS 84 / UTM zone 43N
# ID geometry distance_m
# 1 1 LINESTRING (-3705312 843568... 442811.7 [m]
# 2 10 LINESTRING (-3649012 847137... 680690.1 [m]
# 3 2 LINESTRING (-3720191 845915... 426086.0 [m]
# 4 3 LINESTRING (-3715490 842553... 359007.6 [m]
# 5 4 LINESTRING (-3750874 844622... 575986.2 [m]
# 6 5 LINESTRING (-3747306 847523... 370883.0 [m]
# 7 6 LINESTRING (-3738944 848461... 408334.1 [m]
# 8 7 LINESTRING (-3758508 849621... 681211.1 [m]
# 9 8 LINESTRING (-3654471 845113... 877935.9 [m]
# 10 9 LINESTRING (-3697808 844829... 499295.0 [m]
现在确认点已按正确的顺序连接。请注意,绘制所有 10 条线会使绘图过于混乱,因此此处仅显示 ID 1:
point_sf <- data_detections %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>%
st_transform(32643) %>%
arrange(as.integer(ID), Date.and.Time..UTC.)
# Plot ID 1 linestring with dates
ggplot() +
geom_sf(data = filter(total_distances, ID == "1"),
colour = "#6dc9fc") +
geom_sf_text(data = filter(point_sf, ID == "1"),
aes(label = Date.and.Time..UTC.),
size = 2.5,
fun.geometry = st_centroid,
colour = "black") +
coord_sf(clip = "off")