我有一个环境变量的栅格堆栈和一堆站点点。我想知道给定点位置的每个栅格的像元值。这通常可以使用
terra
快速处理,但是我正在使用的特定数据集有许多点稍微落在一些栅格的边界之外,因此返回 NA 值。
我在这里看到了类似的问题,并根据解决方案松散地建模了我的脚本。代码执行正确,但是我在将此解决方案扩展到我的大型光栅堆栈和点数据集时遇到问题。它太慢了,主要有两个原因:
get_nearest_non_na
函数使用terra::distance
来计算每个单元到每个点的距离,迭代时可能需要很长时间才能生成。
for
循环对每个栅格迭代此过程
创建可重复的数据集
# Load required libraries
library(terra)
library(sf)
# Create raster stack
f <- system.file("ex/elev.tif", package="terra")
r <- rast(f)
r <- rep(r, 3) * 1:3
names(r) <- paste0("var", 1:3)
# Generate spatial points dataset
extent <- as.polygons(ext(r)) |> st_as_sf()
set.seed(99)
pnts <- st_sample(extent, size = 20, type = "random") |> st_as_sf() |> sf::st_set_crs(crs(r))
处理 NA 值的函数
# This function finds the nearest non-NA raster value for a given point
get_nearest_non_na <- function(point, raster_layer) {
# Compute distance to all raster cells
distance_raster <- terra::distance(raster_layer, point)
# Extract distances and raster values
distances <- terra::values(distance_raster)
raster_values <- terra::values(raster_layer)
# Identify non-NA raster cells
valid_indices <- which(!is.na(raster_values))
# Find the nearest valid cell by minimizing distance
nearest_valid_index <- valid_indices[which.min(distances[valid_indices])]
# Return the value of the nearest valid raster cell
return(raster_values[nearest_valid_index])
}
在
for
循环中使用上述函数
# Iterate over each raster in the stack and apply the function
for (i in 1:nlyr(r)) {
raster_layer <- r[[i]]
# Extract raster values at point locations
extracted_values <- terra::extract(raster_layer, pnts, method = "simple")
# Replace NA values with the nearest non-NA raster values
corrected_values <- sapply(1:nrow(pnts), function(j) {
if (is.na(extracted_values[j, 2])) { # If the extracted value is NA
get_nearest_non_na(pnts[j, ], raster_layer) # Use the nearest valid value
} else {
extracted_values[j, 2] # Keep the extracted value if it is not NA
}
})
# Add the corrected values as a new column to the spatial dataset
pnts[[names(raster_layer)]] <- corrected_values
}
# View the updated dataset with the new raster values
print(pnts)
有点复杂的解决方案,但在具有 8.55e+07 个单元的样本数据集上相对较快:
buffer()
mask()
SpatRasternearest()
点的坐标left_join()
结果到 pnts 并生成新的几何体extract()
library(terra)
library(sf)
library(dplyr)
# Create high-res version of your raster stack
f <- system.file("ex/elev.tif", package = "terra")
r <- rast(f)
r <- disagg(r, fact = 100)
r <- rep(r, 3) * 1:3
names(r) <- paste0("var", 1:3)
# Generate spatial points dataset and add ID column
extent <- as.polygons(ext(r)) %>%
st_as_sf()
set.seed(99)
pnts <- st_sample(extent, size = 20, type = "random") %>%
st_as_sf() %>%
st_set_crs(crs(r)) %>%
mutate(ID = 1:n())
# Create spatvector of points just inside extent of non-NA r
sv_poly <- as.polygons(r)
sv_poly <- aggregate(sv_poly)
buff <- buffer(sv_poly, width = -1)
sv_line <- as.lines(buff)
sv_mask <- mask(r, sv_line)
sv_points <- as.points(sv_mask)
# Identify which pnts are NA and subset
values <- extract(r, pnts)
pnts1 <- bind_cols(pnts, select(values, starts_with("var"))) %>%
filter(is.na(var1))
# Get point from sv_points nearest to each pnts1
pnts2 <- data.frame(nearest(vect(pnts1), sv_points)) %>%
bind_cols(pnts1, .) %>%
select(ID, to_x, to_y) %>%
st_drop_geometry()
# Join result to pnts and generate new sf based on updated coordinates
pnts2 <- left_join(pnts, pnts2, by = "ID") %>%
mutate(
to_x = if_else(is.na(to_x), st_coordinates(x)[, 1], to_x),
to_y = if_else(is.na(to_y), st_coordinates(x)[, 2], to_y)
) %>%
data.frame() %>%
st_as_sf(coords = c("to_x", "to_y"), crs = crs(r))
values <- extract(r, pnts2)
values
# ID var1 var2 var3
# 1 1 352 704 1056
# 2 2 415 830 1245
# 3 3 346 692 1038
# 4 4 178 356 534
# 5 5 346 692 1038
# 6 6 188 376 564
# 7 7 248 496 744
# 8 8 494 988 1482
# 9 9 315 630 945
# 10 10 338 676 1014
# 11 11 334 668 1002
# 12 12 290 580 870
# 13 13 379 758 1137
# 14 14 257 514 771
# 15 15 214 428 642
# 16 16 328 656 984
# 17 17 334 668 1002
# 18 18 483 966 1449
# 19 19 447 894 1341
# 20 20 485 970 1455