我正在尝试对不同大小的多边形 (12) 中的大约 10000 个点进行采样。多边形具有“ID”和“面积”。我希望根据每个多边形大小(面积)绘制样本,即来自较大多边形的更多样本和来自较小多边形的更少样本,当然是基于它们的大小。我尝试了以下
pol_sample = st_read("./Data/Polygons/Polygons_merged.shp")
pol_sample = st_transform(pol_sample, 4326)
pol_sample = st_sample(pol_sample, 10000)
但这只会随机采样,不会考虑多边形大小。
有人可以帮助如何根据多边形大小对点进行采样吗?
非常感谢您的帮助。
打印出下面的 sf 对象:
Simple feature collection with 12 features and 2 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -121.1789 ymin: -38.13566 xmax: 153.2769 ymax:
42.17854
Geodetic CRS: GCS_unknown
First 10 features:
ID area_sqkm geometry
1 2380240.67 MULTIPOLYGON (((77.46847 11...
2 1609475.32 MULTIPOLYGON (((148.3452 -2...
3 609946.15 MULTIPOLYGON (((118.0327 27...
4 895408.10 MULTIPOLYGON (((36.70649 -1...
5 999426.57 MULTIPOLYGON (((35.4854 -1....
6 4961657.01 MULTIPOLYGON (((-64.37801 -...
7 4930984.79 MULTIPOLYGON (((-93.46362 1...
8 1010392.03 MULTIPOLYGON (((-68.31586 1...
9 79048.80 MULTIPOLYGON (((-80.51816 2...
10 47379.37 MULTIPOLYGON (((-69.62635 1..."
我不太同意这样的说法:当使用
st_sample()
调用时,st_sample(x, 10000)
不会考虑多边形大小。您不会获得与面积相同的比例,但当您将生成的样本点 ID 计数与多边形面积比例与 chisq.test()
进行比较时,您仍然应该获得相当高的 p 值(即远不及 0.05)。
但是采样当然是可以调整的。
st_sample()
与 sample()
有点不同,它接受的不是概率向量,而是大小向量,因此您只需将多边形面积比例乘以总样本大小即可。
您可能也想解决这个问题
GCS_unknown
CRS。
library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr, warn.conflicts = FALSE)
# prepare example data, us nc dataset from sf package as a base
example_sf <- read_sf(system.file("shape/nc.shp", package="sf")) %>%
mutate(c_lon = st_coordinates(st_centroid(geometry))[,1]) %>%
slice_max(c_lon, n = 12) %>%
select(geometry) %>%
mutate(ID = row_number() %>% as.factor(),
area_sqkm = st_area(geometry) %>% units::set_units(km^2) %>% as.numeric(),
.before = 1)
example_sf
#> Simple feature collection with 12 features and 2 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: -77.16426 ymin: 34.58783 xmax: -75.45698 ymax: 36.55716
#> Geodetic CRS: NAD27
#> # A tibble: 12 × 3
#> ID area_sqkm geometry
#> * <fct> <dbl> <MULTIPOLYGON [°]>
#> 1 1 944. (((-75.78317 36.22519, -75.77316 36.22926, -75.54497 35.7883…
#> 2 2 694. (((-76.00897 36.3196, -76.01735 36.33773, -76.03288 36.33598…
#> 3 3 988. (((-76.1673 35.69684, -76.21024 35.60439, -76.2328 35.59467,…
#> 4 4 616. (((-76.00897 36.3196, -75.95718 36.19377, -75.98134 36.16973…
#> 5 5 1678. (((-76.51894 35.57764, -76.5396 35.59404, -76.58588 35.60946…
#> 6 6 524. (((-76.29893 36.21423, -76.32423 36.23362, -76.37242 36.2523…
#> 7 7 626. (((-76.48053 36.07979, -76.53696 36.08792, -76.5756 36.10266…
#> 8 8 1006. (((-76.40843 35.69912, -76.63382 35.703, -76.83831 35.70546,…
#> 9 9 440. (((-76.68874 36.29452, -76.64822 36.31532, -76.60424 36.3149…
#> 10 10 1264. (((-77.14896 34.76433, -77.16426 34.77452, -77.15982 34.7885…
#> 11 11 903. (((-76.56251 36.34057, -76.60424 36.31498, -76.64822 36.3153…
#> 12 12 834. (((-76.94324 35.07003, -76.94423 35.09972, -76.98989 35.1540…
# st_sample accepts a size vector, we can calculate it from area_sqkm ratios;
# st_sample() returns only a geometry list, to add matching polygon IDs, we'll
# pass it though st_sf() %>% st_join(example_sf[,"ID"])
set.seed(123)
s_points_sf <- example_sf %>%
mutate(sample_size = (area_sqkm / sum(area_sqkm) * 10000) %>% ceiling()) %>%
st_sample(size = .[["sample_size"]]) %>%
st_sf() %>%
st_join(example_sf[,"ID"])
s_points_sf
#> Simple feature collection with 10007 features and 1 field
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: -77.16095 ymin: 34.58965 xmax: -75.46192 ymax: 36.55612
#> Geodetic CRS: NAD27
#> First 10 features:
#> ID geometry
#> 1 1 POINT (-75.76357 35.85772)
#> 2 1 POINT (-75.88236 35.78706)
#> 3 1 POINT (-75.47784 35.65193)
#> 4 1 POINT (-75.84168 35.66341)
#> 5 1 POINT (-75.8131 35.71313)
#> 6 1 POINT (-75.94289 35.68419)
#> 7 1 POINT (-75.86639 35.65802)
#> 8 1 POINT (-75.89701 35.80294)
#> 9 1 POINT (-75.78556 35.81975)
#> 10 1 POINT (-75.92244 35.81634)
left_join(
st_drop_geometry(example_sf),
count(st_drop_geometry(s_points_sf), ID, name = "point_count")
) %>%
mutate(area_prop = as.numeric(area_sqkm / sum(area_sqkm)),
point_prop = point_count / sum(point_count))
#> Joining with `by = join_by(ID)`
#> # A tibble: 12 × 5
#> ID area_sqkm point_count area_prop point_prop
#> <fct> <dbl> <int> <dbl> <dbl>
#> 1 1 944. 898 0.0897 0.0897
#> 2 2 694. 661 0.0660 0.0661
#> 3 3 988. 940 0.0940 0.0939
#> 4 4 616. 586 0.0586 0.0586
#> 5 5 1678. 1596 0.160 0.159
#> 6 6 524. 499 0.0499 0.0499
#> 7 7 626. 596 0.0595 0.0596
#> 8 8 1006. 957 0.0956 0.0956
#> 9 9 440. 419 0.0418 0.0419
#> 10 10 1264. 1203 0.120 0.120
#> 11 11 903. 859 0.0859 0.0858
#> 12 12 834. 793 0.0793 0.0792
创建于 2023-07-30,使用 reprex v2.0.2