我目前正在尝试使用遮罩网格将矩形xarray文件裁剪为国家/地区的形状。在下面可以找到我当前的解决方案(使用更简单和更小的数组)。该代码有效,我得到了基于1和0的所需蒙版。问题在于这样的事实,即代码在实际国家(更大,更复杂)的形状上运行需要30分钟以上的时间。因为我在这里使用了非常基本的操作,例如嵌套的for循环,所以我也尝试了其他替代方法,例如列表方法。但是,在对过程进行计时时,以下代码没有得到改善。我想知道是否有更快的方法来获得此掩码(矢量化?),或者我是否应该以其他方式解决该问题(尝试探索xarray的属性,但尚未找到解决该问题的方法)。
下面的代码:
import geopandas as gpd
from shapely.geometry import Polygon, Point
import pandas as pd
import numpy as np
import xarray as xr
df = pd.read_csv('Brazil_borders.csv',index_col=0)
lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
-14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
-18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
-22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
-26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
-30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
-57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
-53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
-49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
-45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
_= [lats[i],lons[i]]
points.append(_)
poly_proj = Polygon(points)
mask = np.zeros((len(lats2),len(lons2))) # Mask with the dataset's shape and size.
for i in range(len(lats2)): # Iteration to verify if a given coordinate is within the polygon's area
for j in range(len(lons2)):
grid_point = Point(lats2[i], lons2[j])
if grid_point.within(poly_proj):
mask[i][j] = 1
bool_final = mask
bool_final
基于列表的替代方法,但处理时间更短(根据timeit):
lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
-14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
-18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
-22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
-26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
-30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
-57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
-53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
-49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
-45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
_= [lats[i],lons[i]]
points.append(_)
poly_proj = Polygon(points)
grid_point = [Point(lats2[i],lons2[j]) for i in range(len(lats2)) for j in range(len(lons2))]
mask = [1 if grid_point[i].within(poly_proj) else 0 for i in range(len(grid_point))]
bool_final2 = np.reshape(mask,(((len(lats2)),(len(lons2)))))
谢谢您!
基于snowman2的答案,我创建了这个简单的函数,该函数通过使用geopandas
和rioxarray
提供了更快的解决方案。不必使用纬度和经度列表,而必须使用具有要屏蔽的所需形状的shapefile(Instructions用于从坐标列表中创建GeoDataFrame)。
import xarray as xr
import geopandas as gpd
import rioxarray
from shapely.geometry import mapping
def mask_shape_border (DS,shape_shp): #Inputs are the dataset to be cropped and the address of the mask file (.shp )
if 'lat' in DS: #Some datasets use lat/lon, others latitude/longitude
DS.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
elif 'latitude' in DS:
DS.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude", inplace=True)
else:
print("Error: check latitude and longitude variable names.")
DS.rio.write_crs("epsg:4326", inplace=True)
mask = gpd.read_file(shape_shp, crs="epsg:4326")
DS_clipped = DS.rio.clip(mask.geometry.apply(mapping), mask.crs, drop=False)
return(DS_clipped)