提高蒙版xarray文件的创建速度

问题描述 投票:0回答:1

我目前正在尝试使用遮罩网格将矩形xarray文件裁剪为国家/地区的形状。在下面可以找到我当前的解决方案(使用更简单和更小的数组)。该代码有效,我得到了基于1和0的所需蒙版。问题在于这样的事实,即代码在实际国家(更大,更复杂)的形状上运行需要30分钟以上的时间。因为我在这里使用了非常基本的操作,例如嵌套的for循环,所以我也尝试了其他替代方法,例如列表方法。但是,在对过程进行计时时,以下代码没有得到改善。我想知道是否有更快的方法来获得此掩码(矢量化?),或者我是否应该以其他方式解决该问题(尝试探索xarray的属性,但尚未找到解决该问题的方法)。

下面的代码:

import geopandas as gpd
from shapely.geometry import Polygon, Point
import pandas as pd
import numpy as np 
import xarray as xr

df = pd.read_csv('Brazil_borders.csv',index_col=0)
lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
       -14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
       -18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
       -22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
       -26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
       -30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
       -57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
       -53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
       -49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
       -45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
        _= [lats[i],lons[i]]
        points.append(_)
poly_proj = Polygon(points)    

mask = np.zeros((len(lats2),len(lons2)))     # Mask with the dataset's shape and size.                                       

for i in range(len(lats2)):                  # Iteration to verify if a given coordinate is within the polygon's area                                    
    for j in range(len(lons2)):                                                  
        grid_point = Point(lats2[i], lons2[j])                  
        if grid_point.within(poly_proj):  
            mask[i][j] = 1    
bool_final = mask
bool_final

基于列表的替代方法,但处理时间更短(根据timeit):

lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
       -14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
       -18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
       -22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
       -26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
       -30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
       -57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
       -53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
       -49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
       -45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
        _= [lats[i],lons[i]]
        points.append(_)
poly_proj = Polygon(points)    

grid_point = [Point(lats2[i],lons2[j]) for i in range(len(lats2)) for j in range(len(lons2))]                 
mask = [1 if grid_point[i].within(poly_proj) else 0 for i in range(len(grid_point))] 
bool_final2 = np.reshape(mask,(((len(lats2)),(len(lons2)))))

谢谢您!

arrays numpy for-loop grid python-xarray
1个回答
0
投票

基于snowman2的答案,我创建了这个简单的函数,该函数通过使用geopandasrioxarray提供了更快的解决方案。不必使用纬度和经度列表,而必须使用具有要屏蔽的所需形状的shapefile(Instructions用于从坐标列表中创建GeoDataFrame)。

import xarray as xr
import geopandas as gpd
import rioxarray
from shapely.geometry import mapping

def mask_shape_border (DS,shape_shp): #Inputs are the dataset to be cropped and the address of the mask file (.shp )
    if 'lat' in DS:                     #Some datasets use lat/lon, others latitude/longitude
            DS.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
    elif 'latitude' in DS:
            DS.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude", inplace=True)
    else:
        print("Error: check latitude and longitude variable names.")

    DS.rio.write_crs("epsg:4326", inplace=True)
    mask = gpd.read_file(shape_shp, crs="epsg:4326")
    DS_clipped = DS.rio.clip(mask.geometry.apply(mapping), mask.crs, drop=False)

    return(DS_clipped)
© www.soinside.com 2019 - 2024. All rights reserved.