我正在尝试读取通过以下步骤创建的 GeoJSON 文件:
import geopandas as gpd
# Load the source shapefile.
vec_data = gpd.read_file("map.shp")
# Notebook-style inspection: preview the table and list the distinct LPIS_name values.
vec_data.head()
vec_data["LPIS_name"].unique()
# Keep only the rows whose LPIS_name is "Permanent Grassland".
is_grassland = vec_data["LPIS_name"] == "Permanent Grassland"
sel_crop = vec_data[is_grassland]
# Write the selection out as GeoJSON, then read that same file back in.
geojson_path = "Permanent_Grassland.geojson"
sel_crop.to_file(geojson_path, driver="GeoJSON")
feature = gpd.read_file(geojson_path)
但我收到以下错误:
{ "name": "DataSourceError",
"message": "Failed to read GeoJSON data",
"stack": "---------------------------------------------------------------------------
DataSourceError Traceback (most recent call last)
Cell In[8], line 1
----> 1 feature = gpd.read_file(path_feature)
File c:\\Users\\bventura\\AppData\\Local\\anaconda3\\Lib\\site-packages\\geopandas\\io\\file.py:294, in _read_file(filename, bbox, mask, columns, rows, engine, **kwargs)
291 from_bytes = True
293 if engine == \"pyogrio\":
--> 294 return _read_file_pyogrio(
295 filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
296 )
298 elif engine == \"fiona\":
299 if pd.api.types.is_file_like(filename):
File c:\\Users\\bventura\\AppData\\Local\\anaconda3\\Lib\\site-packages\\geopandas\\io\\file.py:547, in _read_file_pyogrio(path_or_bytes, bbox, mask, rows, **kwargs)
538 warnings.warn(
539 \"The 'include_fields' and 'ignore_fields' keywords are deprecated, and \"
540 \"will be removed in a future release. You can use the 'columns' keyword \"
(...)
543 stacklevel=3,
544 )
545 kwargs[\"columns\"] = kwargs.pop(\"include_fields\")
--> 547 return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
File c:\\Users\\bventura\\AppData\\Local\\anaconda3\\Lib\\site-packages\\pyogrio\\geopandas.py:261, in read_dataframe(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)
256 if not use_arrow:
257 # For arrow, datetimes are read as is.
258 # For numpy IO, datetimes are read as string values to preserve timezone info
259 # as numpy does not directly support timezones.
260 kwargs[\"datetime_as_string\"] = True
--> 261 result = read_func(
262 path_or_buffer,
263 layer=layer,
264 encoding=encoding,
265 columns=columns,
266 read_geometry=read_geometry,
267 force_2d=gdal_force_2d,
268 skip_features=skip_features,
269 max_features=max_features,
270 where=where,
271 bbox=bbox,
272 mask=mask,
273 fids=fids,
274 sql=sql,
275 sql_dialect=sql_dialect,
276 return_fids=fid_as_index,
277 **kwargs,
278 )
280 if use_arrow:
281 meta, table = result
File c:\\Users\\bventura\\AppData\\Local\\anaconda3\\Lib\\site-packages\\pyogrio\\raw.py:196, in read(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)
56 \"\"\"Read OGR data source into numpy arrays.
57
58 IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
(...)
191
192 \"\"\"
194 dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
--> 196 return ogr_read(
197 get_vsi_path_or_buffer(path_or_buffer),
198 layer=layer,
199 encoding=encoding,
200 columns=columns,
201 read_geometry=read_geometry,
202 force_2d=force_2d,
203 skip_features=skip_features,
204 max_features=max_features or 0,
205 where=where,
206 bbox=bbox,
207 mask=_mask_to_wkb(mask),
208 fids=fids,
209 sql=sql,
210 sql_dialect=sql_dialect,
211 return_fids=return_fids,
212 dataset_kwargs=dataset_kwargs,
213 datetime_as_string=datetime_as_string,
214 )
File c:\\Users\\bventura\\AppData\\Local\\anaconda3\\Lib\\site-packages\\pyogrio\\_io.pyx:1239, in pyogrio._io.ogr_read()
File c:\\Users\\bventura\\AppData\\Local\\anaconda3\\Lib\\site-packages\\pyogrio\\_io.pyx:219, in pyogrio._io.ogr_open()
DataSourceError: Failed to read GeoJSON data"
}
应要求,可以在这里下载该 GeoJSON 文件,以便更好地调试代码。
同时,我在网上搜索了一下,可能的原因似乎是:多边形(Polygon)和多重多边形(MultiPolygon)应遵循右手定则。
在底层,geopandas 使用 pyogrio 库来读/写文件,而 pyogrio 则使用 gdal 库。
因此,我检查了直接使用 gdal 的 Python 绑定时,错误消息中是否包含更多细节——结果确实如此。
当您运行以下脚本时:
from osgeo import gdal
# Make GDAL raise Python exceptions so the detailed failure reason is reported.
gdal.UseExceptions()
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\T", "\g" and "\P".
path = r"C:\Temp\gras\Permanent_Grassland.geojson"
# Translate the GeoJSON to a GeoPackage; opening the source file is the step that fails.
gdal.VectorTranslate(srcDS=path, destNameOrDestDS="C:/Temp/gras/Permanent_Grassland.gpkg")
它输出以下错误:
RuntimeError: Failed to read GeoJSON data
May be caused by: At line 6, character 51158626: GeoJSON object too
complex/large. You may define the OGR_GEOJSON_MAX_OBJ_SIZE configuration
option to a value in megabytes to allow for larger features, or 0 to
remove any size limit
所以显然其中一个几何对象非常大……不过,正如错误消息所提示的,有一个解决办法:通过配置选项允许读取超大的要素。
下面的示例脚本取消了要素大小的限制,从而避免该错误。我使用 pyogrio 来设置配置选项,这样您就不必安装 gdal 的 Python 绑定——它们默认不随 geopandas 一起安装,而且如果您只使用 pip,安装起来可能会比较困难:
import geopandas as gpd
import pyogrio
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\T", "\g" and "\P".
path = r"C:\Temp\gras\Permanent_Grassland.geojson"
# OGR_GEOJSON_MAX_OBJ_SIZE is given in megabytes; 0 removes the size limit entirely,
# which lets GDAL read the very large feature in this file.
pyogrio.set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0})
gdf = gpd.read_file(path)
print(gdf)
显然,您的 GeoJSON 文件只包含一个巨大的多重多边形(MultiPolygon)要素:
     ID       DESCR_IT  ...            LPIS_name                                           geometry
0  3097  Prato stabile  ...  Permanent Grassland  MULTIPOLYGON (((10.51872 46.69302, 10.51878 46...

[1 rows x 8 columns]
仅供参考:我在 pyogrio 的问题跟踪器中提交了一个 issue,以确认通过 pyogrio/geopandas 读取文件时是否也能显示这条详细的错误信息:https://github.com/geopandas/pyogrio/issues/491