我目前正在开发一个使用 Google Document AI 的项目,需要用 Python 从处理器(processor)的数据集中删除文档,希望能得到帮助。我尝试了多种方法,但一直没有找到解决方案。
这是我到目前为止所做的:
我使用的代码如下:
import io
from google.cloud import documentai_v1beta3
from google.cloud.documentai_v1beta3 import DocumentId
# Region of the Document AI processor; also used in the dataset resource name.
PROCESSOR_LOCATION = "eu"

# Project number per deployment environment (indexed by ENV_DEST).
PROJECT_NUMBER = {
    "DEV": "123456789",
    "PROD": "123456789",
}

# Processor id per deployment environment (indexed by ENV_DEST).
PROCESSOR_ID_CDE = {
    "DEV": "abcdefghijk",
    "PROD": "abcdefghijk",
}

# Identify each dataset document through its gcs_managed_doc_id.
# DocumentId has no `document_id` field; the GCS URI belongs in
# DocumentId.GCSManagedDocumentId.gcs_uri.
# NOTE(review): assumes the dataset keeps its documents in a user-managed GCS
# bucket. If the dataset uses Google-managed storage, the documents are
# presumably addressed via `unmanaged_doc_id` instead - confirm.
doc_id1 = DocumentId(
    gcs_managed_doc_id=DocumentId.GCSManagedDocumentId(
        gcs_uri="gs://test/raw_data/training/abc.pdf"
    )
)
doc_id2 = DocumentId(
    gcs_managed_doc_id=DocumentId.GCSManagedDocumentId(
        gcs_uri="gs://test/raw_data/training/xyz.pdf"
    )
)

# Environment whose project/processor the delete request targets.
ENV_DEST = "DEV"
def sample_batch_delete_documents():
    """Delete ``doc_id1`` and ``doc_id2`` from the processor's dataset.

    Builds a ``BatchDeleteDocumentsRequest`` for the dataset of the processor
    selected by ``ENV_DEST``, starts the long-running delete operation, waits
    up to 300 seconds for it to finish and prints the request and the result.

    Raises:
        concurrent.futures.TimeoutError: if the operation does not complete
            within 300 seconds.
    """
    # Non-default locations need the regional endpoint; derive it from the
    # same constant that is used in the dataset resource name below.
    opts = {"api_endpoint": f"{PROCESSOR_LOCATION}-documentai.googleapis.com"}
    client = documentai_v1beta3.DocumentServiceClient(client_options=opts)

    # Select the documents to delete by their individual ids.
    individual_ids = documentai_v1beta3.BatchDatasetDocuments.IndividualDocumentIds(
        document_ids=[doc_id1, doc_id2]
    )
    batch_dataset_documents = documentai_v1beta3.BatchDatasetDocuments(
        individual_document_ids=individual_ids
    )

    # `timeout` is not a field of BatchDeleteDocumentsRequest, so it must not
    # be passed here; the wait limit is applied on operation.result() below.
    request = documentai_v1beta3.BatchDeleteDocumentsRequest(
        dataset=f"projects/{PROJECT_NUMBER[ENV_DEST]}/locations/{PROCESSOR_LOCATION}/processors/{PROCESSOR_ID_CDE[ENV_DEST]}/dataset",
        dataset_documents=batch_dataset_documents,
    )
    print(request)

    # Start the long-running delete operation.
    operation = client.batch_delete_documents(request=request)
    print("Waiting for operation to complete...")

    # NOTE(review): the reported "Could not convert Any to
    # BatchDeleteDocumentsResponse" TypeError is raised while unpacking the
    # operation's `response` field, i.e. after the server has already returned
    # a response. Presumably a type mismatch between the installed client
    # library and the service - confirm by upgrading google-cloud-documentai.
    response = operation.result(timeout=300)  # wait at most 300 seconds

    # Handle the response
    print(response)
我收到的错误消息如下:
Error Message
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/var/tmp/ipykernel_13116/441287119.py in <module>
----> 1 sample_batch_delete_documents()
/var/tmp/ipykernel_13116/2850527411.py in sample_batch_delete_documents()
39 print("Waiting for operation to complete...")
40
---> 41 response =operation.result()
42
43 # Handle the responseh
/opt/conda/lib/python3.7/site-packages/google/api_core/future/polling.py in result(self, timeout, retry, polling)
254 """
255
--> 256 self._blocking_poll(timeout=timeout,retry=retry,polling=polling)
257
258 ifself._exception isnotNone:
/opt/conda/lib/python3.7/site-packages/google/api_core/future/polling.py in _blocking_poll(self, timeout, retry, polling)
135
136 try:
--> 137 polling(self._done_or_raise)(retry=retry)
138 exceptexceptions.RetryError:
139 raise concurrent.futures.TimeoutError(
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_wrapped_func(*args, **kwargs)
352 sleep_generator,
353 self._timeout,
--> 354 on_error=on_error,
355 )
356
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_target(target, predicate, sleep_generator, timeout, on_error, **kwargs)
189 forsleep insleep_generator:
190 try:
--> 191 return target()
192
193 # pylint: disable=broad-except
/opt/conda/lib/python3.7/site-packages/google/api_core/future/polling.py in _done_or_raise(self, retry)
117 def_done_or_raise(self,retry=None):
118 """Check if the future is done and raise if it's not."""
--> 119 if notself.done(retry=retry):
120 raise_OperationNotComplete()
121
/opt/conda/lib/python3.7/site-packages/google/api_core/operation.py in done(self, retry)
172 bool:Trueifthe operation iscomplete,Falseotherwise.
173 """
--> 174 self._refresh_and_update(retry)
175 returnself._operation.done
176
/opt/conda/lib/python3.7/site-packages/google/api_core/operation.py in _refresh_and_update(self, retry)
161 ifnotself._operation.done:
162 self._operation =self._refresh(retry=retry)ifretry elseself._refresh()
--> 163 self._set_result_from_operation()
164
165 defdone(self,retry=None):
/opt/conda/lib/python3.7/site-packages/google/api_core/operation.py in _set_result_from_operation(self)
133 ifself._operation.HasField("response"):
134 response = protobuf_helpers.from_any_pb(
--> 135 self._result_type,self._operation.response
136 )
137 self.set_result(response)
/opt/conda/lib/python3.7/site-packages/google/api_core/protobuf_helpers.py in from_any_pb(pb_type, any_pb)
65 raise TypeError(
66 "Could not convert {} to {}".format(
---> 67 any_pb.__class__.__name__,pb_type.__name__
68 )
69 )
TypeError: Could not convert Any to BatchDeleteDocumentsResponse
如果有人能为我提供如何解决此问题的指导,我将不胜感激。我愿意接受有关我已经尝试过的方法或任何其他方法的建议。
您能帮忙解决这个问题吗?如果可以,能否说明一下具体的解决思路和步骤?
谢谢!