I have a script that saves web pages fetched from the internet. The script saves the pages locally and works fine.
import requests
from datetime import datetime
from urllib.parse import urlparse

def parse_page(self, link, q):
    response = requests.get(link, headers=self.headers, stream=True)
    print(f"\t- Response [{response.status_code}] for: {link}")
    published_date = self.get_date(response.text)
    if "text/html" in response.headers["content-type"]:
        format = ".html"
        data = response.text
        mode = "w"
    elif "application/pdf" in response.headers["content-type"]:
        format = ".pdf"
        data = response.content
        mode = "wb"
    elif "officedocument.word" in response.headers["content-type"]:
        format = ".docx"
        data = response  # keep the response object so the body can be streamed in chunks
        mode = "wb"
    folder = urlparse(link).netloc
    filename = urlparse(link).path.split("/")[-1].replace("\n", "")
    if filename == '':
        filename = folder + datetime.now().strftime("_%H_%M_%S")
    # Linux
    #path = self.root_folder + "/" + folder + "/" + filename + format
    # Win
    path = self.root_folder + "\\" + folder + "\\" + filename + format
    with self.safe_open_w(path, mode) as f:
        if format == '.docx':
            for chunk in data.iter_content(16 * 1024):  # 16 KB chunks
                f.write(chunk)
        else:
            f.write(data)
Now I want to save the web pages directly to Google Drive. How can I write the data stream straight to Google Drive without first saving the file locally?
To upload a file to Google Drive, I use the following script:
from googleapiclient.http import MediaFileUpload
from googleapiclient.errors import HttpError

try:
    # Call the Drive v3 API
    file_metadata = {'name': 'file.jpg'}
    media = MediaFileUpload('file.jpg', mimetype='image/jpeg')
    # pylint: disable=maybe-no-member
    file = service.files().create(body=file_metadata, media_body=media,
                                  fields='id').execute()
    print(F'File ID: {file.get("id")}')
except HttpError as error:
    print(F'An error occurred: {error}')
    file = None
except Exception as e:
    print(e)
But now I want to upload the data stream directly (response, response.text, response.content) without storing the file locally. How can I achieve that?
Use the MediaIoBaseUpload and io.BytesIO classes.
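For example, here is a minimal sketch of how that could replace the local write (the upload_stream helper and its parameters are hypothetical; service is assumed to be the same authorized Drive v3 client used in your existing upload script):

import io
from googleapiclient.http import MediaIoBaseUpload
from googleapiclient.errors import HttpError

def upload_stream(service, data, filename, mimetype):
    # data must be bytes: response.content can be passed as-is,
    # response.text (a str) needs .encode("utf-8") first.
    media = MediaIoBaseUpload(io.BytesIO(data), mimetype=mimetype, resumable=True)
    file_metadata = {'name': filename}
    try:
        file = service.files().create(body=file_metadata,
                                      media_body=media,
                                      fields='id').execute()
        return file.get('id')
    except HttpError as error:
        print(F'An error occurred: {error}')
        return None

# e.g. inside parse_page, instead of opening a local path:
# upload_stream(service, response.content, filename + format, "application/pdf")

MediaIoBaseUpload accepts any file-like object, so wrapping the bytes in io.BytesIO keeps everything in memory and nothing is written to disk; the mimetype argument would be 'text/html', 'application/pdf', or the Word MIME type depending on which branch of parse_page was taken.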