我在 Google Drive 文件夹中有一堆文件,其文件名包含有关所需文件夹/子文件夹结构的信息:
[folder~subfolder] MyFile.ext
有些文件简单的写成:
[folder] MyOtherFile.ext
我正在寻找一种使用 thr Google Drive API 将文件分类为所需文件结构的方法,如果文件夹/子文件夹尚不存在,则创建它们。
这是我目前所拥有的:
import os
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# Set up the Drive API credentials
creds = Credentials.from_authorized_user_file(r"CREDENTIALS_PATH", ['https://www.googleapis.com/auth/drive'])
# Define the name of the Google Drive folder
folder_name = 'FOLDER_CONTAINING_FILES'
# Create a Drive API client
service = build('drive', 'v3', credentials=creds)
# Define the function to parse the subdirectories and sub-subdirectories
def parse_directory(filename):
# Split the filename by '] '
parts = filename.split('] ')
subdirs = []
for part in parts:
# Check if the part is a subdirectory
if '[' in part:
subdir = part[1:]
subdirs.append(subdir)
return subdirs
# Get the list of files in the Google Drive folder
try:
query = "mimeType='application/vnd.google-apps.folder' and trashed = false and name='" + folder_name + "'"
folder = service.files().list(q=query).execute().get('files')[0]
folder_id = folder.get('id')
query = "'" + folder_id + "' in parents and trashed = false and mimeType != 'application/vnd.google-apps.folder'"
files = service.files().list(q=query).execute().get('files')
except HttpError as error:
print(f'An error occurred: {error}')
files = []
# Create dictionaries to store the subdirectories and sub-subdirectories
subdirs_dict = {}
subsubdirs_dict = {}
# Loop through the files and create the subdirectories and sub-subdirectories if necessary
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir not in subdirs_dict:
# Create the subdirectory if it doesn't exist
metadata = {'name': subdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subdir_file = service.files().create(body=metadata, fields='id').execute()
subdirs_dict[subdir] = subdir_file.get('id')
parent_id = subdirs_dict[subdir]
if subsubdir != '':
if subsubdir not in subsubdirs_dict:
# Create the sub-subdirectory if it doesn't exist
metadata = {'name': subsubdir, 'parents': [parent_id], 'mimeType': 'application/vnd.google-apps.folder'}
subsubdir_file = service.files().create(body=metadata, fields='id').execute()
subsubdirs_dict[subsubdir] = subsubdir_file.get('id')
parent_id = subsubdirs_dict[subsubdir]
# Loop through the files and move them to the appropriate subdirectory or sub-subdirectory
for file in files:
filename = file.get('name')
subdirs = parse_directory(filename)
if len(subdirs) == 0:
continue
parent_id = folder_id
for i in range(len(subdirs)):
subdir = subdirs[i]
if i == len(subdirs) - 1:
subsubdir = ''
else:
subsubdir = subdirs[i+1]
if subdir in subdirs_dict:
parent_id = subdirs_dict[subdir]
if subsubdir != '' and subsubdir in subsubdirs_dict:
parent_id = subsubdirs_dict[subsubdir]
metadata = {'name': filename, 'parents': [parent_id]}
service.files().update(fileId=file.get('id'), body=metadata).execute()
目前,我的代码生成(几乎)正确的文件夹,但它们没有正确嵌套。例如,对于这些输入文件:
[0_diss] good_journals.txt
[0_diss~historic_disses] Zhang (2020) - NAV premia.pdf
[0_diss~literature] A Guide to Writing the Literature Review.pdf
脚本生成以下文件夹:
0_diss
0_diss~historic_disses
0_diss~literature
我想要的是创建名为“0_diss”、“historic_disses”和“literature”的文件夹(如果它们尚不存在),并将“historic_disses”和“literature”作为“0_diss”的子文件夹。我希望这更有意义。
真的很感谢这方面的帮助,我不是一个有经验的程序员,我正在尽我最大的努力学习 Python。谢谢。
这个答案假设只能有一个
~
,因为那是您提供的数据和示例。您可以像这样解析目录:
from typing import Optional
import re
def parse_directories(filename: str) -> Optional[dict]:
dirs = {}
# Make sure we have a supported file name, match anything not ]~ into group 1
# Then optionally match anything not ] after ~ into group 2
match = re.search('^\[([^\]~]+)~?([^\]]+)?\]', filename)
# If anything was matched, assign always available parent, else return null
if (not match):
return None
dirs['parent'] = match.group(1)
# If child was found, assign it too
if (match.group(2)):
dirs['child'] = match.group(2)
return dirs
然后只使用一个循环来创建新目录并将文件移动到其中:
for filename in filenames:
newdirs = parse_directories(filename)
if newdirs:
if 'child' in newdirs:
# Create your child folder
# To verify if it's been already made simply store paths as a list
else:
# Create newdirs['parent'] if needed
# Move the file to the new/already existing folder, you don't need a 2nd loop
这应该让你走上正确的轨道。