我编写了一个 Python 脚本,通过 Google Photos API 下载照片及其元数据(我主要关心的是 GPS 位置数据)。
我尝试了多种方法,但始终无法获取到 GPS 位置数据。非常感谢任何帮助。
这是脚本的代码:
import os
import json
import requests
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient import discovery
from PIL import Image, ExifTags
import exifread
import piexif
import time
import io
import exif
from PIL.ExifTags import TAGS, GPSTAGS
# OAuth scopes requested during authentication (read-only Photos Library access).
SCOPES = ['https://www.googleapis.com/auth/photoslibrary.readonly']
# A truthy value enables the diagnostic print() calls used throughout the script.
DEBUG = 1
# A truthy value stops the album listing after the first page and makes
# extract_photo_from_album query a fixed test album ID instead of the real one.
SHORT_EXECUTION = 1
# Directory prefix for downloaded photos. It is concatenated directly with the
# file name, so the trailing slash is required.
PATH_TEMP_FOLDER= '/tmp/scripts/my-venv/photos/'
def authenticate_google_photos():
    """Return Google OAuth credentials for the scopes listed in ``SCOPES``.

    Credentials cached in ``token.json`` are reused while they are valid.
    Expired credentials that carry a refresh token are refreshed; in every
    other case the installed-app flow is started from ``client_secret.json``.
    Newly obtained or refreshed credentials are written back to ``token.json``.

    Returns:
        The credentials object to hand to the API client.
    """
    token_path = 'token.json'
    creds = (
        Credentials.from_authorized_user_file(token_path, SCOPES)
        if os.path.exists(token_path)
        else None
    )

    # Cached credentials that are still valid need no further work.
    if creds and creds.valid:
        return creds

    can_refresh = creds and creds.expired and creds.refresh_token
    if can_refresh:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file('client_secret.json', SCOPES)
        creds = flow.run_local_server(port=0)

    # Persist the new token so the next run can skip the interactive flow.
    with open(token_path, 'w') as token:
        token.write(creds.to_json())
    return creds
def get_photos_list_from_album(creds, albumID, page_size=10):
    """Return every media item of an album, following pagination.

    Args:
        creds: OAuth credentials passed to the Photos Library client.
        albumID: Identifier of the album whose media items are listed.
        page_size: Number of items requested per page. The value is clamped
            to the range 1..100 (100 is the documented maximum page size of
            ``mediaItems.search``).

    Returns:
        list: The ``mediaItems`` dictionaries of all pages, in the order the
        API returned them. Empty when the album has no items.
    """
    service = discovery.build('photoslibrary', 'v1', credentials=creds, static_discovery=False)
    # The page token is only added once the API has handed one out, so the
    # first request does not carry an empty token.
    request_body = {"albumId": albumID, "pageSize": max(1, min(int(page_size), 100))}
    photos = []
    while True:
        results = service.mediaItems().search(body=request_body).execute()
        page_items = results.get('mediaItems', [])
        photos.extend(page_items)

        if DEBUG and page_items:
            # Inspect the metadata that came with the search response instead
            # of fetching a hard-coded item ID. The documented mediaMetadata
            # fields (creationTime, width, height, photo, video) do not
            # include a location, so gps_info is expected to be empty.
            metadata = page_items[0].get('mediaMetadata', {})
            gps_info = metadata.get('location', {})
            print(f"{metadata}")
            print(f"{gps_info}")

        next_page_token = results.get('nextPageToken')
        if not next_page_token:
            break
        request_body["pageToken"] = next_page_token
    return photos
def get_albums_list(creds, page_size=10):
    """List the user's albums as dictionaries holding ``id`` and ``title``.

    Args:
        creds: OAuth credentials passed to the Photos Library client.
        page_size: Number of albums requested per page.

    Returns:
        list: The album dictionaries collected from the pages that were read.
        When ``SHORT_EXECUTION`` is truthy only the first page is read.
    """
    service = discovery.build('photoslibrary', 'v1', credentials=creds, static_discovery=False)
    albums = []
    page_token = ""
    while True:
        response = service.albums().list(
            pageSize=page_size,
            pageToken=page_token,
            fields="nextPageToken,albums(id,title)",
        ).execute()
        albums.extend(response.get('albums', []))
        if DEBUG:
            print(f"Reading albums: {len(albums)} identified")
        # todo remove it: SHORT_EXECUTION ends the listing after one page.
        if SHORT_EXECUTION or 'nextPageToken' not in response:
            break
        page_token = response['nextPageToken']

    if not albums:
        print('No albums found.')
        return albums

    if DEBUG:
        print('Albums:')
        for item in albums:
            print(f"{item['title'].encode('utf8')} ({item['id']})")
    return albums
def download_photo(url, filename):
    """Download ``url`` into ``PATH_TEMP_FOLDER`` unless the file already exists.

    Args:
        url: Address the photo bytes are fetched from.
        filename: Name of the target file inside ``PATH_TEMP_FOLDER``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """
    print("in download " +filename)
    target_path = PATH_TEMP_FOLDER + filename
    if os.path.isfile(target_path):
        # Already downloaded by an earlier run; keep the cached copy.
        return

    # Create the target folder on first use so open() below cannot fail on it.
    os.makedirs(os.path.dirname(target_path) or '.', exist_ok=True)

    response = requests.get(url, timeout=30)
    # Without this check an error page would be stored as if it were the image
    # and then be treated as a cached download on every later run.
    response.raise_for_status()
    with open(target_path, 'wb') as file:
        file.write(response.content)
def get_exif_data(photo_data):
    """Read the EXIF fields of an image file with the ``exif`` package.

    Args:
        photo_data: Path of the image file.

    Returns:
        dict | None: Mapping of field name to value for every field that
        could be read, or ``None`` when Pillow reports no EXIF data for the
        file.
    """
    result = {}
    # The context manager closes the file handle even if parsing raises.
    with open(photo_data, "rb") as fp:
        exif_image = exif.Image(fp)
        for field in exif_image.list_all():
            try:
                result[field] = exif_image[field]
            except Exception:
                # Best effort: a field that raises while being read is left
                # out. Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                continue

    # Pillow is only consulted to decide whether the file carries EXIF at all.
    with Image.open(photo_data) as image:
        image_exif = image._getexif()
    if not image_exif:
        return None
    return result
def extract_gps_from_image(image_path):
    """Return the GPS coordinates found in the EXIF data of ``image_path``.

    When ``DEBUG`` is truthy, every GPS-related tag and field found by the two
    EXIF readers is printed as well.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        dict: The result of ``get_gps_location`` for the tags read by
        ``exifread``.
    """
    with open(image_path, 'rb') as image_file:
        tags = exifread.process_file(image_file, details=False)
    if DEBUG:
        for tag_name in tags:
            if "GPS" in tag_name:
                print(f"tag: {tag_name}={tags[tag_name]}")
    gps_info = get_gps_location(tags)

    # get_exif_data returns None for files without EXIF; fall back to an empty
    # mapping so the debug loop below does not fail on it.
    fields = get_exif_data(image_path) or {}
    if DEBUG:
        for field_name in fields:
            # Case-insensitive match: the former `("gps" or "GPS") in f`
            # evaluated to `"gps" in f` and never matched upper-case names.
            if "gps" in field_name.lower():
                print(f"Field: {field_name}={fields[field_name]}")
    return gps_info
def get_gps_location(exif_data):
    """Build a latitude/longitude dictionary from EXIF tags.

    Args:
        exif_data: Mapping of tag names to tag objects. The tags
            ``GPS GPSLatitude``, ``GPS GPSLongitude`` and their ``...Ref``
            companions are read from it.

    Returns:
        dict: ``{'Latitude': ..., 'Longitude': ...}`` in decimal degrees, or
        an empty dict when either coordinate tag is absent. Latitude is
        negated unless its reference is ``'N'``; longitude is negated unless
        its reference is ``'E'``.
    """
    has_coordinates = 'GPS GPSLatitude' in exif_data and 'GPS GPSLongitude' in exif_data
    if not has_coordinates:
        return {}

    lat_value = exif_data['GPS GPSLatitude']
    lat_ref = exif_data['GPS GPSLatitudeRef']
    lon_value = exif_data['GPS GPSLongitude']
    lon_ref = exif_data['GPS GPSLongitudeRef']

    latitude = convert_to_degrees(lat_value)
    longitude = convert_to_degrees(lon_value)

    # The reference tag decides the hemisphere and therefore the sign.
    latitude = -latitude if lat_ref.values[0] != 'N' else latitude
    longitude = -longitude if lon_ref.values[0] != 'E' else longitude

    return {'Latitude': latitude, 'Longitude': longitude}
def convert_to_degrees(value):
    """Convert a degrees/minutes/seconds EXIF value to decimal degrees.

    Args:
        value: Object whose ``values`` sequence holds three rationals
            (degrees, minutes, seconds), each exposing ``num`` and ``den``.

    Returns:
        float: ``degrees + minutes / 60 + seconds / 3600``.
    """
    def _as_float(ratio):
        # Numerator and denominator are converted separately before dividing.
        return float(ratio.num) / float(ratio.den)

    parts = value.values
    degrees = _as_float(parts[0])
    minutes = _as_float(parts[1])
    seconds = _as_float(parts[2])
    return degrees + (minutes / 60.0) + (seconds / 3600.0)
def extract_photo_from_album(creds,album):
    """List the media items of one album and extract their metadata.

    Args:
        creds: OAuth credentials forwarded to ``get_photos_list_from_album``.
        album: Album dictionary; its ``"id"`` entry is used unless
            ``SHORT_EXECUTION`` is truthy.
    """
    if DEBUG:
        print(f"Album: {album}")

    # todo remove it: SHORT_EXECUTION swaps in a fixed test album ID.
    album_id = "test_Album_code" if SHORT_EXECUTION else album["id"]
    photos = get_photos_list_from_album(creds, album_id)

    if DEBUG:
        print(f"Photos identified: {len(photos)}")
    extract_photo_metadata(photos)
def extract_photo_metadata(photos):
    """Download each non-video media item and print the GPS data found in it.

    Args:
        photos: Iterable of media-item dictionaries. ``baseUrl`` and
            ``filename`` are required; ``mimeType`` is used when present.

    Videos are skipped. They are recognised by a ``video/`` MIME type or, as a
    fallback, by a ``.mov`` / ``.mp4`` file extension compared
    case-insensitively.
    """
    video_suffixes = ('.mov', '.mp4')
    gps_data = {}
    for photo in photos:
        print(f"Photo: {photo}")
        # TODO check: the bare base URL is requested. Per the Library API
        # guide, appending "=d" downloads the image with its EXIF metadata
        # except the location, so GPS tags are not expected from either form.
        photo_url = photo['baseUrl']
        photo_filename = photo['filename']

        is_video = (
            (photo.get('mimeType') or '').startswith('video/')
            or photo_filename.lower().endswith(video_suffixes)
        )
        if is_video:
            continue

        download_photo(photo_url, photo_filename)
        if DEBUG:
            print(f"Photo name: {photo['filename']}")
        gps_info = extract_gps_from_image(PATH_TEMP_FOLDER + photo_filename)
        if gps_info:
            gps_data[photo_filename] = gps_info

    # Print the GPS locations
    for photo_name, location in gps_data.items():
        if DEBUG:
            print(f"Photo: {photo_name}, Location: {location}")
def main():
    """Authenticate, list the albums and process the photos of each album.

    Raises:
        SystemExit: Always, with exit status 0, once all albums are processed.
    """
    creds = authenticate_google_photos()
    albums = get_albums_list(creds)
    for album in albums:
        extract_photo_from_album(creds, album)
    # exit() is a helper the site module installs for interactive sessions and
    # may be missing (e.g. under `python -S`); raising SystemExit gives the
    # same exit status without depending on it.
    raise SystemExit(0)


if __name__ == '__main__':
    main()
如果我使用
photo_url = photo['baseUrl'] + "=d"
(即在 URL 后加上
=d
参数),下载的文件会包含除地理位置之外的所有元数据;
如果不加 =d 参数,则不会导出任何元数据。下面是谷歌的文档,其中并没有可以导出全部元数据(包括地理位置)的参数:
https://developers.google.com/photos/library/guides/access-media-items
提前非常感谢
从照片下载 GPS 元信息
即使加上“=d”参数,它返回的实际上也不是原始文件,这一点在他们的 API 文档中说得并不清楚。在当前的 Photos API 中,谷歌限制了原始文件的下载。