我有时会使用 Steam Web API 收集 Steam 评论供个人使用。下面的代码虽然有一些冗余部分,但可以收集给定游戏的所有评论。最近我尝试收集《Apex Legends》的评论,但无论怎么做,都只能收到 1427 条评论。我尝试了很多办法,例如删除语言过滤器、更改偏离主题活动(filter_offtopic_activity)过滤器、使结束日期动态化,但似乎仍然无法收集到所有评论。
在某个游标返回了 27 条评论之后(这是我第一次得到不是 100 条评论的一页),下一个游标是 AoJwq+Kw1PUCf/CWrQI=,用它请求会得到 0 条评论,而且响应中返回的下一个游标仍然是它自己。
我的代码如下,是否有任何建议,以便我可以收集所有评论,提前谢谢您。
import requests
from datetime import datetime
import urllib.parse
import time
def fetch_game_reviews(appid, cursor='*', filter='recent', language='english'):
    """
    Fetch one page of game reviews for a specific Steam game.

    Parameters:
    - appid: Steam Application ID for the game.
    - cursor: Cursor for pagination. Use '*' for the first page.
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).

    Returns:
    - JSON response containing game reviews.

    Raises:
    - requests.exceptions.Timeout: if the server does not answer in time.
    - requests.exceptions.HTTPError: if the server answers with an error status.
    """
    # The initial '*' marker is sent as-is; any other cursor is
    # percent-encoded because it may contain characters such as '+' or '='.
    if cursor != '*':
        cursor = urllib.parse.quote(cursor)
    url = f"https://store.steampowered.com/appreviews/{appid}?json=1&cursor={cursor}&filter={filter}&language={language}&num_per_page=100&filter_offtopic_activity=0"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors here instead of failing later while decoding the body.
    response.raise_for_status()
    return response.json()
def fetch_reviews_until_date(appid, end_date, filter='recent', language='all'):
    """
    Fetch game reviews for a specific Steam game until a specific date.

    Pages through fetch_game_reviews starting at the '*' cursor and stops
    when a review created before end_date is seen, when a page contains no
    reviews, or when the returned cursor equals the cursor used two requests
    earlier.

    Parameters:
    - appid: Steam Application ID for the game.
    - end_date: The end date for fetching reviews (YYYY-MM-DD format).
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).

    Returns:
    - Tuple (all_reviews, cursors): the collected review dicts, each tagged
      with 'collected' (fetch time, epoch seconds) and 'appid', and the list
      of cursors returned by every request made.
    """
    end_date = datetime.strptime(end_date, '%Y-%m-%d')
    previous_cursor = None
    all_reviews = []
    cursors = []
    cursor = '*'
    while True:
        response = fetch_game_reviews(appid, cursor=cursor, filter=filter, language=language)
        collected_timestamp = int(time.time())
        reviews = response['reviews']
        new_cursor = response.get('cursor')
        cursors.append(new_cursor)
        # An empty page or a cursor that cycles back means there is nothing
        # further to fetch.
        if not reviews or (new_cursor == previous_cursor and cursor != '*'):
            return all_reviews, cursors
        for review in reviews:
            # fromtimestamp() yields a naive local-time datetime, which is
            # compared against the naive end_date parsed above.
            review_date = datetime.fromtimestamp(review['timestamp_created'])
            review['collected'] = collected_timestamp
            review['appid'] = appid
            if review_date < end_date:
                # Return the same (reviews, cursors) pair as every other exit
                # so callers can always unpack two values.
                return all_reviews, cursors
            all_reviews.append(review)
        print(len(all_reviews))
        print(response['cursor'])
        previous_cursor = cursor
        cursor = new_cursor
# Collect reviews for app id 1172470, paging until a review created before
# 2020-10-01 is reached; also keep the cursors returned along the way.
reviews, cursors = fetch_reviews_until_date('1172470', '2020-10-01')
getreviews 的文档中还列出了其他一些参数,例如 review_type、purchase_type 等,并且它们被标记为 Required(必填)。
当我添加这些参数时,代码给了我更多的评论。
我在 25_000 条评论时停止了代码。
# Excerpt from the body of fetch_game_reviews: the query parameters are
# collected in a dict and handed to requests via `params=`, instead of being
# formatted into the URL by hand.
payload = {
'json': 1,
'cursor': cursor,
'filter': filter,
'language': language,
'num_per_page': 100,
'filter_offtopic_activity': 0,
#'day_range': 365,
# The two parameters below are the additions; the getreviews documentation
# marks them as required.
'review_type': 'all',
'purchase_type': 'all',
}
#print(payload)
url = f"https://store.steampowered.com/appreviews/{appid}"
response = requests.get(url, params=payload)
完整的工作代码:
"""
# date: 2024.07.03
# [User Reviews - Get List (Steamworks Documentation)](https://partner.steamgames.com/doc/store/getreviews)
"""
import requests
from datetime import datetime
import urllib.parse
import time
def fetch_game_reviews(appid, cursor='*', filter='recent', language='english'):
    """
    Fetch one page of game reviews for a specific Steam game.

    Parameters:
    - appid: Steam Application ID for the game.
    - cursor: Cursor for pagination. Use '*' for the first page.
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).

    Returns:
    - JSON response containing game reviews.

    Raises:
    - requests.exceptions.Timeout: if the server does not answer in time.
    - requests.exceptions.HTTPError: if the server answers with an error status.
    """
    # NOTE: 'day_range' was commented out in the original payload and is
    # still not sent.
    payload = {
        'json': 1,
        'cursor': cursor,
        'filter': filter,
        'language': language,
        'num_per_page': 100,
        'filter_offtopic_activity': 0,
        'review_type': 'all',
        'purchase_type': 'all',
    }
    url = f"https://store.steampowered.com/appreviews/{appid}"
    # Passing the dict through `params` lets requests encode the cursor.
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=payload, timeout=30)
    # Surface HTTP errors here instead of failing later while decoding the body.
    response.raise_for_status()
    return response.json()
def fetch_reviews_until_date(appid, end_date, filter='recent', language='all'):
    """
    Fetch game reviews for a specific Steam game until a specific date.

    Pages through fetch_game_reviews starting at the '*' cursor and stops
    when a review created before end_date is seen, when a page contains no
    reviews, or when the returned cursor equals the cursor used two requests
    earlier.

    Parameters:
    - appid: Steam Application ID for the game.
    - end_date: The end date for fetching reviews (YYYY-MM-DD format).
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).

    Returns:
    - Tuple (all_reviews, cursors): the collected review dicts, each tagged
      with 'collected' (fetch time, epoch seconds) and 'appid', and the list
      of cursors returned by every request made.
    """
    end_date = datetime.strptime(end_date, '%Y-%m-%d')
    previous_cursor = None
    all_reviews = []
    cursors = []
    cursor = '*'
    while True:
        response = fetch_game_reviews(appid, cursor=cursor, filter=filter, language=language)
        collected_timestamp = int(time.time())
        reviews = response['reviews']
        new_cursor = response.get('cursor')
        cursors.append(new_cursor)
        # An empty page or a cursor that cycles back means there is nothing
        # further to fetch.
        if not reviews or (new_cursor == previous_cursor and cursor != '*'):
            return all_reviews, cursors
        for review in reviews:
            # fromtimestamp() yields a naive local-time datetime, which is
            # compared against the naive end_date parsed above.
            review_date = datetime.fromtimestamp(review['timestamp_created'])
            review['collected'] = collected_timestamp
            review['appid'] = appid
            if review_date < end_date:
                # Return the same (reviews, cursors) pair as every other exit
                # so callers can always unpack two values.
                return all_reviews, cursors
            all_reviews.append(review)
        # Progress output once per fetched page.
        print('len:', len(all_reviews))
        print('cursor:', response['cursor'])
        previous_cursor = cursor
        cursor = new_cursor
# Collect reviews for app id 1172470, paging until a review created before
# 2020-10-01 is reached; also keep the cursors returned along the way.
reviews, cursors = fetch_reviews_until_date('1172470', '2020-10-01')