我看到了这篇帖子,该问题下的回答演示了如何处理 hotels.com 的 POST 请求。
回答中给出的代码如下:
import trio
import httpx
import pandas as pd
async def main():
    """Download the guest reviews of one hotels.com property and print them.

    Posts the ``reviewsQuery`` GraphQL operation to
    ``https://fr.hotels.com/kes/graphql`` for hotel ``344560``, flattens every
    ``hermes.groups[*].items`` entry of each response into one list, and
    prints the result as a pandas DataFrame.

    The original version sent a single request and therefore only ever saw
    the first page of results.  This version keeps requesting pages, feeding
    the ``pagination.nextURL`` value of each response back in as the
    ``paginationURL`` query variable, until no further page is advertised.

    Raises:
        httpx.HTTPStatusError: if any page request returns a 4xx/5xx status.
    """
    data = {
        "operationName": "reviewsQuery",
        "query": "query reviewsQuery($hotelId: String!, $reviewType: String, $reviewOrder: String, $tripTypeFilter: String, $paginationURL: String) {\n reviews(\n hotelId: $hotelId\n reviewType: $reviewType\n reviewOrder: $reviewOrder\n tripTypeFilter: $tripTypeFilter\n paginationURL: $paginationURL\n ) {\n body {\n reviewContent {\n filters {\n type\n name\n count\n url\n __typename\n }\n overall {\n selectedFilterType\n rating\n badgeText\n total\n scores {\n score\n count\n url\n __typename\n }\n ratingAspects {\n cleanliness\n service\n comfort\n condition\n neighbourhood\n __typename\n }\n whatGuestsSay {\n type\n text\n __typename\n }\n topRated {\n category\n explanation\n __typename\n }\n __typename\n }\n sort {\n url\n options {\n value\n label\n __typename\n }\n __typename\n }\n reviews {\n hermes {\n groups {\n separatorText\n items {\n itineraryId\n brand\n googleTranslateEnabled\n reviewDbDate\n ...GuestReviewsFragment\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n pagination {\n currentPage\n nextURL\n totalPages\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment GuestReviewsFragment on ReviewsItem {\n genuineMsg\n tripType\n tripTypeText\n reviewDate\n reviewSubmitDate\n rating\n reviewer {\n name\n locality\n locale\n __typename\n }\n badge\n summary\n description\n __typename\n}\n",
        "variables": {
            "hotelId": "344560",
            "reviewOrder": "date_newest_first",
            "reviewType": "brand",
            "tripTypeFilter": "all",
        },
    }

    allin = []
    # Guards against an endless loop should the server ever hand back a
    # nextURL that was already requested.
    seen_urls = set()

    async with httpx.AsyncClient(timeout=None) as client:
        while True:
            r = await client.post('https://fr.hotels.com/kes/graphql', json=data)
            # Fail loudly on an HTTP error instead of a confusing KeyError below.
            r.raise_for_status()

            content = r.json()['data']['reviews']['body']['reviewContent']
            for group in content['reviews']['hermes']['groups']:
                allin.extend(group['items'])

            # NOTE(review): assumes the endpoint accepts the returned nextURL
            # verbatim as the $paginationURL variable -- confirm against a
            # live response.
            next_url = (content.get('pagination') or {}).get('nextURL')
            if not next_url or next_url in seen_urls:
                break
            seen_urls.add(next_url)
            data["variables"]["paginationURL"] = next_url

    df = pd.DataFrame(allin)
    print(df)


if __name__ == "__main__":
    trio.run(main)
我有两个与此相关的问题:
1. 上面的代码无法提取全部评论,只提取了 50 条。应该如何修改才能获取所有评论?我也想了解这里的 POST 查询该如何解读。
2. 如何修改这段代码以用于 expedia.com?我尝试把上面的方法直接用在 expedia.com 上,但不起作用,代码如下:
import trio
import httpx
import pandas as pd
async def main():
    """Send the hotels.com ``reviewsQuery`` operation to fr.expedia.com.

    Posts the GraphQL payload to ``https://fr.expedia.com/kes/graphql``,
    flattens every ``hermes.groups[*].items`` entry of the JSON response into
    a single list and prints it as a pandas DataFrame.

    NOTE(review): the surrounding post reports that this request fails with a
    name-resolution ``ConnectError``; the code is kept as the asker wrote it.
    """
    graphql_query = (
        "query reviewsQuery($hotelId: String!, $reviewType: String, $reviewOrder: String, $tripTypeFilter: String, $paginationURL: String) {\n reviews(\n hotelId: $hotelId\n reviewType: $reviewType\n reviewOrder: $reviewOrder\n tripTypeFilter: $tripTypeFilter\n paginationURL: $paginationURL\n ) {\n body {\n reviewContent {\n filters {\n type\n name\n count\n url\n __typename\n }\n overall {\n selectedFilterType\n rating\n badgeText\n total\n scores {\n score\n count\n url\n __typename\n }\n ratingAspects {\n cleanliness\n service\n comfort\n condition\n neighbourhood\n __typename\n }\n whatGuestsSay {\n type\n text\n __typename\n }\n topRated {\n category\n explanation\n __typename\n }\n __typename\n }\n sort {\n url\n options {\n value\n label\n __typename\n }\n __typename\n }\n reviews {\n hermes {\n groups {\n separatorText\n items {\n itineraryId\n brand\n googleTranslateEnabled\n reviewDbDate\n ...GuestReviewsFragment\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n pagination {\n currentPage\n nextURL\n totalPages\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\n"
        "fragment GuestReviewsFragment on ReviewsItem {\n genuineMsg\n tripType\n tripTypeText\n reviewDate\n reviewSubmitDate\n rating\n reviewer {\n name\n locality\n locale\n __typename\n }\n badge\n summary\n description\n __typename\n}\n"
    )
    query_variables = {
        "hotelId": "344560",
        "reviewOrder": "date_newest_first",
        "reviewType": "brand",
        "tripTypeFilter": "all",
    }
    payload = {
        "operationName": "reviewsQuery",
        "query": graphql_query,
        "variables": query_variables,
    }

    async with httpx.AsyncClient(timeout=None) as client:
        reply = await client.post('https://fr.expedia.com/kes/graphql', json=payload)
        review_groups = reply.json()['data']['reviews']['body']['reviewContent']['reviews']['hermes']['groups']
        # One row per review item, in the order the groups list them.
        rows = [item for group in review_groups for item in group['items']]
        frame = pd.DataFrame(rows)
        print(frame)


if __name__ == "__main__":
    trio.run(main)
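这是报错信息:
gaierror Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/httpcore/_exceptions.py in map_exceptions(map)
      9     try:
---> 10         yield
     11     except Exception as exc:  # noqa: PIE786
31 frames
gaierror: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
ConnectError Traceback (most recent call last)
ConnectError: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
ConnectError Traceback (most recent call last)
[... skipping hidden 1 frame]
/usr/local/lib/python3.10/dist-packages/httpx/_transports/default.py in map_httpcore_exceptions()
     75
     76         message = str(exc)
---> 77         raise mapped_exc(message) from exc
     78
     79
ConnectError: [Errno -2] Name or service not known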
这是我尝试过的另一段代码,它报错:requests.exceptions.HTTPError: 429 Client Error: Too Many Requests for url: https://www.expedia.com/graphql
import requests
import pandas as pd
def main():
    """Post ``PropertyFilteredReviewsQuery`` to Expedia and print the reviews.

    Sends a one-element batch to ``https://www.expedia.com/graphql`` for
    property ``24625``, raises on a non-success HTTP status, then reads
    ``propertyInfo.reviewInfo.reviews.content.reviews`` from the first batch
    result and prints it as a pandas DataFrame.

    The ``query`` text ends with the literal placeholder
    ``... (rest of the GraphQL query) ...``; it is sent exactly as written.

    Raises:
        requests.exceptions.HTTPError: if the response status is 4xx/5xx
            (the surrounding post reports a 429 for this request).
    """
    request_headers = {
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
    }

    context = {
        "siteId": 1,
        "locale": "en_US",
        "eapid": 0,
        "currency": "USD",
        "device": {"type": "DESKTOP"},
        "identity": {
            "duaid": "-1",
            "expUserId": "832921361",
            "tuid": "-1",
            "authState": "ANONYMOUS",
        },
        "privacyTrackingState": "CAN_TRACK",
        "debugContext": {"abacusOverrides": [], "alterMode": "RELEASED"},
    }

    search_criteria = {
        "primary": {
            "dateRange": None,
            "rooms": [{"adults": 2}],
            "destination": {"regionId": "178305"},
        },
        "secondary": {
            "booleans": [
                {"id": "includeRecentReviews", "value": True},
                {"id": "includeRatingsOnlyReviews", "value": True},
                {"id": "overrideEmbargoForIndividualReviews", "value": True},
            ],
            "counts": [
                {"id": "startIndex", "value": 0},
                {"id": "size", "value": 10},
            ],
            "selections": [
                {"id": "sortBy", "value": "NEWEST_TO_OLDEST_BY_LANGUAGE"},
                {"id": "searchTerm", "value": ""},
            ],
        },
    }

    graphql_query = "query PropertyFilteredReviewsQuery($context: ContextInput!, $propertyId: String!, $searchCriteria: PropertySearchCriteriaInput!) {\n propertyReviewSummaries(\n context: $context\n propertyIds: [$propertyId]\n searchCriteria: $searchCriteria\n ) {\n ...__PropertyReviewSummaryFragment\n __typename\n }\n propertyInfo(context: $context, propertyId: $propertyId) {\n id\n reviewInfo(searchCriteria: $searchCriteria) {\n ...__PropertyReviewsListFragment\n sortAndFilter {\n ...TravelerTypeFragment\n ...SortTypeFragment\n ...SearchTextFragment\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\n... (rest of the GraphQL query) ..."

    # The endpoint is addressed with a batch: a list holding one operation.
    batch = [
        {
            "operationName": "PropertyFilteredReviewsQuery",
            "variables": {
                "context": context,
                "propertyId": "24625",
                "searchCriteria": search_criteria,
            },
            "query": graphql_query,
        }
    ]

    reply = requests.post("https://www.expedia.com/graphql", json=batch, headers=request_headers)
    reply.raise_for_status()

    first_result = reply.json()[0]
    rows = list(first_result['data']['propertyInfo']['reviewInfo']['reviews']['content']['reviews'])
    frame = pd.DataFrame(rows)
    print(frame)


if __name__ == "__main__":
    main()
下面的代码对我有效,不过我没有研究 Client-Info 和 duaid 是如何生成的。
import requests
import pandas as pd
def main():
    """Fetch reviews for one Expedia property and print title/superlative/text.

    Sends ``PropertyFilteredReviewsQuery`` (as a one-element batch) to
    ``https://www.expedia.com/graphql`` for property ``24625``, requesting
    ``size`` 10 from ``startIndex`` 0, newest first.  From each entry of
    ``propertyInfo.reviewInfo.reviews`` in the first batch result it keeps the
    ``title``, ``superlative`` and ``text`` fields and prints them as a pandas
    DataFrame.

    Changes from the original:
      * the DataFrame is built once from a list of rows instead of calling
        ``pd.concat`` inside the loop, which re-copied all earlier rows on
        every iteration and left every row with index 0;
      * the request has a timeout, so a stalled connection cannot hang
        forever;
      * the GraphQL document is written as adjacent string literals, one per
        fragment; the concatenated text is unchanged.

    Raises:
        requests.exceptions.HTTPError: if the response status is 4xx/5xx.
        requests.exceptions.Timeout: if the server does not answer in time.
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
        # Copied from the browser dev tools; how this value is produced is unknown.
        'Client-Info': "blossom-flex-ui,9382ef788e9311fcea3ce7a7b749cd68c4059a45,us-west-2",
    }

    query = (
        "query PropertyFilteredReviewsQuery($context: ContextInput!, $propertyId: String!, $searchCriteria: PropertySearchCriteriaInput!) {\n propertyReviewSummaries(\n context: $context\n propertyIds: [$propertyId]\n searchCriteria: $searchCriteria\n ) {\n ...__PropertyReviewSummaryFragment\n __typename\n }\n propertyInfo(context: $context, propertyId: $propertyId) {\n id\n reviewInfo(searchCriteria: $searchCriteria) {\n ...__PropertyReviewsListFragment\n sortAndFilter {\n ...TravelerTypeFragment\n ...SortTypeFragment\n ...SearchTextFragment\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\n"
        "fragment __PropertyReviewSummaryFragment on PropertyReviewSummary {\n accessibilityLabel\n overallScoreWithDescriptionA11y {\n ...LodgingEnrichedMessageFragment\n __typename\n }\n propertyReviewCountDetails {\n fullDescription\n __typename\n }\n ...ReviewDisclaimerFragment\n reviewSummaryDetails {\n label\n ratingPercentage\n formattedRatingOutOfMax\n __typename\n }\n totalCount {\n raw\n __typename\n }\n __typename\n}\n\n"
        "fragment ReviewDisclaimerFragment on PropertyReviewSummary {\n reviewDisclaimer\n reviewDisclaimerHeading\n strategy\n reviewDisclaimerValues {\n text\n __typename\n }\n reviewDisclaimerLabel\n reviewDisclaimerAnalytics {\n referrerId\n linkName\n __typename\n }\n reviewDisclaimerUrl {\n value\n accessibilityLabel\n link {\n url\n __typename\n }\n __typename\n }\n reviewDisclaimerAccessibilityLabel\n __typename\n}\n\n"
        "fragment LodgingEnrichedMessageFragment on LodgingEnrichedMessage {\n __typename\n subText\n value\n theme\n state\n accessibilityLabel\n icon {\n id\n size\n theme\n __typename\n }\n mark {\n id\n __typename\n }\n egdsMark {\n url {\n value\n __typename\n }\n __typename\n }\n}\n\n"
        "fragment __PropertyReviewsListFragment on PropertyReviews {\n summary {\n paginateAction {\n text\n analytics {\n referrerId\n linkName\n __typename\n }\n __typename\n }\n __typename\n }\n reviews {\n contentDirectFeedbackPromptId\n ...ReviewParentFragment\n managementResponses {\n ...ReviewChildFragment\n __typename\n }\n reviewInteractionSections {\n primaryDisplayString\n reviewInteractionType\n __typename\n }\n __typename\n }\n ...NoResultsMessageFragment\n __typename\n}\n\n"
        "fragment ReviewParentFragment on PropertyReview {\n id\n superlative\n locale\n title\n brandType\n reviewScoreWithDescription {\n label\n value\n __typename\n }\n text\n seeMoreAnalytics {\n linkName\n referrerId\n __typename\n }\n submissionTime {\n longDateFormat\n __typename\n }\n impressionAnalytics {\n event\n referrerId\n __typename\n }\n themes {\n ...ReviewThemeFragment\n __typename\n }\n reviewFooter {\n ...PropertyReviewFooterSectionFragment\n __typename\n }\n ...FeedbackIndicatorFragment\n ...AuthorFragment\n ...PhotosFragment\n ...TravelersFragment\n ...ReviewTranslationInfoFragment\n ...PropertyReviewSourceFragment\n ...PropertyReviewRegionFragment\n __typename\n}\n\n"
        "fragment AuthorFragment on PropertyReview {\n reviewAuthorAttribution {\n text\n __typename\n }\n __typename\n}\n\n"
        "fragment PhotosFragment on PropertyReview {\n id\n photoSection {\n imageClickAnalytics {\n referrerId\n linkName\n __typename\n }\n exitAnalytics {\n referrerId\n linkName\n __typename\n }\n navClickAnalytics {\n referrerId\n linkName\n __typename\n }\n __typename\n }\n photos {\n description\n url\n __typename\n }\n __typename\n}\n\n"
        "fragment TravelersFragment on PropertyReview {\n travelers\n __typename\n}\n\n"
        "fragment ReviewThemeFragment on ReviewThemes {\n icon {\n id\n __typename\n }\n label\n __typename\n}\n\n"
        "fragment FeedbackIndicatorFragment on PropertyReview {\n reviewInteractionSections {\n primaryDisplayString\n accessibilityLabel\n reviewInteractionType\n feedbackAnalytics {\n linkName\n referrerId\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment ReviewTranslationInfoFragment on PropertyReview {\n translationInfo {\n loadingTranslationText\n targetLocale\n translatedBy {\n description\n __typename\n }\n translationCallToActionLabel\n seeOriginalText\n __typename\n }\n __typename\n}\n\n"
        "fragment PropertyReviewSourceFragment on PropertyReview {\n propertyReviewSource {\n accessibilityLabel\n graphic {\n description\n id\n size\n token\n url {\n value\n __typename\n }\n __typename\n }\n text {\n value\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment PropertyReviewRegionFragment on PropertyReview {\n reviewRegion {\n id\n __typename\n }\n __typename\n}\n\n"
        "fragment PropertyReviewFooterSectionFragment on PropertyReviewFooterSection {\n messages {\n seoStructuredData {\n itemscope\n itemprop\n itemtype\n content\n __typename\n }\n text {\n ... on EGDSPlainText {\n text\n __typename\n }\n ... on EGDSGraphicText {\n text\n graphic {\n ... on Mark {\n description\n id\n size\n url {\n ... on HttpURI {\n relativePath\n value\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment ReviewChildFragment on ManagementResponse {\n id\n header {\n text\n __typename\n }\n response\n __typename\n}\n\n"
        "fragment NoResultsMessageFragment on PropertyReviews {\n noResultsMessage {\n __typename\n ...MessagingCardFragment\n ...EmptyStateFragment\n }\n __typename\n}\n\n"
        "fragment MessagingCardFragment on UIMessagingCard {\n graphic {\n __typename\n ... on Icon {\n id\n description\n __typename\n }\n }\n primary\n secondaries\n __typename\n}\n\n"
        "fragment EmptyStateFragment on UIEmptyState {\n heading\n body\n __typename\n}\n\n"
        "fragment TravelerTypeFragment on SortAndFilterViewModel {\n sortAndFilter {\n name\n label\n options {\n label\n isSelected\n optionValue\n description\n clickAnalytics {\n linkName\n referrerId\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment SortTypeFragment on SortAndFilterViewModel {\n sortAndFilter {\n name\n label\n clickAnalytics {\n linkName\n referrerId\n __typename\n }\n options {\n label\n isSelected\n optionValue\n description\n clickAnalytics {\n linkName\n referrerId\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment SearchTextFragment on SortAndFilterViewModel {\n sortAndFilter {\n name\n label\n graphic {\n ... on Icon {\n description\n id\n token\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n"
    )

    data = [
        {
            "operationName": "PropertyFilteredReviewsQuery",
            "variables": {
                "context": {
                    "siteId": 1,
                    "locale": "en_US",
                    "eapid": 0,
                    "currency": "USD",
                    "device": {
                        "type": "DESKTOP"
                    },
                    "identity": {
                        # Copied from the browser dev tools; how this value is produced is unknown.
                        "duaid": "1003be2b-6834-4cf8-bb66-66a49107b76c",
                        "expUserId": "-1",
                        "tuid": "-1",
                        "authState": "ANONYMOUS"
                    },
                    "privacyTrackingState": "CAN_TRACK",
                    "debugContext": {
                        "abacusOverrides": [],
                        "alterMode": "RELEASED"
                    }
                },
                "propertyId": "24625",
                "searchCriteria": {
                    "primary": {
                        "dateRange": None,
                        "rooms": [
                            {
                                "adults": 2
                            }
                        ],
                        "destination": {
                            "regionId": "178305"
                        }
                    },
                    "secondary": {
                        "booleans": [
                            {
                                "id": "includeRecentReviews",
                                "value": True
                            },
                            {
                                "id": "includeRatingsOnlyReviews",
                                "value": True
                            },
                            {
                                "id": "overrideEmbargoForIndividualReviews",
                                "value": True
                            }
                        ],
                        "counts": [
                            {
                                "id": "startIndex",
                                "value": 0
                            },
                            {
                                "id": "size",
                                "value": 10
                            }
                        ],
                        "selections": [
                            {
                                "id": "sortBy",
                                "value": "NEWEST_TO_OLDEST_BY_LANGUAGE"
                            },
                            {
                                "id": "searchTerm",
                                "value": ""
                            }
                        ]
                    }
                }
            },
            "query": query
        }
    ]

    # 30 s is an arbitrary upper bound; without one, requests waits indefinitely.
    response = requests.post(
        "https://www.expedia.com/graphql", json=data, headers=headers, timeout=30
    )
    response.raise_for_status()

    reviews = response.json()[0]['data']['propertyInfo']['reviewInfo']['reviews']
    columns = ['title', 'superlative', 'text']
    # Build every row first and construct the frame once; passing ``columns``
    # keeps the header even when no reviews come back.
    allin = pd.DataFrame(
        [{column: x[column] for column in columns} for x in reviews],
        columns=columns,
    )
    print(allin)


if __name__ == "__main__":
    main()