在 expedia.com 中处理 POST 请求

问题描述 投票:0回答:1

我看到了这篇帖子,该问题的回答展示了如何处理 hotels.com 的 POST 请求。

这是由 αdɱyi ud αмєяιcαη 编写的代码:
import trio
import httpx
import pandas as pd


async def main():
    """Fetch hotel reviews from the hotels.com GraphQL endpoint and print them.

    Posts the ``reviewsQuery`` operation for hotel ``344560`` (newest first,
    all trip types), flattens every ``items`` list found under
    ``reviews.hermes.groups`` into one list, and prints the result as a
    pandas DataFrame.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    # The query is written as adjacent string literals so that no literal
    # contains a raw line break (which would be a SyntaxError); the
    # concatenated text is unchanged.
    query = (
        "query reviewsQuery($hotelId: String!, $reviewType: String, $reviewOrder: String, $tripTypeFilter: String, $paginationURL: String) {\n  reviews(\n    hotelId: $hotelId\n    reviewType: $reviewType\n    reviewOrder: $reviewOrder\n    tripTypeFilter: $tripTypeFilter\n    paginationURL: $paginationURL\n  ) {\n    body {\n      reviewContent {\n        filters {\n          type\n          name\n          count\n          url\n          __typename\n        }\n        overall {\n          selectedFilterType\n          rating\n          badgeText\n          total\n          scores {\n            score\n            count\n            url\n            __typename\n          }\n          ratingAspects {\n            cleanliness\n            service\n            comfort\n            condition\n            neighbourhood\n            __typename\n          }\n          whatGuestsSay {\n            type\n            text\n            __typename\n          }\n          topRated {\n            category\n            explanation\n            __typename\n          }\n          __typename\n        }\n        sort {\n          url\n          options {\n            value\n            label\n            __typename\n          }\n          __typename\n        }\n        reviews {\n          hermes {\n            groups {\n              separatorText\n              items {\n                itineraryId\n                brand\n                googleTranslateEnabled\n                reviewDbDate\n                ...GuestReviewsFragment\n                __typename\n              }\n              __typename\n            }\n            __typename\n          }\n          __typename\n        }\n        pagination {\n          currentPage\n          nextURL\n          totalPages\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment GuestReviewsFragment on ReviewsItem {\n  genuineMsg\n  tripType\n  tripTypeText\n  "
        "reviewDate\n  reviewSubmitDate\n  rating\n  reviewer {\n    name\n    locality\n    locale\n    __typename\n  }\n  badge\n  summary\n  description\n  __typename\n}\n"
    )
    data = {
        "operationName": "reviewsQuery",
        "query": query,
        "variables": {
            "hotelId": "344560",
            "reviewOrder": "date_newest_first",
            "reviewType": "brand",
            "tripTypeFilter": "all",
        },
    }
    async with httpx.AsyncClient(timeout=None) as client:
        r = await client.post('https://fr.hotels.com/kes/graphql', json=data)
        # Fail loudly on an HTTP error instead of hitting a confusing
        # KeyError / JSON decode error further down.
        r.raise_for_status()
        # NOTE(review): only a single response page is read here. The query
        # exposes pagination.nextURL and accepts $paginationURL, which
        # presumably allow following pages -- confirm against the API.
        groups = r.json()['data']['reviews']['body']['reviewContent']['reviews']['hermes']['groups']
        allin = [item for group in groups for item in group['items']]
        df = pd.DataFrame(allin)
        print(df)


if __name__ == "__main__":
    trio.run(main)

我有两个与此相关的问题:

  1. 上面的代码是如何工作的?它无法提取所有评论,只提取了 50 条;我想了解这里的 POST 查询应当如何解读。

  2. 如何针对 expedia.com 修改这段代码?我尝试把上面的方法用于 expedia.com,但它不起作用,代码如下:

import trio
import httpx
import pandas as pd


async def main():
    """Send the hotels.com-style ``reviewsQuery`` to an expedia.com host and print the reviews.

    Posts the ``reviewsQuery`` operation for hotel ``344560`` to
    ``https://fr.expedia.com/kes/graphql``, flattens every ``items`` list
    found under ``reviews.hermes.groups``, and prints the result as a
    pandas DataFrame.

    NOTE: the accompanying question reports that this request fails with a
    name-resolution ``ConnectError`` ([Errno -2]) for this host; the URL is
    kept as written because it is the attempt being asked about.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    # The query is written as adjacent string literals so that no literal
    # contains a raw line break (which would be a SyntaxError); the
    # concatenated text is unchanged.
    query = (
        "query reviewsQuery($hotelId: String!, $reviewType: String, $reviewOrder: String, $tripTypeFilter: String, $paginationURL: String) {\n  reviews(\n    hotelId: $hotelId\n    reviewType: $reviewType\n    reviewOrder: $reviewOrder\n    tripTypeFilter: $tripTypeFilter\n    paginationURL: $paginationURL\n  ) {\n    body {\n      reviewContent {\n        filters {\n          type\n          name\n          count\n          url\n          __typename\n        }\n        overall {\n          selectedFilterType\n          rating\n          badgeText\n          total\n          scores {\n            score\n            count\n            url\n            __typename\n          }\n          ratingAspects {\n            cleanliness\n            service\n            comfort\n            condition\n            neighbourhood\n            __typename\n          }\n          whatGuestsSay {\n            type\n            text\n            __typename\n          }\n          topRated {\n            category\n            explanation\n            __typename\n          }\n          __typename\n        }\n        sort {\n          url\n          options {\n            value\n            label\n            __typename\n          }\n          __typename\n        }\n        reviews {\n          hermes {\n            groups {\n              separatorText\n              items {\n                itineraryId\n                brand\n                googleTranslateEnabled\n                reviewDbDate\n                ...GuestReviewsFragment\n                __typename\n              }\n              __typename\n            }\n            __typename\n          }\n          __typename\n        }\n        pagination {\n          currentPage\n          nextURL\n          totalPages\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment GuestReviewsFragment on ReviewsItem {\n  genuineMsg\n  tripType\n  tripTypeText\n  "
        "reviewDate\n  reviewSubmitDate\n  rating\n  reviewer {\n    name\n    locality\n    locale\n    __typename\n  }\n  badge\n  summary\n  description\n  __typename\n}\n"
    )
    data = {
        "operationName": "reviewsQuery",
        "query": query,
        "variables": {
            "hotelId": "344560",
            "reviewOrder": "date_newest_first",
            "reviewType": "brand",
            "tripTypeFilter": "all",
        },
    }
    async with httpx.AsyncClient(timeout=None) as client:
        r = await client.post('https://fr.expedia.com/kes/graphql', json=data)
        # Fail loudly on an HTTP error instead of hitting a confusing
        # KeyError / JSON decode error further down.
        r.raise_for_status()
        groups = r.json()['data']['reviews']['body']['reviewContent']['reviews']['hermes']['groups']
        allin = [item for group in groups for item in group['items']]
        df = pd.DataFrame(allin)
        print(df)


if __name__ == "__main__":
    trio.run(main)

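这是错误信息:

gaierror                                  Traceback (most recent call last)

/usr/local/lib/python3.10/dist-packages/httpcore/_exceptions.py in map_exceptions(map)
      9     try:
---> 10         yield
     11     except Exception as exc:  # noqa: PIE786

31 frames

gaierror: [Errno -2] Name or service not known

The above exception was the direct cause of the following exception:

ConnectError                              Traceback (most recent call last)

ConnectError: [Errno -2] Name or service not known

The above exception was the direct cause of the following exception:

ConnectError                              Traceback (most recent call last)

[... skipping hidden 1 frame]

/usr/local/lib/python3.10/dist-packages/httpx/_transports/default.py in map_httpcore_exceptions()
     75
     76         message = str(exc)
---> 77         raise mapped_exc(message) from exc
     78
     79

ConnectError: [Errno -2] Name or service not known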

这是我尝试过的另一段代码,它报错:requests.exceptions.HTTPError: 429 Client Error: Too Many Requests for url: https://www.expedia.com/graphql

import requests
import pandas as pd

def main():
    """POST the PropertyFilteredReviewsQuery to expedia.com and print the reviews.

    Builds a one-element batch payload for property ``24625``, sends it to
    ``https://www.expedia.com/graphql``, raises on a non-success HTTP
    status, and prints the review entries as a pandas DataFrame.
    """
    request_headers = {
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
    }

    # Anonymous-visitor identity sent inside the request context.
    identity = {
        "duaid": "-1",
        "expUserId": "832921361",
        "tuid": "-1",
        "authState": "ANONYMOUS",
    }
    context = {
        "siteId": 1,
        "locale": "en_US",
        "eapid": 0,
        "currency": "USD",
        "device": {"type": "DESKTOP"},
        "identity": identity,
        "privacyTrackingState": "CAN_TRACK",
        "debugContext": {"abacusOverrides": [], "alterMode": "RELEASED"},
    }

    primary_criteria = {
        "dateRange": None,
        "rooms": [{"adults": 2}],
        "destination": {"regionId": "178305"},
    }
    secondary_criteria = {
        "booleans": [
            {"id": "includeRecentReviews", "value": True},
            {"id": "includeRatingsOnlyReviews", "value": True},
            {"id": "overrideEmbargoForIndividualReviews", "value": True},
        ],
        "counts": [
            {"id": "startIndex", "value": 0},
            {"id": "size", "value": 10},
        ],
        "selections": [
            {"id": "sortBy", "value": "NEWEST_TO_OLDEST_BY_LANGUAGE"},
            {"id": "searchTerm", "value": ""},
        ],
    }

    # The query text is truncated in this snippet (see its trailing marker).
    query_text = "query PropertyFilteredReviewsQuery($context: ContextInput!, $propertyId: String!, $searchCriteria: PropertySearchCriteriaInput!) {\n  propertyReviewSummaries(\n    context: $context\n    propertyIds: [$propertyId]\n    searchCriteria: $searchCriteria\n  ) {\n    ...__PropertyReviewSummaryFragment\n    __typename\n  }\n  propertyInfo(context: $context, propertyId: $propertyId) {\n    id\n    reviewInfo(searchCriteria: $searchCriteria) {\n      ...__PropertyReviewsListFragment\n      sortAndFilter {\n        ...TravelerTypeFragment\n        ...SortTypeFragment\n        ...SearchTextFragment\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n\n... (rest of the GraphQL query) ..."

    # Key order mirrors the original payload so the serialized JSON is the same.
    payload = [
        {
            "operationName": "PropertyFilteredReviewsQuery",
            "variables": {
                "context": context,
                "propertyId": "24625",
                "searchCriteria": {
                    "primary": primary_criteria,
                    "secondary": secondary_criteria,
                },
            },
            "query": query_text,
        }
    ]

    response = requests.post("https://www.expedia.com/graphql", json=payload, headers=request_headers)
    response.raise_for_status()

    # The response is a batch (list); the reviews are read from its first entry.
    first_result = response.json()[0]
    review_entries = first_result['data']['propertyInfo']['reviewInfo']['reviews']['content']['reviews']
    df = pd.DataFrame(list(review_entries))
    print(df)


if __name__ == "__main__":
    main()

python-3.x web-scraping post python-requests httpx
1个回答
0
投票

这对我有用,但我没有研究 client-info 和 duaid 是如何生成的。

import requests
import pandas as pd

def main():
    """Fetch reviews for one Expedia property via GraphQL and print a summary table.

    Sends a one-element batch containing the ``PropertyFilteredReviewsQuery``
    operation for property ``24625`` to ``https://www.expedia.com/graphql``
    and prints a DataFrame with the ``title``, ``superlative`` and ``text``
    of every returned review.

    The ``Client-Info`` header and the ``duaid`` identity value were copied
    from a browser session; how they are generated is unknown.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx status.
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
        # Origin unknown: value taken from browser dev tools.
        'Client-Info': "blossom-flex-ui,9382ef788e9311fcea3ce7a7b749cd68c4059a45,us-west-2",
    }

    # The query is written as adjacent string literals so that no literal
    # contains a raw line break (which would be a SyntaxError); the
    # concatenated text is unchanged.
    query = (
        "query PropertyFilteredReviewsQuery($context: ContextInput!, $propertyId: String!, $searchCriteria: PropertySearchCriteriaInput!) {\n  propertyReviewSummaries(\n    context: $context\n    propertyIds: [$propertyId]\n    searchCriteria: $searchCriteria\n  ) {\n    ...__PropertyReviewSummaryFragment\n    __typename\n  }\n  propertyInfo(context: $context, propertyId: $propertyId) {\n    id\n    reviewInfo(searchCriteria: $searchCriteria) {\n      ...__PropertyReviewsListFragment\n      sortAndFilter {\n        ...TravelerTypeFragment\n        ...SortTypeFragment\n        ...SearchTextFragment\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment __PropertyReviewSummaryFragment on PropertyReviewSummary {\n  accessibilityLabel\n  overallScoreWithDescriptionA11y {\n    ...LodgingEnrichedMessageFragment\n    __typename\n  }\n  propertyReviewCountDetails {\n    fullDescription\n    __typename\n  }\n  ...ReviewDisclaimerFragment\n  reviewSummaryDetails {\n    label\n    ratingPercentage\n    formattedRatingOutOfMax\n    __typename\n  }\n  totalCount {\n    raw\n    __typename\n  }\n  __typename\n}\n\nfragment ReviewDisclaimerFragment on PropertyReviewSummary {\n  reviewDisclaimer\n  reviewDisclaimerHeading\n  strategy\n  reviewDisclaimerValues {\n    text\n    __typename\n  }\n  reviewDisclaimerLabel\n  reviewDisclaimerAnalytics {\n    referrerId\n    linkName\n    __typename\n  }\n  reviewDisclaimerUrl {\n    value\n    accessibilityLabel\n    link {\n      url\n      __typename\n    }\n    __typename\n  }\n  reviewDisclaimerAccessibilityLabel\n  __typename\n}\n\nfragment LodgingEnrichedMessageFragment on LodgingEnrichedMessage {\n  __typename\n  subText\n  value\n  theme\n  state\n  accessibilityLabel\n  icon {\n    id\n    size\n    theme\n    __typename\n  }\n  mark {\n    id\n    __typename\n  }\n  egdsMark {\n    url {\n      value\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment "
        "__PropertyReviewsListFragment on PropertyReviews {\n  summary {\n    paginateAction {\n      text\n      analytics {\n        referrerId\n        linkName\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n  reviews {\n    contentDirectFeedbackPromptId\n    ...ReviewParentFragment\n    managementResponses {\n      ...ReviewChildFragment\n      __typename\n    }\n    reviewInteractionSections {\n      primaryDisplayString\n      reviewInteractionType\n      __typename\n    }\n    __typename\n  }\n  ...NoResultsMessageFragment\n  __typename\n}\n\nfragment ReviewParentFragment on PropertyReview {\n  id\n  superlative\n  locale\n  title\n  brandType\n  reviewScoreWithDescription {\n    label\n    value\n    __typename\n  }\n  text\n  seeMoreAnalytics {\n    linkName\n    referrerId\n    __typename\n  }\n  submissionTime {\n    longDateFormat\n    __typename\n  }\n  impressionAnalytics {\n    event\n    referrerId\n    __typename\n  }\n  themes {\n    ...ReviewThemeFragment\n    __typename\n  }\n  reviewFooter {\n    ...PropertyReviewFooterSectionFragment\n    __typename\n  }\n  ...FeedbackIndicatorFragment\n  ...AuthorFragment\n  ...PhotosFragment\n  ...TravelersFragment\n  ...ReviewTranslationInfoFragment\n  ...PropertyReviewSourceFragment\n  ...PropertyReviewRegionFragment\n  __typename\n}\n\nfragment AuthorFragment on PropertyReview {\n  reviewAuthorAttribution {\n    text\n    __typename\n  }\n  __typename\n}\n\nfragment PhotosFragment on PropertyReview {\n  id\n  photoSection {\n    imageClickAnalytics {\n      referrerId\n      linkName\n      __typename\n    }\n    exitAnalytics {\n      referrerId\n      linkName\n      __typename\n    }\n    navClickAnalytics {\n      referrerId\n      linkName\n      __typename\n    }\n    __typename\n  }\n  photos {\n    description\n    url\n    __typename\n  }\n  __typename\n}\n\nfragment TravelersFragment on PropertyReview {\n  travelers\n  __typename\n}\n\nfragment ReviewThemeFragment on "
        "ReviewThemes {\n  icon {\n    id\n    __typename\n  }\n  label\n  __typename\n}\n\nfragment FeedbackIndicatorFragment on PropertyReview {\n  reviewInteractionSections {\n    primaryDisplayString\n    accessibilityLabel\n    reviewInteractionType\n    feedbackAnalytics {\n      linkName\n      referrerId\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\nfragment ReviewTranslationInfoFragment on PropertyReview {\n  translationInfo {\n    loadingTranslationText\n    targetLocale\n    translatedBy {\n      description\n      __typename\n    }\n    translationCallToActionLabel\n    seeOriginalText\n    __typename\n  }\n  __typename\n}\n\nfragment PropertyReviewSourceFragment on PropertyReview {\n  propertyReviewSource {\n    accessibilityLabel\n    graphic {\n      description\n      id\n      size\n      token\n      url {\n        value\n        __typename\n      }\n      __typename\n    }\n    text {\n      value\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\nfragment PropertyReviewRegionFragment on PropertyReview {\n  reviewRegion {\n    id\n    __typename\n  }\n  __typename\n}\n\nfragment PropertyReviewFooterSectionFragment on PropertyReviewFooterSection {\n  messages {\n    seoStructuredData {\n      itemscope\n      itemprop\n      itemtype\n      content\n      __typename\n    }\n    text {\n      ... on EGDSPlainText {\n        text\n        __typename\n      }\n      ... on EGDSGraphicText {\n        text\n        graphic {\n          ... on Mark {\n            description\n            id\n            size\n            url {\n              ... "
        "on HttpURI {\n                relativePath\n                value\n                __typename\n              }\n              __typename\n            }\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\nfragment ReviewChildFragment on ManagementResponse {\n  id\n  header {\n    text\n    __typename\n  }\n  response\n  __typename\n}\n\nfragment NoResultsMessageFragment on PropertyReviews {\n  noResultsMessage {\n    __typename\n    ...MessagingCardFragment\n    ...EmptyStateFragment\n  }\n  __typename\n}\n\nfragment MessagingCardFragment on UIMessagingCard {\n  graphic {\n    __typename\n    ... on Icon {\n      id\n      description\n      __typename\n    }\n  }\n  primary\n  secondaries\n  __typename\n}\n\nfragment EmptyStateFragment on UIEmptyState {\n  heading\n  body\n  __typename\n}\n\nfragment TravelerTypeFragment on SortAndFilterViewModel {\n  sortAndFilter {\n    name\n    label\n    options {\n      label\n      isSelected\n      optionValue\n      description\n      clickAnalytics {\n        linkName\n        referrerId\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\nfragment SortTypeFragment on SortAndFilterViewModel {\n  sortAndFilter {\n    name\n    label\n    clickAnalytics {\n      linkName\n      referrerId\n      __typename\n    }\n    options {\n      label\n      isSelected\n      optionValue\n      description\n      clickAnalytics {\n        linkName\n        referrerId\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\nfragment SearchTextFragment on SortAndFilterViewModel {\n  sortAndFilter {\n    name\n    label\n    graphic {\n      ... on Icon {\n        description\n        id\n        token\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n"
    )

    data = [
        {
            "operationName": "PropertyFilteredReviewsQuery",
            "variables": {
                "context": {
                    "siteId": 1,
                    "locale": "en_US",
                    "eapid": 0,
                    "currency": "USD",
                    "device": {
                        "type": "DESKTOP"
                    },
                    "identity": {
                        # Origin unknown: value taken from browser dev tools.
                        "duaid": "1003be2b-6834-4cf8-bb66-66a49107b76c",
                        "expUserId": "-1",
                        "tuid": "-1",
                        "authState": "ANONYMOUS"
                    },
                    "privacyTrackingState": "CAN_TRACK",
                    "debugContext": {
                        "abacusOverrides": [],
                        "alterMode": "RELEASED"
                    }
                },
                "propertyId": "24625",
                "searchCriteria": {
                    "primary": {
                        "dateRange": None,
                        "rooms": [
                            {
                                "adults": 2
                            }
                        ],
                        "destination": {
                            "regionId": "178305"
                        }
                    },
                    "secondary": {
                        "booleans": [
                            {
                                "id": "includeRecentReviews",
                                "value": True
                            },
                            {
                                "id": "includeRatingsOnlyReviews",
                                "value": True
                            },
                            {
                                "id": "overrideEmbargoForIndividualReviews",
                                "value": True
                            }
                        ],
                        "counts": [
                            {
                                "id": "startIndex",
                                "value": 0
                            },
                            {
                                "id": "size",
                                "value": 10
                            }
                        ],
                        "selections": [
                            {
                                "id": "sortBy",
                                "value": "NEWEST_TO_OLDEST_BY_LANGUAGE"
                            },
                            {
                                "id": "searchTerm",
                                "value": ""
                            }
                        ]
                    }
                }
            },
            "query": query
        }
    ]

    response = requests.post("https://www.expedia.com/graphql", json=data, headers=headers)
    response.raise_for_status()

    # The response is a batch (list); the reviews are read from its first entry.
    reviews = response.json()[0]['data']['propertyInfo']['reviewInfo']['reviews']
    columns = ['title', 'superlative', 'text']
    # Collect plain dicts and build the frame once: concatenating inside the
    # loop re-copies the frame on every iteration and leaves every row with
    # index 0. Passing `columns` keeps the header even when no reviews return.
    rows = [{key: review[key] for key in columns} for review in reviews]
    allin = pd.DataFrame(rows, columns=columns)
    print(allin)


if __name__ == "__main__":
    main()
© www.soinside.com 2019 - 2024. All rights reserved.