无法产生包含我希望获得的地址的结果

问题描述 投票:0回答:1

我正在尝试使用 requests 模块和 BeautifulSoup 库创建一个脚本,让它在此网站上执行以下操作:

选择Strata plan number按钮,在输入框中输入11,然后点击search按钮。最后,从结果中抓取地址。

运行脚本后,当我验证结果时,我没有看到其中的地址。

import re
import requests
from bs4 import BeautifulSoup

# Public landing page; the session code below looks for the search tool's iframe in it.
link = 'https://www.nsw.gov.au/housing-and-construction/strata/strata-search'
# Search endpoint the form data is posted to, hard-coded together with its
# pzTransactionId and AJAXTrackID query values.
# NOTE(review): these look like per-session values copied from a browser and may
# be stale in a fresh session -- confirm.
url = 'https://www.stratahub.nsw.gov.au/prweb/PRAuth/app/ssr_4380/6nxCgYjOTS_fVOVfeekVPA*/!SchemeSearch?pzTransactionId=cc5ddc1ecec1c095231675db14450f87&pzFromFrame=&pzPrimaryPageName=pyDisplayHarness&AJAXTrackID=22'

# Headers applied to every request made through the session below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br, zstd',
    'accept-language': 'en-US,en;q=0.9',
    'X-Requested-With': 'XMLHttpRequest',
    'origin': 'https://www.stratahub.nsw.gov.au',
}

# Form fields posted to `url`: search by "Strata Plan Number" for plan 11.
# "pzuiactionzzz" starts empty and is filled in from the iframe URL before the POST.
# NOTE(review): "pzHarnessID" is hard-coded; presumably it belongs to one browser
# session -- verify whether the server accepts it in a new session.
payload = {
    "$PSchemeSearch$pSearchBy": "Strata Plan Number",
    "$PSchemeSearch$pSchemePlanNumber": 11,
    "pzuiactionzzz": "",
    "PreActivitiesList": "",
    "sectionParam": "",
    "ActivityParams": "=",
    "$ODesktopWrapperInclude": "",
    "$ODeterminePortalTop": "",
    "$ODynamicLayout": "",
    "$ODynamicLayoutCell": "",
    "$OEvalDOMScripts_Include": "",
    "$OForm": "",
    "$OHarness": "",
    "$OHarnessStaticJSEnd": "",
    "$OHarnessStaticJSStart": "",
    "$OHarnessStaticScriptsClientValidation": "",
    "$OPMCHarnessStaticScripts": "",
    "$OSessionUser": "",
    "$OSurveyStaticScripts": "",
    "$OWorkformStyles": "",
    "$OpxAutoComplete": "",
    "$OpxButton": "",
    "$OpxDisplayText": "",
    "$OpxHarnessContent": "",
    "$OpxLayoutContainer": "",
    "$OpxNonTemplate": "",
    "$OpxRadioButtons": "",
    "$OpxSection": "",
    "$OpxVisible": "",
    "$OpxWorkArea": "",
    "$OpxWorkAreaContent": "",
    "$OpyDirtyCheckConfirm": "",
    "$OpyWorkFormStandardEnd": "",
    "$OpyWorkFormStandardStart": "",
    "$OpzAutoCompleteAGIncludes": "",
    "$OpzHarnessInlineScriptsEnd": "",
    "$OpzHarnessInlineScriptsStart": "",
    "$OpzPortalFavIcon": "",
    "$OpzPortalIcon": "",
    "$Opzpega_ui_harnesscontext": "",
    "$Opzpega_web_mashup": "",
    "$OpxTextInput": "",
    "$OpzDecimalInclude": "",
    "pyEncodedParameters": True,
    "pzKeepPageMessages": False,
    "strPHarnessClass": "Data-Portal",
    "strPHarnessPurpose": "SearchStrataScheme",
    "UITemplatingStatus": "Y",
    "StreamName": "SchemeSearch",
    "BaseReference": "SchemeSearch",
    "bClientValidation": True,
    "FormError": "NONE",
    "pyCustomError": "DisplayErrors",
    "UsingPage": True,
    "HeaderButtonSectionName": "-1",
    "PagesToRemove": "",
    "pzHarnessID": "HID387D2E2FCEE4EC200B5BAEA8C6A5D859",
    "inStandardsMode": True
}



with requests.Session() as s:
    s.headers.update(headers)
    # Fetch the landing page and locate the iframe titled "Strata Search Production".
    res = s.get(link)
    soup = BeautifulSoup(res.text,"lxml")
    # select_one() returns None when nothing matches, so this line raises
    # TypeError if the iframe is not present in the downloaded HTML.
    code_url = soup.select_one("iframe[title='Strata Search Production']")['data-src']
    s.headers['referer'] = code_url
    # Everything after the last "?" of the iframe URL becomes the pzuiactionzzz value.
    payload['pzuiactionzzz'] = code_url.split("?")[-1]
    r = s.post(url,data=payload)
    # Dump the status and raw body for manual inspection.
    print(r.status_code)
    print(r.text)

如何生成包含我要查找的地址的结果?

python python-3.x web-scraping beautifulsoup python-requests
1个回答
0
投票

我认为更简单的方法是改变获取数据的策略:

当您看到结果并单击“查看地图”时,页面会发出请求,该请求要简单得多并且地址就在那里。

例如:

import requests

# Map-layer "query" endpoint: the request the results page issues when
# "View map" is clicked. Its JSON response already carries the address.
api_url = (
    "https://portal.spatial.nsw.gov.au/server/rest/services/StrataHub/MapServer/0/query"
)

# WHERE-clause template; plan labels have the form "SP<number>", e.g. "SP11".
plan = "planlabel='SP{}'"

params = {
    "f": "json",
    "where": plan.format(11),  # <-- change to the number you want
    "returnGeometry": "true",
    "spatialRel": "esriSpatialRelIntersects",
    "maxAllowableOffset": "0.00001",
    "outFields": "*",
    "outSR": "102100",
}

# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(api_url, params=params, timeout=30)
# Fail with a clear HTTP error instead of a confusing JSON decode error
# when the server answers with a non-2xx status.
response.raise_for_status()

data = response.json()
print(data)

打印:

{
    "displayFieldName": "plannumber",
    "fieldAliases": {
        "objectid": "objectid",
        "plannumber": "plannumber",
        "registrationdate": "registrationdate",
        "shape_length": "shape_length",
        "shape_area": "shape_area",
        "address": "address",
        "suburb": "suburb",
        "lga": "lga",
        "lottotal": "lottotal",
        "postcode": "postcode",
        "planlabel": "planlabel",
        "st_area(shape)": "st_area(shape)",
        "st_perimeter(shape)": "st_perimeter(shape)",
    },
    "geometryType": "esriGeometryPolygon",
    "spatialReference": {"wkid": 102100, "latestWkid": 3857},
    "fields": [
        {"name": "objectid", "type": "esriFieldTypeOID", "alias": "objectid"},
        {"name": "plannumber", "type": "esriFieldTypeInteger", "alias": "plannumber"},
        {
            "name": "registrationdate",
            "type": "esriFieldTypeDate",
            "alias": "registrationdate",
            "length": 8,
        },
        {
            "name": "shape_length",
            "type": "esriFieldTypeDouble",
            "alias": "shape_length",
        },
        {"name": "shape_area", "type": "esriFieldTypeDouble", "alias": "shape_area"},
        {
            "name": "address",
            "type": "esriFieldTypeString",
            "alias": "address",
            "length": 255,
        },
        {
            "name": "suburb",
            "type": "esriFieldTypeString",
            "alias": "suburb",
            "length": 255,
        },
        {"name": "lga", "type": "esriFieldTypeString", "alias": "lga", "length": 255},
        {"name": "lottotal", "type": "esriFieldTypeSmallInteger", "alias": "lottotal"},
        {"name": "postcode", "type": "esriFieldTypeInteger", "alias": "postcode"},
        {
            "name": "planlabel",
            "type": "esriFieldTypeString",
            "alias": "planlabel",
            "length": 255,
        },
        {
            "name": "st_area(shape)",
            "type": "esriFieldTypeDouble",
            "alias": "st_area(shape)",
        },
        {
            "name": "st_perimeter(shape)",
            "type": "esriFieldTypeDouble",
            "alias": "st_perimeter(shape)",
        },
    ],
    "features": [
        {
            "attributes": {
                "objectid": 9,
                "plannumber": 11,
                "registrationdate": -259545600000,
                "shape_length": 0.00128814089728482,
                "shape_area": 9.29112433528216e-08,
                "address": "6 BURRANEER BAY ROAD CRONULLA",
                "suburb": "CRONULLA",
                "lga": "SUTHERLAND SHIRE",
                "lottotal": 14,
                "postcode": 2230,
                "planlabel": "SP11",
                "st_area(shape)": 9.291124335282166e-08,
                "st_perimeter(shape)": 0.001288140897284823,
            },
            "geometry": {
                "rings": [
                    [
                        [16825771.206571, -4035936.439062],
                        [16825749.918205, -4035952.800724],
                        [16825718.247838, -4035911.771859],
                        [16825739.304771, -4035895.376397],
                        [16825771.206571, -4035936.439062],
                    ]
                ]
            },
        }
    ],
}

地址在 "features" 键下,例如 data["features"][0]["attributes"]["address"]。

© www.soinside.com 2019 - 2024. All rights reserved.