如何在 Python 请求时返回正确的 json。发布到 joom.com 以显示更多产品?

问题描述 投票:0回答:0

当访问 joom.com 的任何类别时,需要按“显示更多”按钮加载下一页,我尝试使用 Python

requests.post
它的 ajax api url 来获取英文 json,但是,它只返回
api.malformed_json 
或不可读的 utf-8 代码(似乎是机器人验证码)。

你能帮帮我吗?我的代码有什么问题?

import re
import requests
from bs4
import BeautifulSoup
import json
from jsoncomment
import JsonComment

header1 = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'upgrade-insecure-requests': '1',
}

url = 'https://www.joom.com/en/search/c.1663522401361498522-50-2-30725-1284316286'
proxies = {
    'http': 'socks5://127.0.0.1:10808',
    'https': 'socks5://127.0.0.1:10808'
}

result = requests.get(
    url = url,
    headers = header1,
    proxies = proxies
)

cookies_dict = requests.utils.dict_from_cookiejar(result.cookies)
soup = BeautifulSoup(result.text, 'html.parser')
script_tag = soup.find('script', {
    'nonce': True
})
if script_tag:
    nonce = script_tag['nonce']
print(nonce)
print(script_tag)
else :
    print('No script tag with nonce found.')
find_renderingConfig = re.compile(r '.*window\.__renderingConfig=(.*?);')

renderingConfig = re.findall(find_renderingConfig, str(script_tag))
renderid = ''
if len(renderingConfig):
    parser = JsonComment(json)
r2 = parser.loads(renderingConfig[0])
print(f 'renderingConfig:{r2["id"]}')
renderid = r2["id"]

findNextPageToken = re.compile(r '.*window\.__data=(.*?);')
windowdata = re.findall(findNextPageToken, str(script_tag))
nextPageToken = ''

if len(windowdata):
    parser = JsonComment(json)
data_json = parser.loads(windowdata[0])
nextPageToken = data_json['search']['products']['data']['nextPageToken']
print(f 'nextPageToken={nextPageToken}')

import subprocess

js_code = ""
"
const o = (a = [572, 551, 778, 660, 394, 162, 251, 972, 586, 60, 3, 637, 732, 974, 807, 603, 476, 756, 167, 602, 256, 493, 475, 514, 931, 642, 642, 702, 862, 150, 282, 331, 367, 305, 602, 247, 171, 721, 180, 384, 431, 135, 389, 225, 424, 934, 549, 364, 484, 797, 467, 52, 601, 292, 306, 466, 358, 795, 988, 685, 761, 970], [705, 685, 913, 796, 531, 300, 390, 1112, 727, 202, 146, 781, 877, 1120, 954, 751, 625, 906, 318, 754, 409, 647, 630, 670, 1088, 800, 743, 804, 965, 254, 387, 437, 474, 413, 711, 357, 282, 833, 293, 498, 546, 251, 506, 343, 543, 1054, 670, 486, 607, 921, 592, 178, 685, 377, 392, 553, 446, 884, 1078, 776, 853, 1063].map((e => e - 36)).map(((e, s) => String.fromCharCode(e - a[s]))).join(""));
var a;
const g = () => Array.from({
    length: 16
}).map((() => Math.round(61 * Math.random() + 0)));

function nl() {
    const e = g();
    return [e.map((e => o[e])).join(""), [410, 410, 457, 411, 426, 402, 404, 460, 459, 413, 440, 416, 449, 434, 444, 428].map((e => e - 402)).map(((s, i) => s + e[i])).map((e => o[e % 62])).join("")]
}

const [e, n] = (0, nl)();
result = `${e}${n}`
""
"

process = subprocess.Popen(['node', '-p', js_code], stdout = subprocess.PIPE)
output, error = process.communicate()

x_api_token = output.decode("utf-8").strip()
print(x_api_token)

header2 = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'content-length': '245',
    'content-type': 'application/json',
    'origin': 'https://www.joom.com',
    'referer': 'https://www.joom.com/en/search/c.1663522401361498522-50-2-30725-1284316286',
    'authorization': f 'Bearer {cookies_dict["accesstoken"]}',
    'x-api-token': x_api_token,
    #'x-api-token': 'FY4kzc4NTMclZfMuN6XtXc6JOXOzKLsU',
    'x-ostype': 'windows',
    'x-rendering-id': renderid,
    'x-request-path': '/en/search/c.1663522401361498522-50-2-30725-1284316286',
    'x-version': '4.8.5-1680785765',
}

import random
import string

random_string = ''.join(random.choices(string.ascii_letters + string.digits, k = 4))
print(f 'lgan{random_string}')

url2 = f 'https://www.joom.com/api/1.1/search/content?language=en-US&_lgan{random_string}'
data = {
    "filters": [{
        "id": "categoryId",
        "value": {
            "type": "categories",
            "items": [{
                "id": "1663522401361498522-50-2-30725-1284316286"
            }]
        }
    }],
    "appearance": {
        "productColumns": 12
    },
    "count": 36,
    "pageToken": nextPageToken
}

result2 = requests.post(
    url = url2,
    headers = header2,
    proxies = proxies,
    json = json.dumps(data), #cookies = result.cookies, #json = {
        'json_payload': data
    }
)

print(result2.text)
if result2.status_code == 200:
    print('meet 200')

result2.text展示 �{“类型”:“api.malformed_json”}

ajax
© www.soinside.com 2019 - 2024. All rights reserved.