使用 page.evaluate 时 Puppeteer 报错:TypeError: 无法读取 null 的 'innerText' 属性(Cannot read property 'innerText' of null)

问题描述 投票:0回答:1

[图片] 我是 Puppeteer 的新手,我想从这个页面的一个表格中抓取数据:https://www.ewrc-results.com/season/1995/1-wrc/

下面是DOM中表格的截图。

Here is a screenshot of the table in the DOM.

我使用的代码如下。

const puppeteer = require('puppeteer');

async function getChampTable(year) { // Loads the season page for `year` and logs the scraped rows as JSON; errors are caught and logged, nothing is returned.
    try {

        const browser = await puppeteer.launch(); // NOTE(review): the browser is never closed anywhere in this function.

        const page = await browser.newPage();

        const url = `https://www.ewrc-results.com/season/${year}/1-wrc/`;

        await page.goto(url, {waitUntil: 'domcontentloaded'});

        const driverTable = await page.evaluate(() => { // The callback runs inside the page; its return value is sent back to Node.
            const grabFromRow = (row, classname) => row // Trimmed text of the first <td> in `row` that has class `classname`.
                .querySelector(`td.${classname}`) // Yields null when the row has no matching <td>.
                .innerText // Source of the reported TypeError: there is no null check before this access.
                .trim()

            const DRIVER_ROW_SELECTOR = 'tr.table_sude' // Matches every such row in the document, not only rows of one particular table.

            const data = []

            const driverRows = document.querySelectorAll(DRIVER_ROW_SELECTOR)

            for (const tr of driverRows) {
                data.push({
                    position: grabFromRow(tr, 'points-pos'),
                    name: grabFromRow(tr, 'a'), // NOTE(review): this builds the selector 'td.a' (a <td> with class "a"), not an <a> element.
                    pointsTotal: grabFromRow(tr, 'points-total')
                })
            }
            return data
        })

        console.log(JSON.stringify(driverTable, null, 2))

    } catch (error) {
        console.log(error) // Failures (including the in-page TypeError) are only logged, not rethrown.
    }
}

getChampTable(1995); // Promise is not awaited; the function handles its own errors.

这是我收到的错误。

Error: Evaluation failed: TypeError: Cannot read property 'innerText' of null
    at grabFromRow (__puppeteer_evaluation_script__:4:5)
    at __puppeteer_evaluation_script__:16:12
    at ExecutionContext._evaluateInternal (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/ExecutionContext.js:102:19)
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
    at async ExecutionContext.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/ExecutionContext.js:33:16)
    at async getChampTable (/Users/jamescowell/Desktop/Projects/Bobble/scraper/index.js:14:23)
  -- ASYNC --
    at ExecutionContext.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:94:19)
    at DOMWorld.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/DOMWorld.js:89:24)
    at processTicksAndRejections (internal/process/task_queues.js:97:5)
  -- ASYNC --
    at Frame.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:94:19)
    at Page.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/Page.js:591:14)
    at Page.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:95:27)
    at getChampTable (/Users/jamescowell/Desktop/Projects/Bobble/scraper/index.js:14:34)
    at processTicksAndRejections (internal/process/task_queues.js:97:5)

非常感谢任何帮助!

javascript node.js dom web-scraping puppeteer
1个回答
0
投票

页面上有多个表格都包含 tr.table_sude 行,但并非所有这些行都含有所需的子元素(td.points-pos、td.points-total),因此 querySelector 会返回 null。你需要把选择器写得更具体。(另外,提取车手姓名的方式也需要做一些修正。)

'use strict';

const puppeteer = require('puppeteer');

/**
 * Scrapes the points table that directly follows `div#points` on the
 * ewrc-results.com season page for the given year, prints it as
 * pretty-printed JSON and returns it.
 *
 * Errors are logged rather than rethrown, so the returned promise does not
 * reject; on failure it resolves to `undefined`.
 *
 * @param {number|string} year - Season year interpolated into the page URL.
 * @returns {Promise<Array<{position: ?string, name: ?string, pointsTotal: ?string}>|undefined>}
 *     One entry per matched row; a field is `null` when the row has no
 *     element matching that field's selector.
 */
async function getChampTable(year) {
    // Declared outside `try` so the `finally` clause can see it.
    let browser;
    try {
        browser = await puppeteer.launch();
        const page = await browser.newPage();
        const url = `https://www.ewrc-results.com/season/${year}/1-wrc/`;
        await page.goto(url, {waitUntil: 'domcontentloaded'});

        // Everything inside this callback executes in the page, not in Node;
        // only its (serializable) return value crosses back.
        const driverTable = await page.evaluate(() => {
            // Restrict to rows of the table immediately after div#points;
            // other tables on the page reuse the same row class.
            const DRIVER_ROW_SELECTOR = 'div#points + table tr.table_sude';

            // Trimmed text of the first descendant matching `selector`,
            // or null instead of throwing when nothing matches.
            const grabFromRow = (row, selector) => {
                const cell = row.querySelector(selector);
                return cell ? cell.innerText.trim() : null;
            };

            return Array.from(
                document.querySelectorAll(DRIVER_ROW_SELECTOR),
                (tr) => ({
                    position: grabFromRow(tr, 'td.points-pos'),
                    name: grabFromRow(tr, 'a'),
                    pointsTotal: grabFromRow(tr, 'td.points-total'),
                })
            );
        });

        console.log(JSON.stringify(driverTable, null, 2));
        return driverTable;
    } catch (error) {
        console.log(error);
        return undefined;
    } finally {
        // Always shut the browser down; otherwise the Chromium child process
        // stays alive and keeps the Node process from exiting.
        if (browser) {
            try {
                await browser.close();
            } catch (closeError) {
                console.log(closeError);
            }
        }
    }
}

getChampTable(1995);
© www.soinside.com 2019 - 2024. All rights reserved.