[图片占位符:请在此输入图片描述]
我是 Puppeteer 的新手,想从这个页面的一个表格中抓取数据:https://www.ewrc-results.com/season/1995/1-wrc/
下面是该表格在 DOM 中的截图。
我使用的代码如下:
// Original script from the question, kept verbatim: it reproduces the
// "Cannot read property 'innerText' of null" error quoted after it.
const puppeteer = require('puppeteer');
/**
 * Launches a browser via Puppeteer, opens the season page for `year`,
 * collects one record per `tr.table_sude` row inside the page, and logs
 * the records as pretty-printed JSON.
 *
 * Any error is caught and logged with console.log; nothing is returned.
 * Note that the browser is never closed in this version.
 *
 * @param {number|string} year - Season interpolated into the page URL.
 */
async function getChampTable(year) {
try {
const browser = await puppeteer.launch();
const page = await browser.newPage();
const url = `https://www.ewrc-results.com/season/${year}/1-wrc/`;
await page.goto(url, {waitUntil: 'domcontentloaded'});
// The callback below runs inside the page. Its helper grabFromRow always
// builds the selector `td.<classname>`, so grabFromRow(tr, 'a') looks for
// `td.a` (a <td> with class "a"), not an <a> element. querySelector returns
// null when nothing matches, and reading `.innerText` of null throws.
// The row selector 'tr.table_sude' is also not scoped to any one table.
const driverTable = await page.evaluate(() => {
const grabFromRow = (row, classname) => row
.querySelector(`td.${classname}`)
.innerText
.trim()
const DRIVER_ROW_SELECTOR = 'tr.table_sude'
const data = []
const driverRows = document.querySelectorAll(DRIVER_ROW_SELECTOR)
for (const tr of driverRows) {
data.push({
position: grabFromRow(tr, 'points-pos'),
name: grabFromRow(tr, 'a'),
pointsTotal: grabFromRow(tr, 'points-total')
})
}
return data
})
console.log(JSON.stringify(driverTable, null, 2))
} catch (error) {
console.log(error)
}
}
// Start the scrape for the 1995 season; the returned promise is not awaited.
getChampTable(1995);
这是我收到的错误。
Error: Evaluation failed: TypeError: Cannot read property 'innerText' of null
at grabFromRow (__puppeteer_evaluation_script__:4:5)
at __puppeteer_evaluation_script__:16:12
at ExecutionContext._evaluateInternal (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/ExecutionContext.js:102:19)
at processTicksAndRejections (internal/process/task_queues.js:97:5)
at async ExecutionContext.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/ExecutionContext.js:33:16)
at async getChampTable (/Users/jamescowell/Desktop/Projects/Bobble/scraper/index.js:14:23)
-- ASYNC --
at ExecutionContext.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:94:19)
at DOMWorld.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/DOMWorld.js:89:24)
at processTicksAndRejections (internal/process/task_queues.js:97:5)
-- ASYNC --
at Frame.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:94:19)
at Page.evaluate (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/Page.js:591:14)
at Page.<anonymous> (/Users/jamescowell/Desktop/Projects/Bobble/scraper/node_modules/puppeteer/lib/helper.js:95:27)
at getChampTable (/Users/jamescowell/Desktop/Projects/Bobble/scraper/index.js:14:34)
at processTicksAndRejections (internal/process/task_queues.js:97:5)
非常感谢任何帮助!
页面上有多个表格都包含 tr.table_sude 行,但并非所有这些行都含有所需的子元素,因此对它们调用 querySelector 会返回 null。你需要把选择器写得更具体。(另外,提取车手姓名的部分也需要做一些修正。)
'use strict';
const puppeteer = require('puppeteer');
/**
 * Scrapes the championship standings table for one season from
 * ewrc-results.com and prints it as pretty-printed JSON.
 *
 * @param {number|string} year - Season interpolated into the page URL.
 * @returns {Promise<Array<{position: string, name: string, pointsTotal: string}>|undefined>}
 *   The scraped rows, or `undefined` when scraping failed (the error is
 *   logged, not rethrown).
 */
async function getChampTable(year) {
  let browser;
  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    const url = `https://www.ewrc-results.com/season/${year}/1-wrc/`;
    await page.goto(url, { waitUntil: 'domcontentloaded' });

    // This callback is executed inside the page, so everything it needs
    // (selector, helper) has to be defined within it.
    const driverTable = await page.evaluate(() => {
      // Only rows of the table that directly follows div#points.
      const DRIVER_ROW_SELECTOR = 'div#points + table tr.table_sude';

      // Trimmed text of the first match inside `row`, or null if no match.
      const readText = (row, selector) => {
        const cell = row.querySelector(selector);
        return cell === null ? null : cell.innerText.trim();
      };

      const data = [];
      for (const tr of document.querySelectorAll(DRIVER_ROW_SELECTOR)) {
        const position = readText(tr, 'td.points-pos');
        const name = readText(tr, 'a');
        const pointsTotal = readText(tr, 'td.points-total');
        // A row lacking any expected cell is skipped instead of aborting
        // the whole scrape with a null dereference.
        if (position === null || name === null || pointsTotal === null) {
          continue;
        }
        data.push({ position, name, pointsTotal });
      }
      return data;
    });

    console.log(JSON.stringify(driverTable, null, 2));
    return driverTable;
  } catch (error) {
    console.error(error);
    return undefined;
  } finally {
    // Always release the browser; otherwise the Chromium process keeps
    // running and the Node process does not exit.
    if (browser !== undefined) {
      await browser.close();
    }
  }
}
// Start the scrape for the 1995 season; the returned promise is not awaited.
getChampTable(1995);