我正在尝试获取页面中的所有
<a>
元素。console.log 输出的是 undefined,但我不明白为什么。下面这行写法
const anchors = Array.from(document.querySelectorAll(sel));
正确吗?
const puppeteer = require('puppeteer');
/**
 * Launches a visible Chrome instance using the given user profile directory,
 * opens the group members page, and logs the href of every <a> element on it.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    userDataDir: "C:\\Users\\johndoe\\AppData\\Local\\Google\\Chrome\\User Data\\Default",
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1920,
      height: 1080,
      deviceScaleFactor: 1,
    });
    await page.goto('https://www.facebook.com/groups/632312010245152/members');
    // Based on https://github.com/puppeteer/puppeteer/blob/main/examples/search.js
    // The value returned from page.evaluate is serialized to cross from the
    // browser back to Node. DOM elements cannot be serialized, so returning
    // them directly does not give usable data; extract plain strings (the
    // href of each anchor) inside the page context instead.
    const membri = await page.evaluate(
      (sel) => Array.from(document.querySelectorAll(sel), (anchor) => anchor.href),
      'a'
    );
    console.log(membri);
  } finally {
    // Always release the browser process, even if navigation or evaluation fails.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
const findLinks = await page.evaluate(() =>
Array.from(document.querySelectorAll("a")).map((info) => ({
information: info.href.split()
}))
);
links = [];
findLinks.forEach((link) => {
if (link.information.length) {
links.push(link.information);
}
});
await console.log(links);
await page.close();
return links;
不确定这是否是最优化的解决方案,但它确实有效。如果您能给我发送这段代码的清理版本,我将非常感激:)
// Build a plain, serializable list of { href } objects, one per <a> element
// on the page, then print it. The callback runs inside the page context.
const arrayList = await page.evaluate(() =>
  Array.from(document.querySelectorAll('a'), (anchor) => ({ href: anchor.href }))
);
console.log(arrayList);