我正在尝试抓取一个网站,并获取只有在浏览器开发者工具的“网络”(Network)选项卡所列出的请求中才能看到的信息。
我遇到了两个问题:
我无法在运行时获取这些请求,因为 `page.tracing` 会把所有信息保存到一个文件中,而在程序运行期间(即使文件已经生成)我也无法读取该文件。如果改用其他方法,例如 `page.on('request', ...)`,我又拿不到我想要的请求——似乎并非所有请求都会被捕获。
当我以 `headless: true` 启动浏览器来运行程序时,会出现如下错误:`TimeoutError: waiting for target failed: timeout 30000ms exceeded`。
下面是我的示例代码:
import puppeteer from "puppeteer-extra";
import StealthPlugin from "puppeteer-extra-plugin-stealth";
/**
 * Returns a promise that resolves after a delay, used to pace the scripted
 * interactions.
 *
 * @param time Delay in milliseconds. When omitted, a random whole number of
 *   milliseconds in the range 1000..3999 is used.
 * @returns A promise that settles once the timer fires.
 */
function holdOn(time?: number): Promise<unknown> {
  let delayMs = time;
  if (delayMs == null) {
    // No explicit delay requested: pick a random pause of 1 to just under 4 seconds.
    delayMs = Math.floor(1000 + Math.random() * 3000);
  }
  return new Promise((done) => {
    setTimeout(done, delayMs);
  });
}
/**
 * Logs in to pixbet.com with the sample credentials, opens the
 * "live-spaceman" game page and prints every request the page issues while
 * that page loads. A DevTools timeline trace (with screenshots) covering the
 * session is written to ./tracing.json.
 *
 * The browser is always closed, even when one of the steps fails.
 *
 * @throws Error when the login button (".btn_general") cannot be found, and
 *   any error raised by Puppeteer itself (for example a selector timeout).
 */
async function crawler(): Promise<void> {
  puppeteer.use(StealthPlugin());

  const browser = await puppeteer.launch({
    headless: true,
    defaultViewport: null,
    ignoreHTTPSErrors: true,
    args: [
      // Chromium switches have the form `--name=value`. The previous
      // "--user-agent:..." (colon) was not recognised, and the bare
      // "accept-language:..." entry was not a switch at all.
      // NOTE(review): a non-switch argument may be taken as a start URL, which
      // could be related to the "waiting for target failed" timeout seen in
      // headless mode - confirm by re-running with this change.
      "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
    ],
    ignoreDefaultArgs: [
      "--disable-extensions",
      "--disable-default-apps",
      "--disable-component-extensions-with-background-pages",
    ],
  });

  try {
    // Reuse the tab the browser starts with; fall back to a new one if none exists.
    const [initialPage] = await browser.pages();
    const page = initialPage ?? (await browser.newPage());

    // Accept-Language is a request header, not a launch flag.
    await page.setExtraHTTPHeaders({ "Accept-Language": "en-US,en;q=0.9" });

    await page.tracing.start({
      screenshots: true,
      categories: ["devtools.timeline"],
      path: "./tracing.json",
    });

    // 0 disables the navigation timeout entirely.
    page.setDefaultNavigationTimeout(0);

    // --- Open the landing page and bring up the login popup ---
    await page.goto("http://pixbet.com/", { waitUntil: "networkidle0" });
    await page.waitForSelector(".reg_login_btn_area");
    const loginButton = await page.$(".btn_general");
    if (loginButton === null) {
      throw new Error('Login button ".btn_general" was not found on the page');
    }
    await loginButton.click();
    await page.waitForSelector("div#fe_login_box_popup");

    // --- Fill in the credentials with human-like pauses ---
    await holdOn();
    await page.focus('input[name="username"]');
    await page.keyboard.type("user_teste_sample", { delay: 40 });
    await holdOn();
    await page.focus('input[name="password"]');
    await page.keyboard.type("P4$$W0RD_S4MPL3", { delay: 100 });

    // Start waiting for the navigation BEFORE clicking: if the click is
    // awaited first, the navigation can finish before the wait is registered
    // and the wait then never resolves.
    await Promise.all([
      page.waitForNavigation({ waitUntil: "networkidle0" }),
      page.click("div.fhtxt > button"),
    ]);

    // --- Log the requests issued by the game page ---
    // The "request" event fires without request interception, so interception
    // is not enabled here: it is only needed to modify or abort requests, and
    // turning it on disables the page cache.
    // NOTE(review): WebSocket frames are not reported through this event; if
    // the missing data arrives over a WebSocket, a CDP session listening to
    // Network.webSocketFrameReceived would be needed - confirm in DevTools.
    page.on("request", (request) => {
      console.log(">>", request.method(), request.url());
    });

    await page.goto("https://pixbet.com/casino/game/35423-live-spaceman", {
      waitUntil: "networkidle0",
      timeout: 0,
    });

    await page.tracing.stop();
    console.log("Finish");
    await page.close();
  } finally {
    // Always release the browser process, including on failure.
    await browser.close();
  }
}
// Entry point: run the crawler and report a failure explicitly instead of
// leaving the returned promise's rejection unhandled.
crawler().catch((error: unknown) => {
  console.error(error);
  // Signal the failure to the calling shell without cutting off pending output.
  process.exitCode = 1;
});