我一直在使用
selenium-webdriver
npm 包,通过需要身份验证的 squid
代理访问页面(以下代码已脱敏):
'use strict';
const { Builder } = require('selenium-webdriver'),
proxy = require('selenium-webdriver/proxy'),
proxyAddr = '<IP:PORT>',
chrome = require('selenium-webdriver/chrome');
/**
 * Entry point: starts Chrome behind a manually configured HTTP/HTTPS proxy,
 * registers the username/password pair on a page-level CDP connection,
 * loads https://www.selenium.dev/, waits ten seconds and shuts the browser
 * down.
 *
 * Any failure is logged with console.error rather than rethrown. The driver
 * is always quit once it has been created, so a failed navigation does not
 * leave a browser session behind.
 */
(async () => {
  // Declared outside the try so the finally clause can reach it.
  let driver;
  try {
    const chromeOptions = new chrome.Options();
    const driverProxyConfig = proxy.manual({
      http: proxyAddr,
      https: proxyAddr,
      bypass: ['accounts.google.com'],
    });
    chromeOptions.setProxy(driverProxyConfig);

    driver = await new Builder()
      .forBrowser('chrome')
      .setChromeOptions(chromeOptions)
      .build();

    // The credentials are registered before the first navigation.
    const pageCdpConnection = await driver.createCDPConnection('page');
    await driver.register('<username>', '<password>', pageCdpConnection);

    await driver.get('https://www.selenium.dev/');
    await driver.sleep(10_000);
  } catch (err) {
    console.error(err);
  } finally {
    // driver stays undefined when build() itself failed; nothing to quit then.
    if (driver !== undefined) {
      try {
        await driver.quit();
      } catch (quitErr) {
        // Log shutdown failures separately so they never mask the error above.
        console.error(quitErr);
      }
    }
  }
})();
示例基于官方文档中的一个片段。
selenium-webdriver
版本:4.8.1。
当运行这样的脚本(使用正确的代理地址和凭证值)并查看
squid
访问日志时,我总是在会话开始时看到至少两个连续的未经身份验证的请求(本例中是发往 www.selenium.dev
的请求),如下所示(已脱敏):
2023-03-20 10:59:51,580 0 <client-ip> TCP_DENIED/407 4284 CONNECT www.selenium.dev:443 - HIER_NONE/- text/html
2023-03-20 10:59:51,584 0 <client-ip> TCP_DENIED/407 4284 CONNECT www.selenium.dev:443 - HIER_NONE/- text/html
2023-03-20 10:59:51,954 1 <client-ip> TCP_TUNNEL/200 39 CONNECT cdn.jsdelivr.net:443 <username> HIER_DIRECT/146.75.121.229 -
2023-03-20 10:59:52,017 59 <client-ip> TCP_TUNNEL/200 4890 CONNECT fonts.googleapis.com:443 <username> HIER_DIRECT/142.250.186.42 -
2023-03-20 10:59:52,017 60 <client-ip> TCP_TUNNEL/200 4891 CONNECT fonts.googleapis.com:443 <username> HIER_DIRECT/142.250.186.42 -
2023-03-20 10:59:52,438 312 <client-ip> TCP_TUNNEL/200 6586 CONNECT www.paypal.com:443 <username> HIER_DIRECT/146.75.121.21 -
2023-03-20 10:59:52,750 97 <client-ip> TCP_TUNNEL/200 6505 CONNECT www.paypalobjects.com:443 <username> HIER_DIRECT/192.229.221.25 -
2023-03-20 11:00:02,801 10908 <client-ip> TCP_TUNNEL/200 39117 CONNECT cdn.jsdelivr.net:443 <username> HIER_DIRECT/146.75.121.229 -
2023-03-20 11:00:02,801 10674 <client-ip> TCP_TUNNEL/200 5150 CONNECT www.netlify.com:443 <username> HIER_DIRECT/3.74.73.235 -
2023-03-20 11:00:02,808 10314 <client-ip> TCP_TUNNEL/200 3455 CONNECT www.netlify.com:443 <username> HIER_DIRECT/3.74.73.235 -
2023-03-20 11:00:02,808 10860 <client-ip> TCP_TUNNEL/200 9388 CONNECT fonts.googleapis.com:443 <username> HIER_DIRECT/142.250.186.42 -
2023-03-20 11:00:02,808 11188 <client-ip> TCP_TUNNEL/200 572173 CONNECT www.selenium.dev:443 <username> HIER_DIRECT/185.199.110.153 -
2023-03-20 11:00:02,809 10683 <client-ip> TCP_TUNNEL/200 6651 CONNECT plausible.io:443 <username> HIER_DIRECT/169.150.247.34 -
2023-03-20 11:00:02,809 10939 <client-ip> TCP_TUNNEL/200 43293 CONNECT code.jquery.com:443 <username> HIER_DIRECT/69.16.175.42 -
2023-03-20 11:00:02,809 10683 <client-ip> TCP_TUNNEL/200 32814 CONNECT fonts.gstatic.com:443 <username> HIER_DIRECT/142.250.186.163 -
2023-03-20 11:00:02,809 10683 <client-ip> TCP_TUNNEL/200 8110 CONNECT www.paypal.com:443 <username> HIER_DIRECT/146.75.121.21 -
2023-03-20 11:00:02,809 10161 <client-ip> TCP_TUNNEL/200 10864 CONNECT www.paypalobjects.com:443 <username> HIER_DIRECT/192.229.221.25 -
在我的特殊情况下,看到一个并且只有一个初始未经身份验证的请求是至关重要的。有办法实现吗?