我正在为 blur.io 上的 NFT 贷款数据构建一个 Selenium 网络抓取工具,它在非无头(non-headless)模式下运行完全正常。但在无头(headless)模式下,脚本找不到那个可滚动元素——我需要滚动该元素来加载更多内容——结果脚本在无头模式下直接报错。
我尝试过以下修复方法:
# Chrome command-line flags tried so far in an attempt to fix the headless run.
options.add_argument("--headless=new")
# NOTE(review): the usual form is "width,height" without a space — confirm
# that Chrome parses this value as intended.
options.add_argument("--window-size=1440, 900")
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument("--start-maximized")
我还让驱动程序等待该元素变为可见,但它似乎仍然找不到这个元素,最终还是报错:
# Explicit wait (20 s timeout) until the element with class "rows" is visible.
WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME, 'rows')))
以下是我的完整代码,谢谢!
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time
from tkinter import *
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# NOTE: a `global` statement at module level has no effect; the name is
# already module-scoped. The list itself is created further down.
global nftData
def removePRCNT(string: str) -> float:
    """Convert a percentage string such as ``"12.5%"`` into a float.

    Every ``%`` character is removed and the remainder is parsed with
    ``float``, so surrounding whitespace is tolerated.

    Raises:
        ValueError: if the remaining text is not a valid float literal.
    """
    return float(string.replace("%", ""))
# Module-level result buffer; cleared and refilled by every call to
# execute_loan_checker().
nftData = []


def execute_loan_checker(apyThreshold, ltvThreshold, ethThreshold, *, user_agent=None):
    """Scrape the "All Loans" tables on blur.io and collect matching loans.

    For every collection page the loan rows are read while their status is
    "AUCTION"; a row is kept when its name has not been recorded yet and
    ``ethThreshold > borrow amount``, ``ltvThreshold > LTV %`` and
    ``APY % > apyThreshold``.

    Args:
        apyThreshold: exclusive lower bound for the APY percentage.
        ltvThreshold: exclusive upper bound for the LTV percentage.
        ethThreshold: exclusive upper bound for the borrow amount.
        user_agent: optional user-agent string passed to Chrome via
            ``--user-agent``. The default (``None``) keeps Chrome's own
            user-agent.

    Returns:
        The module-level ``nftData`` list, holding one
        ``[name, borrowAmount, ltv, apy]`` entry (scraped strings) per loan.
    """
    global nftData
    del nftData[:]  # reuse the same list object so existing references stay valid

    path = "MYPATH/YOURPATH"
    service = Service(path)

    options = Options()
    options.add_argument("--headless=new")  # works fine without this line
    # Canonical "width,height" form (the stray space has been removed).
    options.add_argument("--window-size=1440,900")
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument("--start-maximized")
    if user_agent:
        options.add_argument(f"--user-agent={user_agent}")
    # Other misc options
    options.add_experimental_option("detach", True)
    options.add_experimental_option("excludeSwitches", ["enable-automation"])

    driver = webdriver.Chrome(service=service, options=options)

    collection_slugs = [
        "wrapped-cryptopunks",
        "azuki",
        "milady",
        "degods-eth",
        "boredapeyachtclub",
        "mutant-ape-yacht-club",
        "kanpai-pandas",
        "remilio-babies",
        "pudgypenguins",
        "otherdeed",
        "bored-ape-kennel-club",
        "clonex",
        "beanzofficial",
        "azukielementalbeans",
        "azukielementals",
        "proof-moonbirds",
        "lilpudgys",
    ]
    collection_links = [
        f"https://blur.io/eth/collection/{slug}/loans" for slug in collection_slugs
    ]

    row_xpath = "//div[@id= 'COLLECTION_MAIN']//div[@role='rowgroup']//div[@role='row']"
    scroll_amount = 500  # pixels scrolled per step
    # Give up on a page after this many consecutive scroll steps that did not
    # move the list; otherwise a table without any "ACTIVE" row loops forever.
    max_stalled_scrolls = 40

    def gatherLoanData():
        """Visit every collection page and append matching loans to nftData."""
        addedNFTnames = set()  # names already recorded, for O(1) duplicate checks
        for link in collection_links:
            driver.get(link)
            # Wait until the "All Loans" tab is clickable, then open it.
            loans_button = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.XPATH, "//button[.='All Loans']"))
            )
            loans_button.click()
            # Might need adjusting to the machine's speed; too short a pause
            # caused errors.
            time.sleep(.4)

            # This is the wait that was reported to fail in headless mode.
            WebDriverWait(driver, 20).until(
                EC.visibility_of_element_located((By.CLASS_NAME, 'rows'))
            )
            scrollable_element = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "rows"))
            )

            status = "AUCTION"  # assume auction rows for the first pass
            previous_scroll_top = None
            stalled_scrolls = 0
            # Keep scrolling only while rows are in AUCTION state (live loans).
            while status == "AUCTION":
                print("pp")  # debug trace kept from the original script
                for loan_row in driver.find_elements(By.XPATH, row_xpath):
                    nftName = loan_row.find_element(By.XPATH, "div[1]").text
                    status = loan_row.find_element(By.XPATH, "div[2]").text
                    if status == "ACTIVE":
                        break
                    borrowAmount = loan_row.find_element(By.XPATH, "div[3]").text
                    ltv = loan_row.find_element(By.XPATH, "div[4]").text
                    apy = loan_row.find_element(By.XPATH, "div[5]").text
                    if (
                        nftName not in addedNFTnames
                        and ethThreshold > float(borrowAmount)
                        and ltvThreshold > removePRCNT(ltv)
                        and removePRCNT(apy) > apyThreshold
                    ):
                        nftData.append([nftName, borrowAmount, ltv, apy])
                        addedNFTnames.add(nftName)

                # Scroll one step and read back the resulting position.
                scroll_top = driver.execute_script(
                    "arguments[0].scrollTop = arguments[0].scrollTop + arguments[1];"
                    " return arguments[0].scrollTop;",
                    scrollable_element,
                    scroll_amount,
                )
                if scroll_top == previous_scroll_top:
                    stalled_scrolls += 1
                    if stalled_scrolls >= max_stalled_scrolls:
                        break  # nothing more to scroll or load on this page
                else:
                    stalled_scrolls = 0
                previous_scroll_top = scroll_top
                time.sleep(.05)  # may need increasing on slow connections

    try:
        gatherLoanData()
    finally:
        # quit() ends the browser and the chromedriver process even when the
        # scrape raised, so no headless Chrome instances are left behind.
        driver.quit()
    return nftData
# Test run with very permissive thresholds (APY > 0, LTV < 999, borrow < 999).
execute_loan_checker(0,999,999) #CALLS SCRIPT WITH NO FILTERING OPTIONS FOR TESTING
经过一番搜索后找到了答案!答案来自另一个相关帖子(thread)。
只需添加下面这个选项,问题就解决了:
# Override Chrome's user-agent string with a regular desktop Chrome one.
# NOTE(review): presumably the site treats the default headless user-agent
# differently — confirm; the hard-coded Chrome/79 version is also dated.
options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36");