如何让 Selenium 爬虫在无头(headless)模式下正常工作?

问题描述 投票:0回答:1

我正在为 blur.io 上的 NFT 贷款数据构建一个 Selenium 网络爬虫,它在非无头(non-headless)模式下运行完美。但在无头模式下,脚本找不到那个需要通过滚动来加载内容的可滚动元素,因此只要启用无头模式,脚本就会报错。

我尝试过以下修复

options.add_argument("--headless=new")
options.add_argument("--window-size=1440, 900")
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument("--start-maximized")

我还让驱动程序等待该元素变为可见,但它似乎仍然找不到该元素,最后还是报错:

WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CLASS_NAME, 'rows')))

这是我的完整代码,谢谢!

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time
from tkinter import *
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# NOTE: a `global` statement at module level has no effect; the shared
# `nftData` list itself is created further down, before execute_loan_checker().
global nftData

def removePRCNT(string):
    """Return the numeric value of a percentage string such as ``"12.5%"``.

    Every ``%`` character is dropped and the remainder is parsed with
    ``float``; a ``ValueError`` propagates if that remainder is not numeric.
    """
    digits_only = "".join(string.split("%"))
    return float(digits_only)

# Shared result list; execute_loan_checker() empties and refills it in place.
nftData = []

# Regular desktop user agent sent when running headless (see
# _build_chrome_options for why).
_DESKTOP_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
)

# Collection slugs whose loan pages are scraped, in visiting order.
_COLLECTION_SLUGS = (
    "wrapped-cryptopunks",
    "azuki",
    "milady",
    "degods-eth",
    "boredapeyachtclub",
    "mutant-ape-yacht-club",
    "kanpai-pandas",
    "remilio-babies",
    "pudgypenguins",
    "otherdeed",
    "bored-ape-kennel-club",
    "clonex",
    "beanzofficial",
    "azukielementalbeans",
    "azukielementals",
    "proof-moonbirds",
    "lilpudgys",
)

_ROW_XPATH = "//div[@id= 'COLLECTION_MAIN']//div[@role='rowgroup']//div[@role='row']"
_SCROLL_STEP_PX = 500  # pixels scrolled per iteration
_SCROLL_PAUSE_S = .05  # may need to be increased on slow connections
# Give up on a collection after this many consecutive scrolls that did not
# move the list (roughly _MAX_STALLED_SCROLLS * _SCROLL_PAUSE_S seconds).
_MAX_STALLED_SCROLLS = 100


def _build_chrome_options(headless):
    """Return the Chrome ``Options`` used by the scraper."""
    options = Options()
    if headless:
        options.add_argument("--headless=new")
        # Sending a regular desktop user agent is what made the scrollable
        # list findable in headless mode; presumably the site serves a
        # different page to the default headless user agent.
        options.add_argument("--user-agent=" + _DESKTOP_USER_AGENT)
    # The switch takes "width,height"; the earlier "1440, 900" (with a space)
    # is not a well-formed value for it.
    options.add_argument("--window-size=1440,900")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--start-maximized")

    options.add_experimental_option("detach", True)
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    return options


def _scrape_collection(driver, link, seen_names, apyThreshold, ltvThreshold, ethThreshold):
    """Append the matching AUCTION loans of one collection page to ``nftData``."""
    driver.get(link)

    loans_button = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//button[.='All Loans']"))
    )
    loans_button.click()
    time.sleep(.4)  # may need adjusting depending on machine speed

    # A single wait is enough: "clickable" already requires the element to be
    # visible, so the separate visibility wait was redundant.
    scrollable_element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.CLASS_NAME, "rows"))
    )

    status = "AUCTION"  # assume live loans until a row says otherwise
    stalled_scrolls = 0
    # Keep scrolling while rows are still AUCTION, but stop once the list no
    # longer moves; otherwise an empty or fully scrolled list loops forever.
    while status == "AUCTION" and stalled_scrolls < _MAX_STALLED_SCROLLS:
        for loan_row in driver.find_elements(By.XPATH, _ROW_XPATH):
            nft_name = loan_row.find_element(By.XPATH, "div[1]").text
            status = loan_row.find_element(By.XPATH, "div[2]").text
            if status == "ACTIVE":
                break
            borrow_amount = loan_row.find_element(By.XPATH, "div[3]").text
            ltv = loan_row.find_element(By.XPATH, "div[4]").text
            apy = loan_row.find_element(By.XPATH, "div[5]").text
            if (
                nft_name not in seen_names
                and ethThreshold > float(borrow_amount)
                and ltvThreshold > removePRCNT(ltv)
                and removePRCNT(apy) > apyThreshold
            ):
                nftData.append([nft_name, borrow_amount, ltv, apy])
                seen_names.add(nft_name)  # rows are re-read after every scroll

        top_before = driver.execute_script(
            "return arguments[0].scrollTop;", scrollable_element
        )
        driver.execute_script(
            "arguments[0].scrollTop = arguments[0].scrollTop + arguments[1];",
            scrollable_element,
            _SCROLL_STEP_PX,
        )
        time.sleep(_SCROLL_PAUSE_S)
        top_after = driver.execute_script(
            "return arguments[0].scrollTop;", scrollable_element
        )
        stalled_scrolls = stalled_scrolls + 1 if top_after == top_before else 0


def execute_loan_checker(apyThreshold, ltvThreshold, ethThreshold, *,
                         driver_path="MYPATH/YOURPATH", headless=True):
    """Scrape blur.io loan pages and collect the loans that pass the filters.

    A loan row is kept when its borrow amount is below ``ethThreshold``, its
    LTV percentage is below ``ltvThreshold`` and its APY percentage is above
    ``apyThreshold``. Each NFT name is recorded at most once per call.

    Args:
        apyThreshold: Exclusive lower bound for the APY percentage.
        ltvThreshold: Exclusive upper bound for the LTV percentage.
        ethThreshold: Exclusive upper bound for the borrow amount.
        driver_path: Path to the chromedriver executable.
        headless: Run Chrome headless (the default) or with a visible window.

    Returns:
        The module-level ``nftData`` list, emptied at the start of the call and
        filled with ``[name, borrow_amount, ltv, apy]`` entries holding the
        cell texts as scraped.

    Raises:
        selenium.common.exceptions.TimeoutException: If the "All Loans" button
            or the scrollable list does not appear within 20 seconds.
        ValueError: If a borrow amount, LTV or APY cell is not numeric.
    """
    global nftData
    nftData.clear()

    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=_build_chrome_options(headless))
    try:
        seen_names = set()  # shared across collections, like the result list
        for slug in _COLLECTION_SLUGS:
            link = "https://blur.io/eth/collection/{}/loans".format(slug)
            _scrape_collection(
                driver, link, seen_names, apyThreshold, ltvThreshold, ethThreshold
            )
    finally:
        # quit() ends the whole session (browser and chromedriver), and the
        # finally clause makes sure that also happens when scraping fails.
        driver.quit()
    return nftData

# Test run with very loose thresholds so that effectively nothing is filtered.
execute_loan_checker(apyThreshold=0, ltvThreshold=999, ethThreshold=999)
    
python selenium-webdriver selenium-chromedriver
1个回答
0
投票

经过一番搜索,我在另一个帖子中找到了答案!

只需添加下面这个选项,问题就解决了:

 options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36");

© www.soinside.com 2019 - 2024. All rights reserved.