Selenium 错误：返回 self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": value})["value"]

Question

所以我尝试使用 Selenium 创建一个代码来抓取动态网站。我目前陷入如何从多个列表中提取数据的困境（该页面有多个列表）。

这是代码：

from selenium import webdriver
#to enable Wait for Page Loading
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#to enable scrolling
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import requests

#Initialize browser
options = webdriver.ChromeOptions()
#options.add_argument('--headless')  # Not headless because there's an error with Hotjar
driver = webdriver.Chrome(options=options)

url = "https://www.archify.com/id/professionals"
driver.get(url)

#Click Load More Button
l = driver.find_element("xpath", "//button[text()='Load More']")
l.click()

# Scroll until bottom of the page
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")

#Tell browser to wait for for loading (for elements to load / for 60 seconds)
wait = WebDriverWait(driver, 60) 

# Extract product details
product_elements = driver.find_elements(By.CLASS_NAME, 'professional-box')
product_data = []
for product_element in product_elements:
    link = product_element.find_element(By.NAME, 'href').text
    title = product_element.find_element(By.NAME, '.title').text
    subtitle = product_element.find_element(By.NAME,"subtitle").text
    product_data.append({'title': title, 'subtitle': subtitle, 'link': link})
# Print extracted data
for product in product_data:
    print(f"Title: {product['title']}, Subtitle: {product['subtitle']}, link: {product['link']}")
    
driver.quit

以及下面的回溯：

PS C:\Users\user\Desktop\Code> & C:/Users/user/AppData/Local/Programs/Python/Python311/python.exe c:/Users/user/Desktop/Code/Scrape_Selenium.py

DevTools listening on ws://127.0.0.1:51089/devtools/browser/581a68b5-c26f-42fd-93e0-f4970c14fd1b
Traceback (most recent call last):
  File "c:\Users\user\Desktop\Code\Scrape_Selenium.py", line 43, in <module>
    link = product_element.find_element(By.NAME, 'href').text
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webelement.py", line 417, in find_element
    return self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": value})["value"]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webelement.py", line 395, in _execute
    return self._parent.execute(command, params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 347, in execute
    self.error_handler.check_response(response)
  File "C:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 229, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"[name="href"]"}
  (Session info: chrome=122.0.6261.112); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
        GetHandleVerifier [0x00007FF6B60AAD32+56930]
        (No symbol) [0x00007FF6B601F632]
        (No symbol) [0x00007FF6B5ED42E5]
        (No symbol) [0x00007FF6B5F198ED]
        (No symbol) [0x00007FF6B5F19A2C]
        (No symbol) [0x00007FF6B5F0F13C]
        (No symbol) [0x00007FF6B5F3BCDF]
        (No symbol) [0x00007FF6B5F0F09A]
        (No symbol) [0x00007FF6B5F3BEB0]
        (No symbol) [0x00007FF6B5F581E2]
        (No symbol) [0x00007FF6B5F3BA43]
        (No symbol) [0x00007FF6B5F0D438]
        (No symbol) [0x00007FF6B5F0E4D1]
        GetHandleVerifier [0x00007FF6B6426ABD+3709933]
        GetHandleVerifier [0x00007FF6B647FFFD+4075821]
        GetHandleVerifier [0x00007FF6B647818F+4043455]
        GetHandleVerifier [0x00007FF6B6149766+706710]
        (No symbol) [0x00007FF6B602B90F]
        (No symbol) [0x00007FF6B6026AF4]
        (No symbol) [0x00007FF6B6026C4C]
        (No symbol) [0x00007FF6B6016904]
        BaseThreadInitThunk [0x00007FFEA0697344+20]
        RtlUserThreadStart [0x00007FFEA08A26B1+33]

我看到另一个步骤提到我需要将“product_elements”放入for循环中，但我想我已经在上面的代码中做到了这一点。

谢谢你

Answer 1

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup

options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)

url = "https://www.archify.com/id/professionals"
driver.get(url)

l = driver.find_element("xpath", "//button[text()='Load More']")
l.click()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
wait = WebDriverWait(driver, 60)

soup = BeautifulSoup(driver.page_source, 'html.parser')
 # Here you made mistake
product_elements = soup.find_all('div', class_='professional-box') # find all div element 
product_data = []
for product_element in product_elements:
    content = product_element.find('div', class_='text-box type-a')
    if content:
        href = content.find('a').get('href')
        title = content.find('p', class_='title').text
        subtitle = content.find('p', class_='subtitle').text
        product_data.append({'title': title, 'subtitle': subtitle, 'link': href})
# Print extracted data
for product in product_data:
    print(f"Title: {product['title']}, Subtitle: {product['subtitle']}, link: {product['link']}")

driver.quit()

Selenium 错误：返回 self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": value})["value"]

问题描述投票：0回答：1

1个回答

最新问题

Selenium 错误：返回 self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": value})["value"]

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1