我尝试过保留 DOM,但不起作用。我尝试过睡眠时间和等待时间,但仍然遇到同样的错误。
工作。我尝试过睡眠时间和等待时间,但仍然遇到同样的错误。
错误消息:发生错误:消息:过时元素引用:找不到过时元素
我希望你能帮助我让代码正常工作。
import os.path
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import urllib.request
import time
import ssl
from selenium.common import exceptions
ssl._create_default_https_context = ssl._create_unverified_context
URL = "https://m.kbsec.com/go.able?linkcd=s060200020000"
IMG_COUNT = 5
def set_chrome_driver():
options = webdriver.ChromeOptions()
options.add_argument("headless")
options.add_argument("lang=ko_KR")
options.add_experimental_option(
"excludeSwitches", ["enable-logging"]
)
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(
service=Service(ChromeDriverManager().install()), options=options
)
return driver
def get_events():
# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
idx = 1
while True:
html_events = driver.find_elements(By.XPATH, '// *[ @ id = "bbsList"] / li')
print("===================")
print(f"{idx} count : " + str(len(html_events)))
idx += 1
if len(html_events) >= 1000:
break
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Wait to load page
time.sleep(3)
# Calculate new scroll height and compare with last scroll height
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
return html_events
if __name__ == "__main__":
start = time.time()
data = []
driver = set_chrome_driver()
driver.get(URL)
html_events = get_events()
# save img files to disk
for event in html_events:
try:
print(event.text)
split_text = event.text.split("\n")
# title_period
save_path = "./images/" + split_text[0]
href = event.find_element(By.CSS_SELECTOR, 'a')
href.get_attribute('href')
href.click()
driver.refresh()
images = driver.find_elements(By.XPATH, f' // *[ @ id = "etcimgSec"] / div[1] / div[2] / div / li /img')
for i, img in enumerate(images):
if i == IMG_COUNT:
break
url = img.get_attribute('src')
if not os.path.isdir(save_path):
os.makedirs(save_path)
urllib.request.urlretrieve(url, save_path + f"/{i}.jpg")
print("saved images to " + save_path)
print("===================")
except exceptions.StaleElementReferenceException as e:
print(f"An error occurred: {e}")
except exceptions.NoSuchElementException:
print("Element not found")
break
except Exception as e:
print(f"An error occurred: {e}")
break
end = time.time()
print(f"{end - start:.5f} sec")
我认为问题在于
driver.refresh()
电话。找到第一个元素,然后刷新驱动程序,突然间您正在迭代的列表中的所有其他元素都变得过时。如果您需要在循环内刷新驱动程序,则可能需要在每次刷新后再次获取元素。