我在抓取一个网站时,想循环遍历下拉列表中的所有选项。问题是这些选项似乎是由一个 JavaScript 组件动态生成并隐藏起来的,我找不到需要单击哪个元素才能展开并获取选项列表。
我尝试过通过执行 JavaScript 并选择相应的元素来展开列表,但都没有成功。
from selenium.webdriver.support.ui import Select
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import time
import json
import os
# Open the MFSA financial services register in a Chrome session driven by Selenium.
url = 'https://www.mfsa.com.mt/financial-services-register/'
driver = webdriver.Chrome()
driver.get(url)
# Fixed one-second pause after navigation; no explicit wait condition is used.
time.sleep(1)
# Wrap the elements with ids "select1" and "select2" in Selenium's Select helper.
# Neither wrapper is referenced again in this script.
select = Select(driver.find_element_by_id("select1"))
select2 = Select(driver.find_element_by_id("select2"))
# Parse a snapshot of the current page source and look up the drop mask by id;
# test_soup is not referenced again in this script.
soup = BeautifulSoup(driver.page_source, 'html.parser')
test_soup = soup.find(id="select2-drop-mask")
# Set display='block' on the element with id "select2-drop-mask" via injected JavaScript.
driver.execute_script("document.getElementById('select2-drop-mask').style.display='block';")
# Each lookup below rebinds test1, so only the last one (class "select2-search") is kept.
test1 = driver.find_element_by_id("select2-drop-mask")
test1 = driver.find_element_by_class_name('select2-drop-mask')
test1 = driver.find_element_by_class_name('select2-search')
# NOTE(review): the attribute name 'HTML' differs from the 'innerHTML' used further
# down; presumably 'innerHTML' or 'outerHTML' was intended -- confirm.
print(test1.get_attribute('HTML'))
test1.click()
# Plural lookup: collects every element with id "select2-results-1" and iterates over them.
# NOTE(review): the loop body below is not indented in this paste; the original
# indentation appears to have been lost, so the extent of the loop is unclear.
test1 = driver.find_elements_by_id('select2-results-1')
for li in test1:
print(li.get_attribute('HTML'))
# NOTE(review): test1 still holds the result of find_elements_by_id here, not a
# single element -- confirm what was meant to be clicked.
test1.click()
# Find the first anchor whose href contains the character "0" and print its inner HTML.
test1 = driver.find_element_by_css_selector("a[href*='0']")
print(test1.get_attribute('innerHTML'))
# Print and click the element with class "select2-hidden-accessible", then list
# the inner HTML of every <a> found beneath it.
test1 = driver.find_element_by_class_name('select2-hidden-accessible')
print(test1.get_attribute('innerHTML'))
test1.click()
# NOTE(review): as above, the body of the following loop is not indented in this paste.
options = [x for x in test1.find_elements_by_tag_name("a")]
for option in options:
print(option.get_attribute('innerHTML'))
# The remainder repeats one pattern per candidate: look up a Select2-related
# element by id or class name, print it (or its inner HTML), then click it.
test1 = driver.find_element_by_id('select2-drop')
test1.click()
test1 = driver.find_element_by_class_name('select2-arrow')
print(test1.get_attribute('innerHTML'))
test1.click()
test1 = driver.find_element_by_id('select2-chosen-1')
print(test1)
test1.click()
test1 = driver.find_element_by_id('select2-drop-mask')
print(test1)
test1.click()
test1 = driver.find_element_by_id('select2-results-2')
print(test1)
test1.click()
test1 = driver.find_element_by_id('s2id_autogen2_search')
print(test1)
test1.click()
test1 = driver.find_element_by_id('s2id_select1')
print(test1)
test1.click()
test1 = driver.find_element_by_class_name('select2-search')
print(test1)
test1.click()
我期望得到的结果是包含所有选项的列表,这样我就可以遍历这些选项并获取注册公司的列表。谢谢!
您需要先在第一个下拉列表中选择第二个选项,并且两个选项列表都需要单击才能展开。它们不是 select 元素,而是列在父 ul 元素中的 li 元素。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Open the register page, expand both Select2 dropdowns and count their options.
url = 'https://www.mfsa.com.mt/financial-services-register/'
driver = webdriver.Chrome()
driver.get(url)

# One explicit wait (5 second timeout) shared by every lookup below, so each
# step waits for its element instead of assuming it is already rendered.
wait = WebDriverWait(driver, 5)

# Click the element with id "cn-accept-cookie" (the cookie-consent button,
# judging by its id) once it is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#cn-accept-cookie"))).click()

# The dropdowns are not <select> elements: clicking the "chosen" label opens a
# results container whose options are plain <li> items.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#select2-chosen-1"))).click()
dropDown1Options = wait.until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#select2-results-1 li"))
)
print(len(dropDown1Options))
# Pick an entry in the first dropdown before opening the second one.
dropDown1Options[2].click()

# Open the second dropdown through its search box.
# (The OP's attempt needed s2id_select2 here instead.)
wait.until(EC.element_to_be_clickable((By.ID, 's2id_autogen2_search'))).click()
dropDown2Options = wait.until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#select2-results-2 li"))
)
print(len(dropDown2Options))