我仍然是初学者,正在尝试提取图卢兹市 100 个最佳花园的评级(例如 4.1 颗星)和评论数量。我已经编写了下面的代码,但是我仍然无法提取所需的信息。
有人可以帮忙吗?
提前致谢, 伊利亚斯。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
import time
# Configure and start Chrome. The "detach" experimental option is set to True
# (presumably so the browser window survives the end of the script — confirm
# against the ChromeDriver capability docs).
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=chrome_options)
# Shared action builder bound to the driver; used later for scrolling,
# clicking and sending key presses.
actionChains = ActionChains(driver)
# Explicit-wait helper bound to the driver with a timeout value of 20.
wait = WebDriverWait(driver, 20)
def wait_for_element_location_to_be_stable(element, stable_for=1, poll_interval=0.4):
    """Block until *element* has stopped moving.

    The element's ``location`` is read repeatedly. Every time the value
    differs from the previous read, the stability timer restarts. The
    function returns once the location has stayed unchanged for
    *stable_for* seconds.

    Args:
        element: Object exposing a ``location`` attribute whose values can be
            compared with ``!=`` (the script passes a Selenium element).
        stable_for: Seconds the location must remain unchanged before
            returning. Defaults to 1, the previously hard-coded value.
        poll_interval: Seconds to sleep between two reads of ``location``.
            Defaults to 0.4, the previously hard-coded value.

    Returns:
        None.
    """
    previous_location = element.location
    # A monotonic clock is used for the interval so that system clock
    # adjustments cannot shorten or extend the wait.
    stable_since = time.monotonic()
    while time.monotonic() - stable_since < stable_for:
        current_location = element.location
        if current_location != previous_location:
            # Still moving: remember the new position and restart the timer.
            previous_location = current_location
            stable_since = time.monotonic()
        time.sleep(poll_interval)
# Open Google first so the element with ID "L2AGLb" (presumably the
# cookie-consent button — verify) can be clicked before going to Maps.
driver.get("https://www.google.com/")
# find_elements returns an empty list instead of raising, so the script keeps
# working when the button is not shown.
consent_buttons = driver.find_elements(By.ID, "L2AGLb")
if consent_buttons:
    consent_buttons[0].click()
driver.get("https://www.google.com/maps")
# Wait for the page to load and display the search box
time.sleep(3)
# Input the search query for "jardins in toulouse" and press Enter
search_box = driver.find_element(By.ID, "searchboxinput")
search_box.send_keys("jardins in toulouse")
search_box.send_keys(Keys.RETURN)
# Wait for the search results to load
time.sleep(5)
# XPath of the rating RELATIVE to a result link: ".." steps up to the link's
# parent (the card container), then any descendant with class "MW4etd" is
# matched. An absolute XPath from the document root cannot be used here
# because it would not be scoped to the individual result.
rating_xpath = "..//*[@class='MW4etd']"
result_links_xpath = "//a[@class='hfpxzc']"
# Only this many results are printed, so scrolling can stop once they exist.
max_results = 100
# Get the result links currently present in the page
results = driver.find_elements(By.XPATH, result_links_xpath)
break_condition = False
focus_element = driver.find_element(By.ID, 'zero-input')
# Keep scrolling until enough results are loaded or the last result stops
# changing (no more results were appended). The `results` check avoids an
# IndexError on results[-1] when the search returned nothing.
while results and len(results) < max_results and not break_condition:
    temp = results[-1]
    actionChains.scroll_to_element(temp).perform()
    actionChains.move_to_element(focus_element).click().perform()
    for _ in range(3):
        actionChains.send_keys(Keys.ARROW_DOWN).perform()
        time.sleep(0.5)
    # Let the list settle before collecting the result links again.
    wait_for_element_location_to_be_stable(temp)
    results = wait.until(EC.presence_of_all_elements_located((By.XPATH, result_links_xpath)))
    if results[-1] == temp:
        break_condition = True
if not results:
    print("No results found")
# Loop through the results and print name and rating
for i, result in enumerate(results[:max_results], start=1):
    name = result.get_attribute('aria-label')
    # find_elements (plural) yields an empty list for cards without a rating,
    # whereas find_element would raise.
    ratings = result.find_elements(By.XPATH, rating_xpath)
    rating = ratings[0].text if ratings else "Rating not available"
    print(f"Result {i}: {name} - Rating: {rating}")
# Close the browser
driver.quit()
我知道这里有非常熟练的人员可以提供帮助......
我很高兴您使用我的解决方案来向下滚动 Google 地图。 :)
在当前情况下,您使用从 DOM 根开始的绝对 XPath 来获取评级,这是行不通的。该 XPath 只会从上下文根定位到第一个评级元素;而 `result` 变量本身并不包含您用评级 XPath 定义的元素(它不是上下文根)。
评级位于卡片容器中,而卡片容器的根元素是 `a` 元素的父级。因此,构建定位器的第一步是用 `..` 回到父元素。
构建定位器的第二步是查找评级元素。它有一个唯一的类名 `MW4etd`,因此,从结果元素出发的评级定位器是:
..//*[@class='MW4etd']
要处理没有评级的结果,可以通过 `result` 调用 `find_elements` 获取元素列表,并检查其长度是否大于 0。
# Print the name and rating of up to the first 100 results.
for i, result in enumerate(results[:100], start=1):
    name = result.get_attribute('aria-label')
    # ".." steps from the result link up to its parent (the card container);
    # the rating is then searched among that container's descendants.
    ratings = result.find_elements(By.XPATH, "..//*[@class='MW4etd']")
    # An empty list means this card has no rating element.
    rating = ratings[0].text if ratings else "Rating not available"
    print(f"Result {i}: {name} - Rating: {rating}")