问题很简单。我在列表中列出了该产品,这是搜索结果。当它点击一个按钮时,它会进入一个新选项卡。我的意图很简单:单击此产品,关注此新选项卡,废弃我想要的信息,然后关闭此新选项卡并返回到上一个选项卡,依此类推。这是我尝试实现的方法,但程序只是忽略它:它不会给我错误,但它不会执行我想要的操作,它甚至不会打开新选项卡。
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import logging
import requests
import pandas as pd
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import re
from fpdf import FPDF
from selenium import webdriver
from urllib.parse import urlparse
import undetected_chromedriver as uc
from selenium import webdriver
precos = []
tamanhos = []
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
driver = uc.Chrome(options=options)
#options.add_argument(f"--proxy-server={proxy}")
#options.add_argument("start-maximized")
#options.add_argument("disable-infobars")options.add_argument("--disable-extensions")
actions = ActionChains(driver)
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_argument("--verbose")
#options.add_argument("--remote-debugging-port=9222")
options.add_argument('--headless')
options.add_argument("--disable-gpu")
data_planilha = []
precos_m2 = []
url_base = "https://www.vivareal.com.br/venda/ceara/caucaia/bairros/caucaia/#onde=,Cear%C3%A1,Caucaia,Bairros,Caucaia,,,,BR%3ECeara%3ENULL%3ECaucaia%3EBarrios%3ECaucaia,,,/"
driver.get(url_base)
#driver.find_element(By.XPATH, "//ul[@class = 'header__nav-links']/li[2]/a").click()
#driver.implicitly_wait(30)
resultados = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//section[@class = 'results__main']" )))
quantidade_imoveis = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//strong[@class = 'results-summary__count js-total-records']"))).text
quantidade_imoveis = int(quantidade_imoveis)
print(quantidade_imoveis)
try:
botao_proximo = WebDriverWait(driver, 20).until(
EC.element_to_be_clickable((By.XPATH, "//button[@title = 'Próxima página']")))
while True:
time.sleep(1)
imoveis = WebDriverWait(driver, 200).until(EC.visibility_of_all_elements_located((By.XPATH, "//div[@class = 'results-list js-results-list']/div")))
print("entrou")
for index, value in enumerate(imoveis):
driver.implicitly_wait(20)
imoveis = WebDriverWait(driver, 200).until(EC.visibility_of_all_elements_located((By.XPATH, "//div[@class = 'results-list js-results-list']/div")))
driver.execute_script("arguments[0].scrollIntoView(true);", imoveis[index])
time.sleep(3)
tamanho = imoveis[index].find_element(By.XPATH, ".//span[@class = 'property-card__detail-value js-property-card-value property-card__detail-area js-property-card-detail-area']").text
#driver.execute_script("arguments[0].scrollIntoView(true);", tamanho)
# tamanho = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, ".//span[@class = 'property-card__detail-value js-property-card-value property-card__detail-area js-property-card-detail-area']"))).text
print(tamanho)
driver.implicitly_wait(20)
preco = imoveis[index].find_element(By.XPATH, ".//div[@class = 'property-card__price js-property-card-prices js-property-card__price-small']/p").text
preco_imovel = "".join(re.findall('\d+', preco))
price = int(preco_imovel)
links = imoveis[index].find_elements(By.CLASS_NAME, "property-card__labels-container js-main-info js-listing-labels-link")
for link in links:
link.click()
driver.switch_to.window(driver.window_handles[1])
driver.execute_script('window.scrollBy(0, 500)')
anuncio_ano = WebDriverWait(driver, 100).until(EC.visibility_of_element_located((By.XPATH, "//div[@class = 'description__info-date big-space']/span[2]"))).text
driver.implicitly_wait(20)
driver.close()
driver.switch_to.window(driver.window_handles[0])
#preco = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, ".//div[@class = 'property-card__price js-property-card-prices js-property-card__price-small']/p"))).text
size = int(tamanho)
print(preco_imovel)
precos.append(int(price))
tamanhos.append(size)
preco_m2 = price/size
print(preco_m2)
precos_m2.append(preco_m2)
print(anuncio_ano)
if("2024" in anuncio_ano):
quantidade_2024 += 1
botao_proximo = driver.find_element(By.XPATH, "//button[@title = 'Próxima página']")
valor = botao_proximo.get_attribute("data-page")
if valor != "":
driver.execute_script('arguments[0].click()', botao_proximo)
else:
break
except NoSuchElementException:
print("última página")
tamanho_medio = sum(tamanhos)/quantidade_imoveis
preco_medio_total = sum(precos)/quantidade_imoveis
precos_m2_total = sum(precos_m2)/quantidade_imoveis
data_planilha.append({
'URL': url_base,
'Número de anúncios': quantidade_imoveis,
'Preço Médio': preco_medio_total,
'Tamanho Médio': tamanho_medio,
'Preço Médio por m2': precos_m2_total,
})
df_planilha = pd.DataFrame(data_planilha)
df_planilha.to_excel("catalogo_vivareal.xlsx")
print("salvou")
driver.quit()
关键是在 Selenium 中正确管理选项卡/窗口句柄,并在需要时使用 JavaScript 打开新选项卡。
# Find and click the link to open in new tab
link = imovel.find_element(By.CLASS_NAME, "property-card__content js-card-title")
# Open in new tab using JavaScript
driver.execute_script('window.open(arguments[0].href, "_blank");', link)
# Switch to the new tab (will always be the last handle)
driver.switch_to.window(driver.window_handles[-1])
try:
# Do your scraping in the new tab
anuncio_ano = WebDriverWait(driver, 20).until(
EC.visibility_of_element_located((By.XPATH, "//div[@class = 'description__info-date big-space']/span[2]"))
).text
finally:
# Always close the new tab and switch back
driver.close()
driver.switch_to.window(driver.window_handles[0])