我确实有这个简单的页面,我使用 selenium 和 BeautifulSoup。据我所知,该页面加载了 Javascript。有一个加载更多按钮,因此它会点击直到按钮不再出现。但我正在尝试用 BeautifulSoup 进行一些“导航”。它将获取链接 href,然后加载它。但是,由于某种原因,它没有读取该页面。这是代码:
def extrai_dados_prodirectsports():
    """Scrape the adidas Predator listing on prodirectsport.com.

    Uses the module-level Selenium ``driver`` to load the listing page,
    dismiss the cookie / zonos / newsletter overlays, and click the
    "view more" button until every product is loaded. Each product link
    is then fetched with ``requests`` and its name, price and code are
    printed.

    Side effects: navigates and maximizes the shared ``driver``, prints
    progress and product data to stdout.
    """
    from urllib.parse import urljoin  # local import: resolve relative hrefs

    print("entrou")
    # BUG FIX: the original line ended with a comma, which turned
    # url_linha into a 1-tuple and broke driver.get().
    url_linha = "https://www.prodirectsport.com/soccer/l/adults/departments-boots/activity-football/brand-adidas/silo-predator/"
    driver.get(url_linha)

    # Accept the cookie banner.
    WebDriverWait(driver, 100).until(
        EC.element_to_be_clickable((By.XPATH, "//button[@title='Accept all cookies']"))
    ).click()

    # Close the zonos (international shipping) overlay.
    WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//div[@id='zonos']"))
    )
    botao_close = driver.find_element(By.XPATH, "//a[@class = 'z-close']")
    driver.execute_script("arguments[0].click();", botao_close)

    # Close the remaining pop-up (newsletter/global).
    botao = driver.find_element(By.XPATH, "//button[@aria-label='Close']")
    driver.execute_script("arguments[0].click();", botao)

    x = 0
    driver.maximize_window()
    while True:
        try:
            # Re-locate the button on every iteration so a DOM refresh
            # does not leave us holding a stale element.
            button_seemore = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//div[@class='product-listing__view-more']/button")
                )
            )
            if button_seemore.is_displayed():
                driver.execute_script("arguments[0].scrollIntoView();", button_seemore)
                driver.execute_script("arguments[0].click();", button_seemore)
                x += 1
                print(x)
            else:
                break  # button hidden -> every product is loaded
        except StaleElementReferenceException:
            print("Elemento 'Ver mais' tornou-se stale, reencontrando o botão...")
            continue  # retry the loop and re-locate the button
        except TimeoutException:
            print("Tempo esgotado esperando o botão 'Ver mais'.")
            break

    current_page = driver.page_source
    # BUG FIX: the original parsed the undefined name `pagina_atual`
    # (NameError); `current_page` is the variable actually assigned above.
    soup = BeautifulSoup(current_page, "html.parser")
    list_products = soup.find_all("a", class_="_link_129ai_20")
    for product in list_products:
        # BUG FIX: hrefs may be site-relative (e.g. "/soccer/...");
        # requests.get() needs an absolute URL, so resolve against the
        # listing URL. Absolute hrefs pass through urljoin unchanged.
        link = urljoin(url_linha, product.attrs["href"])
        r = requests.get(link)
        soup = BeautifulSoup(r.text, "html.parser")
        name_product = soup.find("h1", "ml-meta__title").text
        print(name_product)
        preco_elemento = soup.find("div", "ml-prices__price").text
        preco_produto = re.sub("£", "", preco_elemento)
        preco = float(preco_produto)
        print(preco)
        # BUG FIX: use the product link for the code — driver.current_url
        # still points at the listing page (Selenium never navigated) —
        # and print the variable that was actually assigned
        # (the original printed the undefined `code_product`).
        product_code = link.split('-')[-1]
        print(product_code)
您可以通过API获取产品:
import requests
from urllib.parse import urlparse
def get_products(url, page):
    """Fetch one page of products from the prodirectsport search API.

    Args:
        url: Full listing-page URL whose path identifies the category
            (e.g. ".../brand-adidas/silo-predator/").
        page: 1-based page number to request.

    Returns:
        The list of product dicts under the API response's "products" key.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    headers = {
        # A browser-like UA; the API rejects the default requests UA.
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
    }
    # BUG FIX: derive the path from the `url` parameter — the original
    # referenced the module-level global `url_linha`, so the argument
    # was silently ignored.
    path = urlparse(url).path
    params = {
        'location': f'{path}?pg={page}',
    }
    # Use a distinct local name instead of reassigning the `url` parameter.
    api_url = 'https://www.prodirectsport.com/api/v1/search'
    response = requests.get(api_url, params=params, headers=headers)
    # Fail loudly on HTTP errors instead of crashing inside .json().
    response.raise_for_status()
    data = response.json()
    return data['products']
# Print "name = current price" for every product on page 1 of the
# adidas Predator listing. `url_linha` stays module-level because the
# original helper reads it as a global.
url_linha = (
    "https://www.prodirectsport.com/soccer/l/adults/"
    "departments-boots/activity-football/brand-adidas/silo-predator/"
)
for product in get_products(url=url_linha, page=1):
    print(f'{product["name"]} = {product["pricing"]["current"]}')