在 Python 中使用 BeautifulSoup 和 Selenium,我试图在 Spotify 播放列表页面上向下滚动,以便解析全部歌曲名称。然而,代码只能抓取到前 30 首歌曲,页面不会继续向下滚动加载更多。这是为什么?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import csv
from bs4 import BeautifulSoup
# Launch a Chrome browser session.
driver = webdriver.Chrome()

# Navigate to the playlist page.
driver.get("SPOTIFY PLAYLIST URL")

# Send PAGE_DOWN keystrokes to the <body> element to scroll the page.
# NOTE(review): Spotify's track grid scrolls inside its own container,
# so scrolling <body> may never load rows beyond the initial batch.
page_body = driver.find_element(By.TAG_NAME, "body")
for _ in range(30):  # Adjust the range as needed
    page_body.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)  # Adjust the sleep time as needed

# Hand the rendered HTML over to BeautifulSoup.
parsed = BeautifulSoup(driver.page_source, 'html.parser')

# Collect the song-title <div>s by their (obfuscated) class name.
songs = parsed.find_all("div", class_="btE2c3IKaOXZ4VNAb8WQ")

if not songs:
    print("No songs found. Please check the class name or the page structure.")
else:
    print(f"Found {len(songs)} songs.")
    # Dump the titles into a one-column CSV file.
    with open('songs.csv', 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['SONGS'])
        for entry in songs:
            title = entry.get_text(strip=True)
            print(title)
            csv_writer.writerow([title])

# Shut the browser down.
driver.quit()
您把向下翻页的按键发送到了页面上错误的元素。Spotify 的歌曲列表在它自己的容器内滚动,必须对播放列表容器本身执行向下滚动,才能加载网格中的所有歌曲。
不要使用
body = driver.find_element(By.TAG_NAME, "body")
而是改用
playlist = driver.find_element(By.CSS_SELECTOR, "div[data-testid='playlist-tracklist']")
这样你的代码就变成了
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import csv
from bs4 import BeautifulSoup
# Initialize the WebDriver.
driver = webdriver.Chrome()
try:
    # Open the playlist page.
    driver.get("SPOTIFY PLAYLIST URL")

    # Spotify renders the tracks inside its own scrollable grid, so the
    # PAGE_DOWN keys must go to the tracklist container, not <body>.
    playlist = driver.find_element(By.CSS_SELECTOR, "div[data-testid='playlist-tracklist']")

    # Keep paging down until no new song rows appear, instead of a blind,
    # fixed number of presses that can stop short on long playlists.
    previous_count = -1
    for _ in range(200):  # hard upper bound as a safety net
        playlist.send_keys(Keys.PAGE_DOWN)
        time.sleep(1)  # give the lazily-loaded rows time to render
        current_count = len(driver.find_elements(
            By.CSS_SELECTOR, "div.btE2c3IKaOXZ4VNAb8WQ"))
        if current_count == previous_count:
            break  # no new rows loaded since the last scroll — done
        previous_count = current_count

    # Parse the fully-scrolled page source with BeautifulSoup.
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # Find song elements by their class name.
    # NOTE(review): this class name is build-generated and may change when
    # Spotify redeploys; verify it against the live page if nothing matches.
    songs = soup.find_all("div", class_="btE2c3IKaOXZ4VNAb8WQ")

    # Check if songs are found.
    if not songs:
        print("No songs found. Please check the class name or the page structure.")
    else:
        print(f"Found {len(songs)} songs.")
        # Write one song title per CSV row.
        with open('songs.csv', 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['SONGS'])
            for song in songs:
                song_text = song.get_text(strip=True)
                print(song_text)
                writer.writerow([song_text])
finally:
    # Always release the browser, even if any step above raises.
    driver.quit()