当我尝试从 YouTube 频道获取视频链接时,得到的结果是 `None`。下面附上我的代码:
# Question's attempt: open the channel's "Videos" tab in headless Chrome and
# print the href of every element whose id is "video-title".
url = "https://www.youtube.com/@ecbeuro/videos"
service = Service("/usr/bin/chromedriver")
options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome(service=service, options=options)
driver.get(url)
news_links = driver.find_elements(By.XPATH, '//*[@id="video-title"]')
for link in news_links:
    # NOTE(review): get_attribute() returns None when the matched element has
    # no "href" attribute -- presumably the source of the reported None output;
    # confirm which element on the page actually carries the link.
    print(link.get_attribute('href'))
感谢帮助。亲切的问候!
如果您只打算从 YouTube 频道中提取最新的一些视频,可以参考这个方法(原文此处为链接):仅使用 Python 的 `requests` 库即可抓取最新的 30 个视频。
但是,如果您想抓取 YouTube 频道上的所有可用视频,则需要多次向下滚动页面,才能加载更多(直至全部)视频。要实现这一点,您可以使用 Selenium。
import time
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Scrape every video listed on a channel's "Videos" tab: keep scrolling until
# the page height stops growing, then read the title, URL, view count and age
# from each video card.
options = ChromeOptions()
options.add_argument("--start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

driver = Chrome(options=options)
try:
    wait = WebDriverWait(driver, 10)
    driver.get("https://www.youtube.com/@ecbeuro/videos")

    last_height = 0
    print("Start scrolling!")
    while True:
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        new_height = driver.execute_script("return document.documentElement.scrollHeight")
        wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'div#dismissible')))
        if last_height == new_height:
            # The height did not change since the previous pass, so nothing
            # more was loaded.
            print("Stop scrolling, reached the bottom!")
            break
        last_height = new_height
        # Give the page time to append the next batch before measuring again.
        time.sleep(1)

    data = []
    for video in driver.find_elements(By.CSS_SELECTOR, 'div#dismissible'):
        url = video.find_element(By.CSS_SELECTOR, 'div#thumbnail>ytd-thumbnail>a').get_attribute('href')
        details = video.find_element(By.CSS_SELECTOR, 'div#details')
        title = details.find_element(By.CSS_SELECTOR, 'div#meta>h3').text
        views_date = details.find_elements(By.CSS_SELECTOR, 'span.inline-metadata-item.style-scope.ytd-video-meta-block')
        # A card may expose fewer than two metadata items (presumably e.g.
        # live or upcoming entries -- TODO confirm); use empty strings then
        # instead of failing with IndexError.
        meta = [item.text.strip() for item in views_date]
        views = meta[0] if meta else ""
        date = meta[1] if len(meta) > 1 else ""
        data.append({"title": title, "url": url, "views": views, "posted_date": date})
finally:
    # Always shut the browser down, even if scraping failed part-way.
    driver.quit()

print(f"Total videos: {len(data)}")
print(data)
输出:
Start scrolling!
Stop scrolling, reached the bottom!
Total videos: 1480
[{'title': 'President Lagarde presents the latest monetary policy decisions – 27July 2023', 'url': 'https://www.youtube.com/watch?v=eUlRXBy3pBU', 'views': '2.3K views', 'posted_date': '5 days ago'}, {'title': 'Panel Discussion at the CESEE Conference 2023', 'url': 'https://www.youtube.com/watch?v=YMir_50lWhc', 'views': '746 views', 'posted_date': '13 days ago'}, {'title': 'Panel Discussion number 2 and Closing remarks at the CESEE Conference 2023', 'url': 'https://www.youtube.com/watch?v=bt-_Scd3864', 'views': '432 views', 'posted_date': '13 days ago'}, {'title': 'Civil Society Seminar Series: The evolution of European banking supervision', 'url': 'https://www.youtube.com/watch?v=d6MfWiHfua8', 'views': '412 views', 'posted_date': '2 weeks ago'}, {'title': 'Keynote speech of Valdis Dombrovskis at the CESEE Conference 2023', 'url': 'https://www.youtube.com/watch?v=-GnWOVKxFEk', 'views': '209 views', 'posted_date': '2 weeks ago'}, {'title': "Christine Lagarde's opening remarks for the CESEE Conference 2023", 'url': 'https://www.youtube.com/watch?v=pQzcvSXlI0M', 'views': '1K views', 'posted_date': '2 weeks ago'}, {'title': 'Keynote speech of Beata Javorcik at the CESEE Conference 2023', 'url': 'https://www.youtube.com/watch?v=rZITdIZYxBQ', 'views': '397 views', 'posted_date': '2 weeks ago'}, {'title': 'Civil Society Seminar Series: A digital euro for everyone', 'url': 'https://www.youtube.com/watch?v=gXD_7BDIn8Q', 'views': '1.8K views', 'posted_date': '2 weeks ago'}, {'title': 'New Euro banknotes re-design survey', 'url': 'https://www.youtube.com/watch?v=-ynWm1sYA9Q', 'views': '39K views', 'posted_date': '3 weeks ago'}, ....... 
{'title': 'ECB Press Conference - 13 January 2011 - Part 1/2', 'url': 'https://www.youtube.com/watch?v=fl_zhb4lW6c', 'views': '434 views', 'posted_date': '12 years ago'}, {'title': 'Preisstabilität: Warum ist sie für dich wichtig?', 'url': 'https://www.youtube.com/watch?v=6bSdXmxFcEE', 'views': '76K views', 'posted_date': '12 years ago'}, {'title': 'A estabilidade de preços é importante porquê?', 'url': 'https://www.youtube.com/watch?v=v4Zmx5OsKM8', 'views': '16K views', 'posted_date': '12 years ago'}, {'title': 'La stabilité des prix : pourquoi est-elle importante pour vous ?', 'url': 'https://www.youtube.com/watch?v=0xqcKYG9ax4', 'views': '37K views', 'posted_date': '12 years ago'}, {'title': 'Price stability: why is it important for you ?', 'url': 'https://www.youtube.com/watch?v=F6PvX625JCs', 'views': '66K views', 'posted_date': '12 years ago'}, {'title': 'Hinnastabiilsus – miks see on oluline?', 'url': 'https://www.youtube.com/watch?v=LhdGJ_g8k2M', 'views': '2.5K views', 'posted_date': '12 years ago'}, {'title': 'ECB Press Conference - 2 December 2010 - Part 1/2', 'url': 'https://www.youtube.com/watch?v=KsHgS6VslIk', 'views': '263 views', 'posted_date': '12 years ago'}, {'title': 'ECB Press Conference - 2 December 2010 - Part 2/2', 'url': 'https://www.youtube.com/watch?v=SP8PCanl93o', 'views': '221 views', 'posted_date': '12 years ago'}, {'title': 'ECB Statistics', 'url': 'https://www.youtube.com/watch?v=FyHiyPYyDp0', 'views': '3.3K views', 'posted_date': '12 years ago'}, {'title': 'The ECB launches new educational games', 'url': 'https://www.youtube.com/watch?v=HMIsUkNWKnE', 'views': '1.7K views', 'posted_date': '12 years ago'}, {'title': 'ECB - Inflation Island and Economia: Educational Games', 'url': 'https://www.youtube.com/watch?v=hcQUJSz82oQ', 'views': '9.5K views', 'posted_date': '12 years ago'}]
您似乎正在使用 Selenium 和 Chrome WebDriver 来检索 YouTube 频道的视频 URL。但是,由于 YouTube 网站上的更改或您尝试提取视频网址的方式出现问题,您的代码可能无法按预期运行。 YouTube 的网站会定期更新,元素的排列可能会发生变化,这可能会导致您的 XPath 无效。
下面是另一种方法:使用 `google-api-python-client` 库,通过 YouTube 数据 API 获取 YouTube 频道的视频 URL。这种方法更可靠,建议在需要自动访问 YouTube 数据时使用:
首先,安装 `google-api-python-client` 库(如果尚未安装)。您可以使用 pip 安装它:`pip install google-api-python-client`
接下来,您需要在 Google Cloud Console 中设置 YouTube Data API 并获取 API 密钥。请按照以下步骤操作:(1)在 Google Cloud Console 中创建或选择一个项目;(2)在"API 和服务"中启用 YouTube Data API v3;(3)在"凭据"页面创建一个 API 密钥。
现在,您可以使用以下Python代码来检索YouTube频道的视频URL:
from googleapiclient.discovery import build
# Settings to edit before running: the API key and the target channel.
API_KEY = "YOUR_API_KEY"  # put your actual YouTube Data API key here
CHANNEL_ID = "UC_yEUfZIzF0gub8Rr5diYmA"  # put your channel ID here
def fetch_video_urls(api_key, channel_id):
    """Return the watch-page URLs of all videos uploaded to a channel.

    The channel's uploads playlist is looked up first, because the
    playlistItems endpoint takes a playlist ID, not a channel ID. If the
    lookup finds no channel for ``channel_id``, the value is used as a
    playlist ID directly, so callers that already pass a playlist ID keep
    working.

    Args:
        api_key: YouTube Data API key passed to the client as ``developerKey``.
        channel_id: ID of the channel whose uploads should be listed.

    Returns:
        A list of ``https://www.youtube.com/watch?v=<id>`` strings, in the
        order the API returns the playlist items.

    Errors raised by the API client are not caught here.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    channels = youtube.channels().list(
        part='contentDetails',
        id=channel_id,
    ).execute()
    channel_items = channels.get('items', [])
    if channel_items:
        playlist_id = channel_items[0]['contentDetails']['relatedPlaylists']['uploads']
    else:
        # No channel matched: treat the value as a playlist ID, as before.
        playlist_id = channel_id

    video_urls = []
    next_page_token = None
    while True:
        playlist_items = youtube.playlistItems().list(
            part='snippet',
            playlistId=playlist_id,
            maxResults=50,  # Maximum number of videos per request
            pageToken=next_page_token,
        ).execute()
        for item in playlist_items.get('items', []):
            video_id = item['snippet']['resourceId']['videoId']
            video_urls.append(f'https://www.youtube.com/watch?v={video_id}')
        # Keep paging until the response carries no nextPageToken.
        next_page_token = playlist_items.get('nextPageToken')
        if not next_page_token:
            break
    return video_urls
if __name__ == '__main__':
    # Script entry point: print one video URL per line for the configured channel.
    video_urls = fetch_video_urls(API_KEY, CHANNEL_ID)
    for url in video_urls:
        print(url)
将 `'YOUR_API_KEY'` 替换为您从 Google Cloud Console 获取的 API 密钥。此外,将 `CHANNEL_ID` 设置为您要检索视频的 YouTube 频道的 ID。`fetch_video_urls` 函数使用 YouTube 数据 API 从频道的播放列表中获取视频 URL。