import os
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
# LinkedIn job-search URL (keywords "data scientist", location Tashkent, Uzbekistan).
url1 = "https://www.linkedin.com/jobs/search/?currentJobId=3553036109&geoId=118368896&keywords=data%20scientist&location=Tashkent%2C%20Uzbekistan&refresh=true"

# NOTE(review): `executable_path` is deprecated in Selenium 4 and rejected by
# later 4.x releases; if the installed version complains, pass a
# `selenium.webdriver.chrome.service.Service` object via `service=` instead.
driver = webdriver.Chrome(executable_path='C:/Users/Muhammadyusuf/chromedriver.exe')
# Implicit wait: element lookups poll for up to 10 seconds before giving up.
driver.implicitly_wait(10)
driver.get(url1)

# Title nodes of the job cards in the results list; later code clicks these.
elements = driver.find_elements(By.CLASS_NAME, 'base-search-card__title')

# Visible text of every job title. The original appended to `titlename`
# without ever creating it, which raised NameError on the first iteration.
titlename = [element.text for element in elements]
from selenium.common.exceptions import TimeoutException
import time
# CSS selectors for the fields shown in the right-hand job-detail pane.
# NOTE(review): these class names are assumptions about LinkedIn's job-search
# markup, which changes without notice -- confirm each one in the browser's
# DevTools and adjust before relying on the output.
DETAIL_SELECTORS = {
    "title": ".top-card-layout__title",
    "company": ".topcard__org-name-link",
    "location": ".topcard__flavor--bullet",
    "posted": ".posted-time-ago__text",
    "description": ".show-more-less-html__markup",
}

# One dict per job, keyed like DETAIL_SELECTORS; a field that is not found is None.
job_details = []
wait = WebDriverWait(driver, 10)

for element in elements:
    try:
        # Scroll the card into view before clicking, so the click does not
        # land outside the viewport (replaces the unreliable Keys.DOWN scroll).
        driver.execute_script(
            "arguments[0].scrollIntoView({block: 'center'});", element
        )
        ActionChains(driver).move_to_element(element).click().perform()

        # The detail pane is refreshed after the click; keep the original
        # short pause, then wait explicitly for the pane's title to be visible.
        time.sleep(2)
        wait.until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, DETAIL_SELECTORS["title"])
            )
        )

        # Read only the selected fields instead of dumping the text of every
        # node on the page. find_elements returns [] rather than raising, so
        # one missing optional field does not discard the whole job.
        # NOTE: if an implicit wait is set on the driver, each missing field
        # costs that full timeout.
        details = {}
        for field, selector in DETAIL_SELECTORS.items():
            matches = driver.find_elements(By.CSS_SELECTOR, selector)
            details[field] = matches[0].text.strip() if matches else None

        job_details.append(details)
        print(details)
    except (
        TimeoutException,
        NoSuchElementException,
        StaleElementReferenceException,
    ) as e:
        # Report the failure and move on to the next job card.
        print(f"Encountered error: {e}")
我想从 LinkedIn 抓取职位详情。代码先把职位标题作为元素取出,然后逐个点击这些元素,再获取对应职位的全部描述。
点击后,右侧会加载职位描述(job description)面板。但我只想从右侧面板中提取详情,例如标题(title)、地点(location)、发布时间(posting time),而不是抓取整个页面的文本。也就是说,每点击一个职位之后,我需要得到该职位的详细信息。请帮我解决这个问题,谢谢。
关于左侧和右侧的含义,请看下图:左侧只是职位列表,右侧是每个职位的完整描述。