我正在尝试使用 Python 自动更新我的 LinkedIn 个人资料。我已经使用 Selenium 成功登录,现在我尝试使用 requests 库仅为“关于”部分提交表单数据。但是,在尝试提交表单时,我始终收到 403 Forbidden 错误。
这是我到目前为止所做的:
使用 Selenium 登录: 我可以使用 Selenium 成功登录 LinkedIn 并导航到我的个人资料页面。
提取 Cookie 和 CSRF 令牌: 我从 Selenium 会话中提取了 cookie,并从个人资料页面的 HTML 中获取了 CSRF 令牌。
使用 requests 提交表单数据: 我尝试使用 requests 库提交“关于”部分的表单数据,并在请求中附带上述 cookie 和 CSRF 令牌。
但是无论我如何更改,我都会不断收到 403 Forbidden 响应。
我的代码:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
import json
import re
# Placeholder credentials typed into the login form further down.
username = 'my_username'
password = 'my_password'

# Browser options for the Chrome session created below.
options = webdriver.ChromeOptions()
# options.add_argument('--headless') # for headless mode

# Hand `options` to the driver: previously the object was built but never
# passed on, so any argument added to it (e.g. headless) was ignored.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
def get_cookies(driver):
    """Return the driver's cookies as a ``{name: value}`` dictionary.

    Args:
        driver: Object whose ``get_cookies()`` returns an iterable of dicts
            that each contain ``'name'`` and ``'value'`` keys.

    Returns:
        A dict mapping cookie names to values. If the same name appears more
        than once, the last occurrence wins.
    """
    return {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}
# Matches a ``name="csrfToken" content="..."`` attribute pair. Either quote
# style is accepted (the closing quote must match the opening one) and
# whitespace around ``=`` and between the two attributes is flexible.
_CSRF_META_RE = re.compile(
    r'''name\s*=\s*(["'])csrfToken\1\s+content\s*=\s*(["'])(.*?)\2'''
)


def extract_csrf_token(html):
    """Extract the ``csrfToken`` value from an HTML document.

    Args:
        html: Page source as a string. ``None`` or an empty string is treated
            as "no token" instead of raising.

    Returns:
        The text of the ``content`` attribute that follows
        ``name="csrfToken"``, or ``None`` when no such attribute pair exists.
    """
    if not html:
        return None
    match = _CSRF_META_RE.search(html)
    return match.group(3) if match else None
def about(session, link, val, csrf_token, headers, timeout=30):
    """POST the "About" (summary) text as form data and report the outcome.

    Args:
        session: Object exposing ``post(url, data=..., headers=...,
            timeout=...)``, e.g. a ``requests.Session``.
        link: URL the form is submitted to.
        val: New summary text.
        csrf_token: Value sent in the ``csrfToken`` form field.
        headers: HTTP headers passed through to the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        The response object returned by ``session.post`` so callers can
        inspect it; a success or failure message is also printed.
    """
    # NOTE(review): the field name embeds what looks like a profile-specific
    # identifier -- confirm it matches the profile being edited.
    summary_field_name = 'gai-text-form-component-profileEditFormElement-SUMMARY-profile-ACoAAE9YpSsBDmNZhmxG3ss87KP-i8rqiD6k-gM-summary'
    data = {
        summary_field_name: val,
        'save_button_name': 'Save',
        'csrfToken': csrf_token,
    }
    response = session.post(link, data=data, headers=headers, timeout=timeout)
    if response.status_code == 200:
        print("Form submitted successfully!")
    else:
        print(f"Failed to submit form: {response.status_code}")
        print(response.text)
    return response
# Main flow: log in with the browser, copy its cookies into a requests
# session, then submit each section found in core.json through its handler.
try:
    # Fill in and submit the login form.
    driver.get('https://www.linkedin.com/login')
    username_field = driver.find_element(By.ID, 'username')
    username_field.send_keys(username)
    password_field = driver.find_element(By.ID, 'password')
    password_field.send_keys(password)
    password_field.send_keys(Keys.RETURN)

    # Wait until the link can actually be clicked; an element that is merely
    # present in the DOM may still be hidden or disabled.
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//a[@class="ember-view block"]'))
    ).click()
    profile_page = driver.current_url
    driver.get(profile_page)
    csrf_token = extract_csrf_token(driver.page_source)
    base = "https://www.linkedin.com/in/timmy-scrown-a6399b311/edit/forms/{}/new/?profileFormEntryPoint=PROFILE_COMPLETION_HUB"

    # Copy the browser cookies into the requests session, keeping the domain
    # and path each cookie was issued for so they are scoped as in the browser.
    cookies = driver.get_cookies()
    session = requests.Session()
    for cookie in cookies:
        session.cookies.set(
            cookie['name'],
            cookie['value'],
            domain=cookie.get('domain') or '',
            path=cookie.get('path') or '/',
        )

    if csrf_token is None:
        # NOTE(review): LinkedIn's web client appears to use the JSESSIONID
        # cookie value (without its surrounding quotes) as the CSRF token --
        # confirm against the requests the browser sends.
        jsessionid = next(
            (c['value'] for c in cookies if c['name'] == 'JSESSIONID'),
            None,
        )
        if jsessionid:
            csrf_token = jsessionid.strip('"')
    if csrf_token is None:
        # Make the problem visible instead of silently sending no token.
        print("Warning: no CSRF token found; the server will likely reject the request.")

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': profile_page,
        'X-CSRF-Token': csrf_token,
        'X-Requested-With': 'XMLHttpRequest'
    }

    # core.json key -> path segment substituted into `base`.
    sections = {
        "about": "summary",
        "education": "education",
        "positions": "position",
        "career_break": "career-break",
        "skills": "skills",
        "certifications": "certification",
        "projects": "project",
        "courses": "course",
        "volunteer_experience": "volunteer-experience",
        "publications": "publication",
        "patents": "patent",
        "honors_awards": "honor",
        "languages": "language",
        "organizations": "organization",
        "test_scores": "test-score",
        "causes": "causes"
    }

    # core.json key -> handler called as
    # handler(session, link, value, csrf_token, headers).
    # NOTE(review): only `about` is defined in the visible code; the other
    # handlers are presumably defined elsewhere -- otherwise building this
    # dict raises NameError.
    functions = {
        "about": about,
        "education": education,
        "positions": positions,
        "career_break": career_break,
        "skills": skills,
        "certifications": certifications,
        "projects": projects,
        "courses": courses,
        "volunteer_experience": volunteer_experience,
        "publications": publications,
        "patents": patents,
        "honors_awards": honors_awards,
        "languages": languages,
        "organizations": organizations,
        "test_scores": test_scores,
        "causes": causes
    }

    # Read as UTF-8 explicitly; the platform default encoding can differ and
    # would break on non-ASCII profile text.
    with open('core.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Submit every section that has a registered handler.
    for key, value in data.items():
        if key in functions:
            link = base.format(sections[key])
            functions[key](session, link, value, csrf_token, headers)
            print(link, key, value)
finally:
    # Always close the browser, even when a step above fails.
    driver.quit()
core.json
{
"about": "Experienced software engineer with a passion for developing innovative programs.",
"education": [
{
"school": "University of Technology",
"degree": "Bachelor of Science in Computer Science",
"field_of_study": "Computer Science",
"start_year": "2016",
"end_year": "2020"
}
],
"positions": [
{
"title": "Senior Developer",
"company": "TechCorp",
"location": "New York, NY",
"start_month": "January",
"start_year": "2021",
"end_month": "Present",
"description": "Leading a team of developers to create innovative software solutions."
}
],
"skills": ["Python", "Java", "JavaScript"],
"certifications": [
{
"name": "Certified Java Developer",
"issuing_organization": "Oracle",
"issue_date": "2021-06"
}
]
}
我的错误
Failed to submit form: 403
<!DOCTYPE html>
<html lang="en">
<head>
<title>Unauthorized</title>
<style>
html, body, pre {
margin: 0;
padding: 0;
font-family: Monaco, 'Lucida Console', monospace;
background: #ECECEC;
}
h1 {
margin: 0;
background: #333;
padding: 20px 45px;
color: #fff;
text-shadow: 1px 1px 1px rgba(0,0,0,.3);
border-bottom: 1px solid #111;
font-size: 28px;
}
p#detail {
margin: 0;
padding: 15px 45px;
background: #888;
border-top: 4px solid #666;
color: #111;
text-shadow: 1px 1px 1px rgba(255,255,255,.3);
font-size: 14px;
border-bottom: 1px solid #333;
}
</style>
</head>
<body>
<h1>Unauthorized</h1>
<p id="detail">
You must be authenticated to access this page.
</p>
</body>
</html>
可以尝试使用 undetected-chromedriver:
pip install undetected-chromedriver
它不会触发反机器人检测服务:
# Import undetected-chromedriver under the alias `browser`.
import undetected_chromedriver as browser
# Create the Chrome driver through that package with its default arguments.
# NOTE(review): presumably this replaces the `webdriver.Chrome(...)` call in
# the script above -- confirm.
driver = browser.Chrome()
...