我知道 pandas 发行说明可以在这里找到:https://pandas.pydata.org/pandas-docs/stable/whatsnew/index.html
但是,这些对我来说几乎没有用,因为它们不容易搜索。我需要弄清楚 pandas 的哪个版本删除或添加了哪些功能,以便为 Python 包编写需求/依赖项列表。是否有一个包含完整更改日志的单一文本文件,让我可以一次性搜索所有版本的更改?
我编写了一个 Python 脚本来生成这样一个文件(借助了 LLM 的帮助!),如下所示。它依赖 `requests` 和 `beautifulsoup4` 这两个 Python 包。
就我而言,我只想要 1.0 及以上版本的发行说明,但您可以取消注释相关行以获得完整的发行说明。
import re
import requests
from bs4 import BeautifulSoup
# pandas releases whose "What's new" pages are downloaded, newest first.
# Each entry is substituted into the docs URL, so it must match the page name.
# The 0.x series is disabled; uncomment those lines to build the full changelog.
versions = [
    # 2.x series
    "2.2.3", "2.2.2", "2.2.1", "2.2.0",
    "2.1.4", "2.1.3", "2.1.2", "2.1.1", "2.1.0",
    "2.0.3", "2.0.2", "2.0.1", "2.0.0",
    # 1.x series
    "1.5.3", "1.5.2", "1.5.1", "1.5.0",
    "1.4.4", "1.4.3", "1.4.2", "1.4.1", "1.4.0",
    "1.3.5", "1.3.4", "1.3.3", "1.3.2", "1.3.1", "1.3.0",
    "1.2.5", "1.2.4", "1.2.3", "1.2.2", "1.2.1", "1.2.0",
    "1.1.5", "1.1.4", "1.1.3", "1.1.2", "1.1.1", "1.1.0",
    "1.0.5", "1.0.4", "1.0.3", "1.0.2", "1.0.1", "1.0.0",
    # 0.x series (disabled by default)
    # "0.25.3", "0.25.2", "0.25.1", "0.25.0",
    # "0.24.2", "0.24.1", "0.24.0",
    # "0.23.4", "0.23.3", "0.23.2", "0.23.1", "0.23.0",
    # "0.22.0",
    # "0.21.1", "0.21.0",
    # "0.20.3", "0.20.2", "0.20.0",
    # "0.19.2", "0.19.1", "0.19.0",
    # "0.18.1", "0.18.0",
    # "0.17.1", "0.17.0",
    # "0.16.2", "0.16.1", "0.16.0",
    # "0.15.2", "0.15.1", "0.15.0",
    # "0.14.1", "0.14.0",
    # "0.13.1", "0.13.0",
    # "0.12.0",
    # "0.11.0",
    # "0.10.1", "0.10.0",
    # "0.9.1", "0.9.0",
    # "0.8.1", "0.8.0",
    # "0.7.3", "0.7.2", "0.7.1", "0.7.0",
    # "0.6.1", "0.6.0",
    # "0.5.0",
    # "0.4.x"
]
def fetch_release_notes(version, *, timeout=30):
    """Download the pandas "What's new" page for one version as plain text.

    Args:
        version: Version string as it appears in the docs URL, e.g. "1.5.3".
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a single stalled connection would block the whole run.

    Returns:
        The text of the page's ``<article role="main">`` element with runs of
        blank lines collapsed, or "" when the request fails or the page has
        no such element (a message is printed in both cases).
    """
    url = f"https://pandas.pydata.org/docs/whatsnew/v{version}.html"

    # Only the network call can raise RequestException, so keep the try small.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch version {version}: {e}")
        return ""

    # Decode using the encoding detected from the body rather than the
    # one guessed from the HTTP headers.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, 'html.parser')
    article = soup.find('article', {'role': 'main'})
    if article is None:
        print(f"Could not find main article for version {version}")
        return ""

    # One chunk of text per direct child of the article.
    chunks = [element.get_text() + '\n' for element in article.children]
    result = '\n'.join(chunks).strip()
    # Collapse any run of whitespace-only lines into a single blank line.
    return re.sub(r'\n\s*\n', '\n\n', result)
if __name__ == "__main__":
    # Write one "## <version>" section per release into a single searchable
    # file. The encoding is explicit because the release notes contain
    # non-ASCII characters, which the platform default encoding (e.g. cp1252
    # on Windows) may be unable to write.
    with open("./PANDAS_FULL_CHANGELOG.txt", "w", encoding="utf-8") as f:
        for version in versions:
            version_text = fetch_release_notes(version)
            f.write(f"## {version}\n\n{version_text}\n\n")