phrases_to_search = [
    "artificial intelligence",
    "machine learning",
    "neural network",
]

# Tally how often each phrase occurs in the extracted PDF texts.
phrase_counts = search_phrases(pdf_texts, phrases_to_search)

# Report one line per phrase.
for phrase, count in phrase_counts.items():
    print(f"'{phrase}': {count} occurrences")
输出:
File "<ipython-input-43-70576973716e>", line 2
phrases_to_search = [
^
IndentationError: unexpected indent
我需要统计一组 PDF 文件中各个关键字(短语)出现的次数。
这是一个缩进错误!Python 对缩进非常敏感,请确保每一行代码的缩进都正确。
您可以尝试以下方法:
def search_phrases(pdf_texts, phrases_to_search):
    """Count case-insensitive occurrences of each phrase across all texts.

    Args:
        pdf_texts: Iterable of strings, one per extracted document.
        phrases_to_search: Iterable of phrase strings to look for.

    Returns:
        A dict mapping each phrase (exactly as given) to the total number of
        non-overlapping, case-insensitive matches summed over every text.
        Every phrase is present in the result, with a count of 0 when it
        never matches or when ``pdf_texts`` is empty.
    """
    # Lowercase each phrase once up front instead of once per text; building
    # a list also lets callers pass a one-shot iterable safely.
    lowered_phrases = [
        (phrase, phrase.lower()) for phrase in phrases_to_search
    ]
    # Seed every phrase with 0 so the caller always gets a complete report,
    # even when there are no texts to scan.
    phrase_counts = {phrase: 0 for phrase, _ in lowered_phrases}
    for text in pdf_texts:
        # Lowercase the text once per document rather than once per phrase.
        lower_text = text.lower()
        for phrase, lower_phrase in lowered_phrases:
            phrase_counts[phrase] += lower_text.count(lower_phrase)
    return phrase_counts
# Placeholder extraction step: swap in your own PDF text-extraction routine.
pdf_texts = extract_text_from_pdfs(["path/to/pdf1.pdf", "path/to/pdf2.pdf"])

# Phrases whose occurrences should be tallied.
phrases_to_search = [
    "artificial intelligence",
    "machine learning",
    "neural network",
]

# Run the search over every extracted text.
phrase_counts = search_phrases(pdf_texts, phrases_to_search)

# Emit one summary line per phrase.
for phrase, count in phrase_counts.items():
    print(f"'{phrase}': {count} occurrences")