I am building a microservice that uses document loaders. When I try to import langchain's UnstructuredMarkdownLoader, the application fails to start at import time:
$ flask --app main run --debug
Traceback (most recent call last):
File "venv/bin/flask", line 8, in <module>
sys.exit(main())
File "venv/lib/python3.9/site-packages/flask/cli.py", line 1063, in main
cli.main()
File "venv/lib/python3.9/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "venv/lib/python3.9/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "venv/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "venv/lib/python3.9/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "venv/lib/python3.9/site-packages/click/decorators.py", line 84, in new_func
return ctx.invoke(f, obj, *args, **kwargs)
File "venv/lib/python3.9/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "venv/lib/python3.9/site-packages/flask/cli.py", line 911, in run_command
raise e from None
File "venv/lib/python3.9/site-packages/flask/cli.py", line 897, in run_command
app = info.load_app()
File "venv/lib/python3.9/site-packages/flask/cli.py", line 308, in load_app
app = locate_app(import_name, name)
File "venv/lib/python3.9/site-packages/flask/cli.py", line 218, in locate_app
__import__(module_name)
File "main.py", line 5, in <module>
from lc_indexer import index_documents
File "lc_indexer.py", line 5, in <module>
from langchain.document_loaders import UnstructuredMarkdownLoader
File "venv/lib/python3.9/site-packages/langchain/__init__.py", line 6, in <module>
from langchain.agents import MRKLChain, ReActChain, SelfAskWithSearchChain
File "venv/lib/python3.9/site-packages/langchain/agents/__init__.py", line 2, in <module>
from langchain.agents.agent import (
File "venv/lib/python3.9/site-packages/langchain/agents/agent.py", line 16, in <module>
from langchain.agents.tools import InvalidTool
File "venv/lib/python3.9/site-packages/langchain/agents/tools.py", line 8, in <module>
from langchain.tools.base import BaseTool, Tool, tool
File "venv/lib/python3.9/site-packages/langchain/tools/__init__.py", line 42, in <module>
from langchain.tools.vectorstore.tool import (
File "venv/lib/python3.9/site-packages/langchain/tools/vectorstore/tool.py", line 13, in <module>
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
File "venv/lib/python3.9/site-packages/langchain/chains/__init__.py", line 2, in <module>
from langchain.chains.api.base import APIChain
File "venv/lib/python3.9/site-packages/langchain/chains/api/base.py", line 13, in <module>
from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT
File "venv/lib/python3.9/site-packages/langchain/chains/api/prompt.py", line 2, in <module>
from langchain.prompts.prompt import PromptTemplate
File "venv/lib/python3.9/site-packages/langchain/prompts/__init__.py", line 3, in <module>
from langchain.prompts.chat import (
File "venv/lib/python3.9/site-packages/langchain/prompts/chat.py", line 10, in <module>
from langchain.memory.buffer import get_buffer_string
File "venv/lib/python3.9/site-packages/langchain/memory/__init__.py", line 28, in <module>
from langchain.memory.vectorstore import VectorStoreRetrieverMemory
File "venv/lib/python3.9/site-packages/langchain/memory/vectorstore.py", line 10, in <module>
from langchain.vectorstores.base import VectorStoreRetriever
File "venv/lib/python3.9/site-packages/langchain/vectorstores/__init__.py", line 2, in <module>
from langchain.vectorstores.analyticdb import AnalyticDB
File "venv/lib/python3.9/site-packages/langchain/vectorstores/analyticdb.py", line 16, in <module>
from langchain.embeddings.base import Embeddings
File "venv/lib/python3.9/site-packages/langchain/embeddings/__init__.py", line 19, in <module>
from langchain.embeddings.openai import OpenAIEmbeddings
File "venv/lib/python3.9/site-packages/langchain/embeddings/openai.py", line 67, in <module>
class OpenAIEmbeddings(BaseModel, Embeddings):
File "pydantic/main.py", line 197, in pydantic.main.ModelMetaclass.__new__
File "pydantic/fields.py", line 506, in pydantic.fields.ModelField.infer
File "pydantic/fields.py", line 436, in pydantic.fields.ModelField.__init__
File "pydantic/fields.py", line 552, in pydantic.fields.ModelField.prepare
File "pydantic/fields.py", line 663, in pydantic.fields.ModelField._type_analysis
File "pydantic/fields.py", line 808, in pydantic.fields.ModelField._create_sub_type
File "pydantic/fields.py", line 436, in pydantic.fields.ModelField.__init__
File "pydantic/fields.py", line 552, in pydantic.fields.ModelField.prepare
File "pydantic/fields.py", line 668, in pydantic.fields.ModelField._type_analysis
File "/home/my_username/.pyenv/versions/3.9.16/lib/python3.9/typing.py", line 852, in __subclasscheck__
return issubclass(cls, self.__origin__)
TypeError: issubclass() arg 1 must be a class
Here is lc_indexer.py, where the langchain import happens:
# INDEX DOCUMENTS
import os
from os.path import join, isfile

from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import TokenTextSplitter, CharacterTextSplitter
from langchain.vectorstores import Chroma


def index_documents(source_directories: list[str], persist_directory: str, chunk_size: int = 1000,
                    chunk_overlap: int = 15):
    """
    Indexes the documents found in the given directories
    :param source_directories: list[str]
    :param persist_directory: str
    :param chunk_size: int = 1000
    :param chunk_overlap: int = 15
    :return:
    """
    only_files = []
    for directory in source_directories:
        my_path = f'{directory}'
        for f in os.listdir(my_path):
            if isfile(join(my_path, f)):
                only_files.append(f'{my_path}/{f}')
    embeddings = OpenAIEmbeddings()
    for file in only_files:
        index_file_to_chroma(file, persist_directory, embeddings, chunk_size, chunk_overlap)


def index_file_to_chroma(file: str, persist_directory: str, embeddings: OpenAIEmbeddings,
                         chunk_size: int, chunk_overlap: int):
    """
    Indexes a document into Chroma
    :param embeddings: OpenAIEmbeddings
    :param file: str
    :param persist_directory: str
    :param chunk_size: int
    :param chunk_overlap: int
    :return:
    """
    loader = UnstructuredMarkdownLoader(file_path=file, encoding='utf8')
    docs = loader.load()
    # first pass splits on characters, second pass re-splits on tokens with overlap
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    pages = text_splitter.split_documents(docs)
    text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(pages)
    db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
    db.persist()
    print(f'Indexed file {file} for module {persist_directory}')
    db = None
# /INDEX DOCUMENTS
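For context, the traceback shows that main.py pulls this module in at import time. A minimal sketch of that entry point, with the route and directories as hypothetical placeholders rather than the actual service code:

# main.py (simplified sketch; real configuration and routes omitted)
from flask import Flask

from lc_indexer import index_documents  # this import triggers the langchain import chain

app = Flask(__name__)

@app.route('/index', methods=['POST'])
def index():
    # placeholder directories; the real ones come from the service configuration
    index_documents(source_directories=['./docs'], persist_directory='./chroma_db')
    return {'status': 'ok'}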
lc_indexer.py was copied from a test project where this error never occurred, but that project was only run from the CLI, so something may have changed here. I already tried copying these functions and imports directly into main.py, but I get the same error.
If I comment out the import of index_documents from lc_indexer.py and the call to that function in main.py, the app starts without any problem.
What is the source of the problem? The langchain requirements are installed.
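To rule Flask out, the failing import alone should reproduce the error in the same virtualenv (a quick isolation check, not part of the original run):

$ venv/bin/python -c "from langchain.document_loaders import UnstructuredMarkdownLoader"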
Try downgrading the typing-extensions module back to version 4.5.0.
I just ran into this problem with version 4.6.0, which broke my project's execution after Poetry updated the dependencies. I downgraded and now it works again.
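With Poetry, one way to re-pin it so the constraint lands in pyproject.toml and the lock file (plain pip users can simply run pip install typing-extensions==4.5.0):

$ poetry add typing-extensions==4.5.0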
After spending a lot of time on this I found a solution. I tried downgrading typing_extensions==4.5.0 and several other options, but that did not work for me.
The solution to this problem was to install the following:
pip install pydantic==1.10.8
pip install typing-inspect==0.8.0 typing_extensions==4.5.0
pip install chromadb==0.3.26
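To keep these pins from being undone on the next dependency resolution, the same versions can go into requirements.txt (assuming a pip-managed setup rather than Poetry):

pydantic==1.10.8
typing-inspect==0.8.0
typing_extensions==4.5.0
chromadb==0.3.26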
I fixed the problem by updating pydantic to 1.10.8 and typing-extensions to 4.12.2.
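The equivalent pip commands for that combination (assuming a pip-managed environment):

pip install pydantic==1.10.8
pip install typing-extensions==4.12.2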
The fix also worked with spacy v3.3.3; check the changelog to pick a stable version.