I have this LCEL solution:
from langchain.document_loaders.pdf import PyMuPDFLoader
import os
from typing import List, Tuple
from dotenv import load_dotenv
from langchain.prompts.prompt import PromptTemplate
from langchain.schema import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import (
    RunnableParallel,
    RunnableLambda
)
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOllama
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Qdrant
from langchain.utils.math import cosine_similarity
load_dotenv()
# Define a dictionary to map file extensions to their respective loaders
loaders = {
    '.pdf': PyMuPDFLoader,
    '.txt': TextLoader
}
# Define a function to create a DirectoryLoader for a specific file type
def create_directory_loader(file_type, directory_path):
    return DirectoryLoader(
        path=directory_path,
        glob=f"**/*{file_type}",
        loader_cls=loaders[file_type],
        show_progress=True,
        use_multithreading=True
    )
dirpath = os.environ.get('DOCS_DIRECTORY')
pdf_loader = create_directory_loader('.pdf', dirpath)
txt_loader = create_directory_loader('.txt', dirpath)
pdfs = pdf_loader.load()
texts = txt_loader.load()
full_text = ''
for paper in texts:
    full_text = full_text + paper.page_content
for paper in pdfs:
    full_text = full_text + paper.page_content
full_text = " ".join(l for l in full_text.splitlines() if l)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=100
)
document_chunks = text_splitter.create_documents([full_text])
embeddings = GPT4AllEmbeddings()
url = 'http://0.0.0.0:6333'
qdrant = Qdrant.from_documents(
    documents=document_chunks,
    embedding=embeddings,
    url=url,
    collection_name="v2_local",
    prefer_grpc=True,
    force_recreate=True
)
# Run two different kinds of searches using different algorithms,
# then submit both combined as context.
def _custom_retriever(inputs):
    question = inputs["question"]
    sim_search = qdrant.similarity_search(query=question, k=2)
    marg_search = qdrant.max_marginal_relevance_search(query=question, k=5)
    combined_retrieved_context = sim_search[0].page_content + \
        marg_search[0].page_content
    return combined_retrieved_context
# Ollama
ollama_llm = os.environ.get('MODEL_NAME') or "llama2:chat"
model = ChatOllama(model=ollama_llm, temperature=0.9)
def prepare_prompts(prompts):
    prompt_templates = [item.prompt for item in prompts]
    prompt_embeddings = embeddings.embed_documents(prompt_templates)
    return [prompt_embeddings, prompt_templates]

def prompt_router(input):
    prompt_embeddings, prompt_templates = prepare_prompts(input["prompt"])
    query_embedding = embeddings.embed_query(input["question"])
    similarity = cosine_similarity([query_embedding], prompt_embeddings)[0]
    most_similar = prompt_templates[similarity.argmax()]
    return PromptTemplate.from_template(most_similar)

def _format_chat_history(chat_history: List[List[str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=f'[INST]{human}[/INST]'))
        buffer.append(AIMessage(content=ai))
    return buffer

class PromptItem(BaseModel):
    name: str
    prompt: str

class ChatHistory(BaseModel):
    chat_history: List[Tuple[str, str]] = Field(..., extra={
        "widget": {"type": "chat"}})
    question: str
    prompt: List[PromptItem]

_inputs = RunnableParallel(
    {
        "context": _custom_retriever,
        "question": lambda x: x["question"],
        "chat_history": lambda x: _format_chat_history(x["chat_history"]),
        "prompt": lambda x: x["prompt"]
    }
).with_types(input_type=ChatHistory)
chain = _inputs | RunnableLambda(prompt_router) | model | StrOutputParser()
It currently works fine, but it is downright terrible at anything computation-related. I want to add the llm-math tool to this chain via an agent.
Trying to adapt the agents LCEL examples to my setup fails with a ValueError, even when I run their default cookbook example as-is.
The end result I'm after is the ability to handle math calculations through an agent tool like llm-math, combined with the current LCEL approach above.
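Roughly what I have in mind is something like the sketch below (untested; it uses the legacy load_tools / initialize_agent API, and _looks_like_math is just a placeholder heuristic I made up):
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain_core.runnables import RunnableBranch, RunnableLambda

# "llm-math" wraps LLMMathChain as a calculator tool
math_tools = load_tools(["llm-math"], llm=model)
math_agent = initialize_agent(
    tools=math_tools,
    llm=model,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,  # don't hard-fail on malformed agent output
)

def _looks_like_math(inputs) -> bool:
    # Placeholder heuristic; the real routing logic is what I'm unsure about.
    question = inputs["question"].lower()
    return any(tok in question for tok in ("calculate", "sum", "+", "-", "*", "/"))

math_branch = RunnableLambda(
    lambda x: math_agent.invoke({"input": x["question"]})["output"]
)

# Route math-looking questions to the agent, everything else to the chain above.
routed_chain = RunnableBranch(
    (_looks_like_math, math_branch),
    chain,
)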
From the issue/discussion #15634 you mentioned, it fails in langchain-ai/langchain@libs/langchain/langchain/agents/output_parsers/xml.py, in def parse(self, text: str):
def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
    if "</tool>" in text:
        tool, tool_input = text.split("</tool>")
        _tool = tool.split("<tool>")[1]
        _tool_input = tool_input.split("<tool_input>")[1]
        if "</tool_input>" in _tool_input:
            _tool_input = _tool_input.split("</tool_input>")[0]
        return AgentAction(tool=_tool, tool_input=_tool_input, log=text)
    elif "<final_answer>" in text:
        _, answer = text.split("<final_answer>")
        if "</final_answer>" in answer:
            answer = answer.split("</final_answer>")[0]
        return AgentFinish(return_values={"output": answer}, log=text)
    else:
        raise ValueError
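To see what this parser accepts and rejects concretely, you can call it directly (import path assumed from a langchain 0.1.x-era install; adjust for your version):
from langchain.agents.output_parsers import XMLAgentOutputParser

parser = XMLAgentOutputParser()

# Accepted: parses into an AgentAction
print(parser.parse("<tool>calculator</tool><tool_input>2 + 2</tool_input>"))

# Accepted: parses into an AgentFinish
print(parser.parse("<final_answer>4</final_answer>"))

# Rejected: no <tool> or <final_answer> tags, so the bare ValueError is raised
try:
    parser.parse("The answer is 4")
except ValueError:
    print("unparseable model output")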
This means the ValueError is raised by the parse method in xml.py in langchain-ai/langchain whenever the input text does not match the XML structure the method is designed to handle. Specifically:
- Missing <tool> and <final_answer> tags: the method expects the input text to contain either a <tool> tag or a <final_answer> tag. If neither is present, it cannot classify the input as an AgentAction or an AgentFinish, so it falls into the else branch and raises the bare ValueError (a quick way to surface the offending output is sketched after this list).
- Unclosed or malformed tags: if <tool>, <tool_input>, or <final_answer> is not closed properly (for example, the matching closing tag such as </tool> is missing), the string split operations will not behave as expected, which can leave the tool name, tool input, or final answer incompletely or incorrectly parsed.
- Tags in the wrong order: if the tags appear out of the expected order (for example, <tool_input> before <tool>), the splits keyed on those tags produce incorrect results and the parse fails.
- Document loading and processing: the documents are loaded with PyMuPDFLoader and TextLoader. If the documents have unusual formatting or non-standard line breaks, the way text from different documents is concatenated and split into lines may produce unexpected results.
- Retrieval: the _custom_retriever function performs the similarity and max-marginal-relevance searches. If the qdrant store does not behave as expected, for example if a query returns no results, or there is a problem with the search itself, errors can occur here as well.
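As a concrete way to see what the model actually emitted when this happens, you can wrap the stock parser so unparseable output is surfaced instead of swallowed by the bare ValueError (a sketch only; ForgivingXMLAgentOutputParser is a name I made up, and returning the raw text as a final answer is for debugging, not a production fix):
from typing import Union
from langchain.agents.output_parsers import XMLAgentOutputParser
from langchain_core.agents import AgentAction, AgentFinish

class ForgivingXMLAgentOutputParser(XMLAgentOutputParser):
    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        try:
            return super().parse(text)
        except ValueError:
            # Log the raw model output so you can see why the tags are missing.
            print(f"Unparseable agent output:\n{text}")
            return AgentFinish(return_values={"output": text}, log=text)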
So: first, confirm that the ValueError is exactly