我是聊天机器人开发领域的初学者,目前正在编写一段 RAG(检索增强生成)代码来创建基于上下文的聊天机器人,但我不断收到下面的错误。我认为它发生在文本被分割的时候,因为即使在调用分割函数之后,文本中仍然保留着“\n”分隔符。回溯的最后一行发生在 Huggingface 库中。
回溯:
Traceback (most recent call last):
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "c:\Users\sophi\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\__main__.py", line 39, in <module>
cli.main()
File "c:\Users\sophi\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy/..\debugpy\server\cli.py", line 430, in main
run()
File "c:\Users\sophi\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy/..\debugpy\server\cli.py", line 284, in run_file
runpy.run_path(target, run_name="__main__")
File "c:\Users\sophi\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 321, in run_path
return _run_module_code(code, init_globals, run_name,
File "c:\Users\sophi\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 135, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "c:\Users\sophi\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 124, in _run_code
exec(code, run_globals)
File "c:\Users\sophi\Documents\ProjetosdePesquisa\Projeto-de-Pesquisa-SOLIRIS\llm_rag _ver4\utils\rag.py", line 130, in <module>
main()
File "c:\Users\sophi\Documents\ProjetosdePesquisa\Projeto-de-Pesquisa-SOLIRIS\llm_rag _ver4\utils\rag.py", line 126, in main
response = qa.invoke({"input": {"context": context, "question": question}})
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\base.py", line 4588, in invoke
return self.bound.invoke(
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\base.py", line 2505, in invoke
input = step.invoke(input, config, **kwargs)
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\passthrough.py", line 469, in invoke
return self._call_with_config(self._invoke, input, config, **kwargs)
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\base.py", line 1599, in _call_with_config
context.run(
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\config.py", line 380, in call_func_with_variable_args
return func(input, **kwargs) # type: ignore[call-arg]
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\passthrough.py", line 456, in _invoke
**self.mapper.invoke(
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\base.py", line 3152, in invoke
output = {key: future.result() for key, future in zip(steps, futures)}
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\base.py", line 3152, in <dictcomp>
output = {key: future.result() for key, future in zip(steps, futures)}
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\concurrent\futures\_base.py", line 446, in result
return self.__get_result()
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\concurrent\futures\_base.py", line 391, in __get_result
raise self._exception
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\concurrent\futures\thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\base.py", line 4588, in invoke
return self.bound.invoke(
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\runnables\base.py", line 2507, in invoke
input = step.invoke(input, config)
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\retrievers.py", line 221, in invoke
raise e
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\retrievers.py", line 214, in invoke
result = self._get_relevant_documents(
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_core\vectorstores.py", line 797, in _get_relevant_documents
docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_community\vectorstores\chroma.py", line 349, in similarity_search
docs_and_scores = self.similarity_search_with_score(
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_community\vectorstores\chroma.py", line 438, in similarity_search_with_score
query_embedding = self._embedding_function.embed_query(query)
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_huggingface\embeddings\huggingface.py", line 102, in embed_query
return self.embed_documents([text])[0]
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_huggingface\embeddings\huggingface.py", line 81, in embed_documents
texts = list(map(lambda x: x.replace("\n", " "), texts))
File "c:\Users\sophi\miniconda3\envs\ambiente3.9\lib\site-packages\langchain_huggingface\embeddings\huggingface.py", line 81, in <lambda>
texts = list(map(lambda x: x.replace("\n", " "), texts))
AttributeError: 'dict' object has no attribute 'replace'
import sys
import os
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains import create_retrieval_chain
from langchain_community.document_loaders import TextLoader
from sentence_transformers import SentenceTransformer
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import Chroma
from langchain.docstore.document import Document
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import CTransformers
# Path to the PDF file
PDF_PATH = 'pdf_handling/entrevistas.pdf'
# Path where the ChromaDB data is saved
CHROMA_DATA_PATH = "chroma_data"
# Embeddings model
# NOTE(review): not referenced by the code visible in this file — confirm it is still needed.
EMBED_MODEL = "all-MiniLM-L6-v2"
# Collection name
# NOTE(review): not referenced by the code visible in this file — confirm it is still needed.
COLLECTION_NAME = "ruth_docs"
def dict_to_string(input_dict):
    """Render a mapping as one ``"key: value, key: value"`` string.

    Args:
        input_dict: Mapping whose items are formatted in iteration order.

    Returns:
        The ``"key: value"`` pairs joined by ``", "``; an empty string when
        the mapping is empty.
    """
    # A generator is enough here; join() does not need an intermediate list.
    return ', '.join(f"{key}: {value}" for key, value in input_dict.items())
# Extract the text of a PDF and return it as a list of Document objects
def extract_text_from_pdf(file_path):
    """Read a PDF and split its text into chunked ``Document`` objects.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The list of documents produced by the text splitter for the
        concatenated text of all pages, or an empty list when ``file_path``
        does not exist (a message is printed in that case).
    """
    # Keep the try body limited to the part that can raise FileNotFoundError.
    try:
        with open(file_path, 'rb') as pdf_file:
            pdf = PyPDF2.PdfReader(pdf_file)
            # Pages must be read while the file is still open. A page that
            # yields no text is treated as an empty string so the join
            # cannot fail on a falsy result.
            text = "".join(page.extract_text() or "" for page in pdf.pages)
    except FileNotFoundError:
        print("Arquivo não encontrado")
        return []

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
        separators=['\n\n\n', '\n\n', '\n', ' ', ''],
    )
    # create_documents() already splits the text into chunks, so running
    # split_documents() over its output again is redundant.
    return text_splitter.create_documents([text])
class criar_vectordb:
    """Build a Chroma vector store from a list of documents."""

    def save_db(self, documents, embeddings, db_path):
        """Index ``documents`` into a Chroma store persisted at ``db_path``.

        Args:
            documents: Documents to embed and store.
            embeddings: Embedding object handed to Chroma.
            db_path: Directory passed to Chroma as ``persist_directory``.

        Returns:
            The store returned by ``Chroma.from_documents``.
        """
        self.db_path = db_path
        self.embeddings = embeddings
        self.documents = documents
        # Return the populated store directly. Re-opening it with
        # ``Chroma(db_path, embeddings)`` passes the path as the first
        # positional parameter, which is the collection name rather than the
        # persist directory, so the result would be a different, empty
        # collection.
        return Chroma.from_documents(
            self.documents,
            self.embeddings,
            persist_directory=self.db_path,
        )
# Embedding object handed to the vector store; the model is loaded on CPU.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device':'cpu'})
# Extract the text from the PDF and build the vector database
documents = extract_text_from_pdf(PDF_PATH)
vectordb = criar_vectordb().save_db(documents, embeddings, CHROMA_DATA_PATH)
# NOTE(review): this unconditionally overwrites GROQ_API_KEY in the process
# environment with the literal "-"; presumably a placeholder for the real key.
os.environ["GROQ_API_KEY"] = "-"
# Prompt text for the answering chain. It contains two placeholders,
# {context} and {question}, both declared as input variables below.
# NOTE(review): presumably {context} is filled in by the retrieval chain with
# the retrieved documents, so the caller still has to supply "question" in the
# invoke payload — confirm against the create_retrieval_chain documentation.
ruth_prompt_template = """
Você é um assistente virtual de RH utilizando documentos para embasar sua resposta sempre em fatos,
Use as informações presentes no documento para responder a resposta do candidato,
sua resposta deve ser o mais semelhante possível com a descrição presente nos documentos
contexto: {context}
pergunta: {question}
Apenas retorne as respostas úteis em ajudar na avaliação e seleção de candidatos e nada mais, usando uma linguagem gentil e empática.
Sempre responda em português, uma descrição em texto contínua, além disso adicione
um ou mais emojis às vezes para demonstrar empatia e emoção.
"""
prompt = PromptTemplate(template=ruth_prompt_template, input_variables=['context', 'question'])
# Disabled alternative: a local llama-2 model loaded through CTransformers.
# It sits inside a bare string literal, so none of it is executed.
'''
llm = CTransformers(
model = "model/llama-2-7b-chat.ggmlv3.q8_0.bin",
model_type = "llama",
config={'max_new_tokens': 512,
'temperature': 0.03,
'context_length': 1000,
'repetition_penalty': 1.15}
)
'''
# Active LLM; the API key is read back from the environment variable set above.
llm = ChatGroq(model_name="llama3-70b-8192", api_key=os.environ["GROQ_API_KEY"])
# Retriever over the vector store, configured with k=2.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})
# Chain that combines the LLM with the prompt defined earlier in this file.
combine_docs_chain = create_stuff_documents_chain(
llm, prompt
)
# Full chain: retriever plus the document-combining chain.
qa = create_retrieval_chain(retriever, combine_docs_chain)
# Main
def main():
    """Send one example question through the retrieval chain and print the result."""
    # Example usage
    question = "Como você lida com feedback negativo?"
    # The retriever embeds the value stored under "input", so that value must
    # be the plain query string. Nesting a dict there is what raised
    # "'dict' object has no attribute 'replace'" inside the embedding call.
    # "question" is passed as well because the prompt template declares a
    # {question} variable. No "context" is sent: the retrieval chain fills it
    # in from the retrieved documents.
    response = qa.invoke({"input": question, "question": question})
    print(response)


if __name__ == "__main__":
    main()
from typing import Any, Dict, List, Optional
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra, Field
DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
class HuggingFaceEmbeddings(BaseModel, Embeddings):
    """HuggingFace sentence_transformers embedding models.

    To use, you should have the ``sentence_transformers`` python package installed.

    Example:
        .. code-block:: python

            from langchain_huggingface import HuggingFaceEmbeddings

            model_name = "sentence-transformers/all-mpnet-base-v2"
            model_kwargs = {'device': 'cpu'}
            encode_kwargs = {'normalize_embeddings': False}
            hf = HuggingFaceEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
    """

    client: Any  #: :meta private:
    model_name: str = DEFAULT_MODEL_NAME
    """Model name to use."""
    cache_folder: Optional[str] = None
    """Path to store models.
    Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass to the Sentence Transformer model, such as `device`,
    `prompts`, `default_prompt_name`, `revision`, `trust_remote_code`, or `token`.
    See also the Sentence Transformer documentation: https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer"""
    encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Keyword arguments to pass when calling the `encode` method of the Sentence
    Transformer model, such as `prompt_name`, `prompt`, `batch_size`, `precision`,
    `normalize_embeddings`, and more.
    See also the Sentence Transformer documentation: https://sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer.encode"""
    multi_process: bool = False
    """Run encode() on multiple GPUs."""
    show_progress: bool = False
    """Whether to show a progress bar."""

    def __init__(self, **kwargs: Any):
        """Initialize the sentence_transformer."""
        super().__init__(**kwargs)
        try:
            import sentence_transformers  # type: ignore[import]
        except ImportError as exc:
            raise ImportError(
                "Could not import sentence_transformers python package. "
                "Please install it with `pip install sentence-transformers`."
            ) from exc

        self.client = sentence_transformers.SentenceTransformer(
            self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
        )

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed. Every element must be a ``str``.

        Returns:
            List of embeddings, one for each text.

        Raises:
            AttributeError: If an element of ``texts`` has no ``replace``
                method (for example a ``dict`` passed where a string is
                expected).
        """
        import sentence_transformers  # type: ignore[import]

        # Newlines are replaced by spaces before encoding.
        texts = [text.replace("\n", " ") for text in texts]
        if self.multi_process:
            pool = self.client.start_multi_process_pool()
            try:
                embeddings = self.client.encode_multi_process(texts, pool)
            finally:
                # Stop the worker pool even when encoding raises, so the
                # started processes are not left running.
                sentence_transformers.SentenceTransformer.stop_multi_process_pool(pool)
        else:
            embeddings = self.client.encode(
                texts, show_progress_bar=self.show_progress, **self.encode_kwargs
            )

        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        # A query is embedded as a one-element batch of documents.
        return self.embed_documents([text])[0]
在调试时,我尝试过用 split_text() 和 split_documents() 来代替 create_documents(),但同样不起作用:结果都一样——仍然报这个错误,而且我的文本里仍然包含所有的“\n”。我不确定问题是否出在代码的其他地方,因为这是唯一处理分隔符的部分。请帮帮忙!谢谢!
我遇到过类似的错误,结果发现我传给提示的是 `list` 而不是 `str`。而你传入的是一个 `dict`:
response = qa.invoke({"input": {"context": context, "question": question}})
而不是字符串。因此,你会得到如下错误:
texts = list(map(lambda x: x.replace("\n", " "), texts))
AttributeError: 'dict' object has no attribute 'replace'
因为它期望得到的是 `str`,而你提供的却是 `dict`。也就是说,"input" 键对应的值必须是查询字符串本身,而不是嵌套的字典。