这是我的完整代码:
!pip install -q transformers einops accelerate langchain bitsandbytes sentence_transformers faiss-cpu pypdf sentencepiece
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS, Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferMemory
import accelerate
import transformers
import torch
import textwrap
loader = CSVLoader('/kaggle/input/csvdata/chatdata.csv', encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cpu'})
db = FAISS.from_documents(data, embeddings)
#Mistral 7B model llm
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
GenerationConfig,
TextStreamer,
pipeline,
)
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME, device_map="auto", torch_dtype=torch.float16, load_in_8bit=True
)
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.do_sample = True
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
llm = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
return_full_text=True,
generation_config=generation_config,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.eos_token_id,
streamer=streamer,
)
def format_prompt(prompt, system_prompt=""):
if system_prompt.strip():
return f"[INST] {system_prompt} {prompt} [/INST]"
return f"[INST] {prompt} [/INST]"
SYSTEM_PROMPT = """
You are a Clinical Data Scientist and Data Analyst specializing in statistical data analysis and report generation. Your mission is to provide accurate and insightful data-driven solutions for healthcare and clinical research. As you respond, channel the expertise and precision typical of a seasoned data professional in the field of clinical data science.
If you encounter a question for which you don't have the necessary information, it's important to refrain from providing speculative or inaccurate answers.
""".strip()
chain = ConversationalRetrievalChain.from_llm(
llm,
chain_type="stuff",
retriever=db.as_retriever(),
return_source_documents=True,
verbose=True,
)
这里我面临错误:
ValidationError: 2 validation errors for LLMChain
llm
instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)
llm
instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)
from textwrap import fill
result = chain(input("ClinicalTrial Planimeter ChatBot ---")
)
print(fill(result["result"].strip(), width=80))
此 llm 链被编程为使用 llm、矢量数据库和提示与 csv 聊天,我在运行 ConversationalRetrievalChain 时遇到上述错误
我想你只需要升级langchain版本
pip install langchain --upgrade
它为我解决了错误。