I need to use the LLM Llama 3.1 8B Instruct for abstractive text summarization. My question is: how do I accomplish this task step by step? My current procedure is:
Is this the right procedure, or can I do better?
Applying a pretrained LLM
I will provide a code snippet that will guide you in building your own LLM pipeline. I also suggest you study techniques such as RAG-Rank and RAG-Fusion.
The first step is to download ollama; follow this link for installation. Whenever you want to run the Python code I provide below, always make sure the ollama application is running.
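Once ollama is installed, pull the model you plan to use. Here is a minimal sketch using the ollama Python client; the tag "llama3.1:8b" is my assumption for Llama 3.1 8B Instruct, so check the Ollama model library for the exact name:

import ollama

# Assumed tag for Llama 3.1 8B Instruct; verify with `ollama list`.
ollama.pull("llama3.1:8b")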
import logging

from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Define a system prompt for the RAG model
SYSTEM_PROMPT = """You are a very helpful assistant and an expert in
summarizing documents. Use all available resources to generate answers
to the questions asked. You may return queries that you think are
relevant to the question asked. You will be asked to summarize a
document. Here is the question: {question}
"""
def read_pdf_file(local_path: str):
    """
    Load a local PDF, split it into chunks, embed the chunks,
    and return a retriever over the resulting FAISS index.

    Args:
        local_path: Path to the PDF file.

    Returns:
        A vector-store retriever over the document chunks.
    """
    try:
        # Local PDF file upload
        if local_path:
            loader = UnstructuredPDFLoader(file_path=local_path)
            data = loader.load()
        else:
            raise ValueError("Upload a PDF file")
    except Exception:
        logging.exception("Failed to load the PDF")
        raise  # Re-raise the exception to stop further execution

    # Split and chunk the document
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
    chunks = text_splitter.split_documents(data)

    try:
        # The model tag must match what you pulled with ollama,
        # e.g. "llama3.1:8b" for Llama 3.1 8B Instruct.
        embedding_model = OllamaEmbeddings(model="llama3.1:8b")
        vectorstore_db = FAISS.from_documents(documents=chunks, embedding=embedding_model)
        vectorstore_db.save_local("faiss_index")
        vector_retriever = vectorstore_db.as_retriever()
    except Exception:
        logging.exception("Failed to build the FAISS index")
        raise  # Re-raise the exception to stop further execution

    return vector_retriever
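# Because the FAISS index is saved to disk above, later runs can reload it
# instead of re-embedding everything. A minimal sketch, assuming the same
# embedding model ("llama3.1:8b" is my assumed tag, not a confirmed name):
def load_saved_index():
    embedding_model = OllamaEmbeddings(model="llama3.1:8b")
    vectorstore_db = FAISS.load_local(
        "faiss_index", embedding_model, allow_dangerous_deserialization=True
    )
    return vectorstore_db.as_retriever()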
# RAG FUSION via multi-query retrieval
def main(question: str, pdf_path: str) -> str:
    """
    Implement RAG fusion with a multi-query retriever.

    Args:
        question: The question to be answered.
        pdf_path: Path to the PDF to query.

    Returns:
        str: The answer to the question.
    """
    # Prompt used by MultiQueryRetriever to rephrase the user's question.
    LLM_PROMPT = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI language model assistant. Your task is to generate five
different versions of the given user question. Provide these alternative
questions separated by newlines.
Original question: {question}""",
    )

    template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    # Use the exact tag you pulled from ollama, e.g. "llama3.1:8b".
    local_model = "llama3.1:8b"
    llm = ChatOllama(model=local_model)

    retriever = read_pdf_file(pdf_path)
    retriever = MultiQueryRetriever.from_llm(retriever, llm, prompt=LLM_PROMPT)

    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    response = chain.invoke(question)
    print(response)
    return response
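To run the whole pipeline end to end (the file path below is a placeholder; point it at your own PDF):

if __name__ == "__main__":
    # Placeholder path; replace with your own PDF file.
    main("Summarize this document about climate change", "your_pdf_file_path")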