我正在研究流行的 PDF 聊天项目。我的设置方式是将所有 LLM 代码放在一个 Python 脚本中,并使用 Flask 将其封装为可以从前端调用的 API。现在,当我启动应用程序时,我需要先执行常规的 `npm run dev`,然后还必须另外执行 `python llmpython.py` 来启动我的 Python 脚本。有什么可靠的方法可以让 Python 脚本在应用启动时自动一并启动?另外,如果我把这个应用部署到线上,这套启动方式又该如何运作?
这是我的 llmpython.py 文件:
# Standard library
import os

# Third-party
from dotenv import load_dotenv
from flask import Flask, jsonify, request
from flask_cors import CORS, cross_origin
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader  # was missing: used by both endpoints
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# app instance
app = Flask(__name__)
CORS(app)
@cross_origin()
@app.route("/api/home", methods=['POST'])
def chat_document():
data = request.get_json()
pdfUrl = data['url']
query = data['chat']
#Load PDF
#The url should be coming from the front end through a post request
loader = PyPDFLoader(pdfUrl)
if loader:
data = loader.load_and_split()
else:
return "Error loading PDF"
#Text Splitting
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(data)
#Embedding and vector storage
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectorstore = FAISS.from_documents(texts, embeddings)
#query
# query = "What's the main point of the document?"
docs = vectorstore.similarity_search(query)
#Load LLM and chatchain
llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")
llmresponse = chain.run(input_documents=docs, question=query)
response = jsonify({
'message': llmresponse,
'role': 'ai'
})
response.headers.add('Access-Control-Allow-Origin', '*')
return response
@app.route("/api/guest", methods=['POST'])
def guest_document():
data = request.get_json()
pdfUrl = data['url']
query1 = data['chat1']
query2 = data['chat2']
#Load PDF
#The url should be coming from the front end through a post request
loader = PyPDFLoader(pdfUrl)
if loader:
data = loader.load_and_split()
else:
return "Error loading PDF"
#Text Splitting
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(data)
#Embedding and vector storage
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectorstore = FAISS.from_documents(texts, embeddings)
#query
# query = "What's the main point of the document?"
docs1 = vectorstore.similarity_search(query1)
docs2 = vectorstore.similarity_search(query2)
#Load LLM and chatchain
llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
chain = load_qa_chain(llm, chain_type="stuff")
llmresponse1 = chain.run(input_documents=docs1, question=query1)
llmresponse2 = chain.run(input_documents=docs2, question=query2)
response = jsonify({
'message1': llmresponse1,
'message2': llmresponse2,
'role': 'ai'
})
response.headers.add('Access-Control-Allow-Origin', '*')
return response
if __name__ == "__main__":
app.run(debug=True, port=8080)
这是我调用 Flask 应用程序的组件之一:
import { fileName, guestpdfUrl } from "@/components/Hero";
import { Button } from "@/components/ui/button";
import { useState } from "react";
import TabsSec from "./TabsSec";
const Guest = () => {
const [summary, setSummary] = useState<string>(""); // <-- specify type here
const [bulletSummary, setBulletSummary] = useState<string>(""); // <-- specify type here
const [isLoading, setIsLoading] = useState<boolean>(false); // <-- specify type here
const processDocument = (event: React.FormEvent) => {
event.preventDefault();
setIsLoading(true);
fetch("http://localhost:8080/api/guest", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
url: guestpdfUrl,
chat1: "Create a summary of this text",
chat2: "Create a 10 bullet point summary of this text",
}),
})
.then((response) => response.json())
.then((data) => {
console.log(data.message2);
setSummary(data.message1);
setBulletSummary(data.message2);
setIsLoading(false);
});
};
return (
<div className="flex items-center justify-center flex-col">
<div className=" text-[#202942] mb-4 text-4xl md:text-5xl tracking-tight font-extrabold">
Welcome Guest
</div>
<div className=" text-[#202942] my-4 text-center text-xl md:text-2xl tracking-tight font-extrabold">
You've uploaded a PDF called {fileName}
</div>
<div className="mb-8">
<Button
className="rounded-full bg-[#202942] text-[#dfeff4]
hover:bg-[#3a435e]
font-bold text-sm md:text-base py-2 px-3"
onClick={processDocument}
>
Process Document
</Button>
</div>
<div></div>
<TabsSec
summary={summary}
bulletSummary={bulletSummary}
isLoading={isLoading}
/>{" "}
</div>
);
};
export default Guest;
您如何建议我实现这一目标。我正在考虑使用 exec 子进程在 useEffect 中运行“python llmpython.py”,但看起来这是不可能的。我将不胜感激任何建议
有一个名为 supervisor 的工具,它可以帮你管理后台进程,并收集错误和输出日志。我建议你使用 gunicorn + supervisor 来运行你的 Python 应用。我对 Next.js 不太了解,但可以看看这篇官方部署指南:https://nextjs.org/learn-pages-router/basics/deploying-nextjs-app 。
另外不要忘记让两个服务运行在不同的端口上,并且不要让它们直接占用端口 80 和 443。相反,请在前面架一个像 NGINX 这样的反向代理。还要确保部署后你的 Flask API 在 localhost 之外也能访问到,这意味着启动 Flask 时需要绑定到对外的网络接口(例如 host=0.0.0.0),这样 Next.js 才能通过你的域名访问它。
这里有一个视频可能会有所帮助:https://www.youtube.com/watch?v=goToXTC96Co&list=PL-osiE80TeTs4UjLw5MM6OjgkjFeUxCYH&index=13