๐[FastAPI] FastAPI์ ChatGPT๋ฅผ ์ฌ์ฉํ PDF ๊ด๋ จ ์ง์ ์๋ต
main.py
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from routers.upload import router as upload_router
from routers.read import router as read_router
from routers.index import router as index_router
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before the application is built.
load_dotenv()
# The single FastAPI application object for this service.
app = FastAPI()
# Serve files from the local "static" directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
# Each feature lives in its own router module; registering the routers here
# keeps main.py free of endpoint logic. The prefix is the URL path under which
# that router's endpoints are exposed.
app.include_router(index_router, prefix="")
app.include_router(upload_router, prefix="/upload")
app.include_router(read_router, prefix="/read")
app.mount("/static", StaticFiles(directory="static"), name="static")
: ์ ์ ํ์ผ ์๋น์ ์ํด, static ํด๋๋ฅผ ๋ช ์ํด์ค๋ค.app.include_router(index_router, prefix="")
:include_router
๋ฅผ ์ฌ์ฉํด main.py์์ ๋ชจ๋ ๋ก์ง์ ๊ด๋ฆฌํ์ง ์๊ณ , ๋ถ๋ฆฌํด์ ๊ด๋ฆฌํ ์ ์๋๋ก ํ๋ค.- Flask์์
BluePrint
์ ๋น์ทํ ์ญํ ์ด๋ผ๊ณ ์๊ฐํ ์ ์์ ๋ฏ ํจ! prefix=""
์ต์ ์ ์ค์ ํด, ์๋ ํฌ์ธํธ๋ฅผ ์ค์ ํด์ค๋ค!- django์
urls.py
์ ๋น์ทํ ์ญํ
- django์
- Flask์์
routers/upload.py
from fastapi import File, UploadFile, Form, APIRouter
from fastapi.responses import JSONResponse
from utils.load_pdf import load_pdf
from utils.save_answer import save_pdf_answer, save_none_pdf_answer
from utils.gpt_config import get_answer
from dotenv import load_dotenv
from utils.save_pdf import save_pdf
from datetime import datetime
# Router object that the endpoint decorators in this module attach to.
router = APIRouter()

# Load environment variables (python-dotenv).
load_dotenv()

# Timestamp string in "YYYY-MM-DD HH:MM:SS" form.
# NOTE: evaluated once, when this module is imported -- not once per request.
now = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
@router.post("")
async def upload_file(
    user_id: str = Form(...),
    pdf: UploadFile = File(None),
    question: str = Form(...),
):
    """Answer ``question`` using the uploaded PDF as context.

    Args:
        user_id: Identifier of the requesting user (form field).
        pdf: The uploaded PDF. Declared optional in the signature, but this
            endpoint cannot work without it, so a missing file is rejected.
        question: The user's question (form field).

    Returns:
        JSONResponse with a single ``"answer"`` key.

    Raises:
        HTTPException: 400 when no PDF was sent, 500 when the PDF could not
            be stored on disk.
    """
    # Imported locally so this handler does not depend on edits to the
    # module's import section.
    from fastapi import HTTPException
    from fastapi.concurrency import run_in_threadpool

    if pdf is None:
        raise HTTPException(
            status_code=400,
            detail="A PDF file is required for this endpoint.",
        )

    # Build the timestamp per request. The module-level value is computed
    # once at import time, so reusing it would give every request the same
    # file-name suffix and let uploads/answers overwrite each other.
    requested_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    pdf_path = await save_pdf(pdf, user_id, requested_at)
    if pdf_path is None:
        # save_pdf signals failure by returning None instead of raising.
        raise HTTPException(
            status_code=500,
            detail="Failed to store the uploaded PDF.",
        )

    context = await load_pdf(pdf_path, question)
    prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"

    # get_answer is a synchronous call; run it in a worker thread so the
    # event loop can keep serving other requests while it waits.
    answer = await run_in_threadpool(get_answer, prompt)

    save_pdf_answer(user_id, pdf_path, answer, pdf, requested_at)
    return JSONResponse(content={"answer": answer})
# NOTE(review): this handler reuses the name of the handler for the PDF route.
# Both routes are registered because the decorator runs at definition time,
# but the module attribute ``upload_file`` ends up bound to this function only.
@router.post("/nonepdf")
async def upload_file(
    user_id: str = Form(...),
    pdf: UploadFile = File(None),
    question: str = Form(...),
):
    """Answer ``question`` directly, without any PDF context.

    Args:
        user_id: Identifier of the requesting user (form field).
        pdf: Optional upload; not read here, only forwarded to the save helper.
        question: The user's question (form field).

    Returns:
        JSONResponse with a single ``"answer"`` key.
    """
    # Imported locally so this handler does not depend on edits to the
    # module's import section.
    from fastapi.concurrency import run_in_threadpool

    # Build the timestamp per request; a value computed once at import time
    # would make every answer file for the same user share one name.
    requested_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    prompt = f"Question: {question}\n\nAnswer:"

    # get_answer is a synchronous call; run it in a worker thread so the
    # event loop is not blocked while it waits.
    answer = await run_in_threadpool(get_answer, prompt)

    save_none_pdf_answer(user_id, answer, pdf, requested_at)
    return JSONResponse(content={"answer": answer})
์์ ๊ฐ์ด ํ์ผ์ ์์ฑํ๋ฉด, pdf๋ฅผ ์ฒจ๋ถํ ์ํ์์์ ์ง๋ฌธ๊ณผ pdf๋ฅผ ์ฒจ๋ถํ์ง ์์ ์ํ์์์ ์ง๋ฌธ์ ์ฒ๋ฆฌํ ์ ์๋ค!
- pdf๋ฅผ ์ฒจ๋ถํ ์ํ์ ์๋ ํฌ์ธํธ: ~/chatbot
- pdf๋ฅผ ์ฒจ๋ถํ์ง ์์ ์ํ์ ์๋ ํฌ์ธํธ: ~/chatbot/nonepdf
์ฌ๊ธฐ์ async
์ await
๋ django, flask์์ ์ฌ์ฉํด๋ณธ ๊ฒฝํ์ด ์๋ ์์ฝ์ด๋ผ์ ์๋์ ์ ๋ฆฌํด๋ณธ๋ค!
์ต์ ํ์ด์ฌ ๋ฒ์ ์ async
์ await
๋ฌธ๋ฒ๊ณผ ํจ๊ป ์ฝ๋ฃจํด
์ด๋ผ๋ ๊ฒ์ ์ฌ์ฉํ๋ ๋น๋๊ธฐ ํ๋ก๊ทธ๋๋ฐ
๋ฅผ ์ง์ํ๋ค๊ณ ํ๋ค!
๋น๋๊ธฐ ํ๋ก๊ทธ๋๋ฐ
๋น๋๊ธฐ ํ๋ก๊ทธ๋๋ฐ
์ด๋, ํน์ ์ฝ๋์ ์คํ ์๋ฃ๋ฅผ ๊ธฐ๋ค๋ฆฌ์ง ์๊ณ ๋ค์ ์ฝ๋๋ฅผ ์คํํ๋ ํ๋ก๊ทธ๋๋ฐ ํจ๋ฌ๋ค์์ด๋ค.
I/O ์์
, ๋คํธ์ํฌ ์์ฒญ ๋ฑ์ ๋ธ๋กํน ์ฐ์ฐ์์ ํนํ ์ ์ฉํ๋ฉฐ, asyncio
๋ผ์ด๋ธ๋ฌ๋ฆฌ๋ฅผ ํตํด ๋น๋๊ธฐ ํ๋ก๊ทธ๋๋ฐ์ ์ฌ์ฉํ ์ ์๋ค!
async def & await
async def
: ๋น๋๊ธฐ ํจ์๋ฅผ ์ ์ธํ๋ ์์ฝ์ด์. ์ด๋ ๊ฒ ์ ์ธ๋ ํจ์๋ ์ฝ๋ฃจํด
์ด๋ผ๋ ๊ฒ์ ๋ฐํํ๋ค.
await
: ์ฝ๋ฃจํด์ ์คํ์ ์ผ์ ์ค์งํ๊ณ , ์๋ฃ๋ ๋๊น์ง ๊ธฐ๋ค๋ฆฌ๋ ์์ฝ์ด์. await
๋ async
ํจ์ ๋ด์์๋ง ์ฌ์ฉ ๊ฐ๋ฅํจ!
์ฝ๋ฃจํด
๊ทธ๋ผ ์ฝ๋ฃจํด์ด ๋ญ๊น?
예를 들어, 한 번에 여러 개의 주문이 동시에 들어왔는데 요리사는 한 명이다.
그럼 요리사는 여러 요리를 번갈아 가며 만들어야 하는데, 이런 방식을 동시적(concurrent)으로 처리한다고 말한다. (여러 명의 요리사가 각자 동시에 요리하는 병렬(parallel) 처리와는 다르다.)
์ด๋ฐ ์ํฉ์์ ์๋ฆฌ์ฌ๋ ์ฝ๋ฃจํด
๊ณผ ๊ฐ์ด ๋์ํ๋ ๊ฒ์ด๋ผ๊ณ ์๊ฐํ๋ฉด ๋๋ค!
- ์ฌ๋ฌ ์์ ์ ๋ฒ๊ฐ์ ์ํ: ํ๋์ ์์ ์ด ๋๋๊ธฐ๋ฅผ ๊ธฐ๋ค๋ฆฌ๋ ๋์, ๋ค๋ฅธ ์์ ์ ์ํํ ์ ์์.
- ์์ ์ ์ค๋จ๊ณผ ์ฌ๊ฐ: ์งํ ์ค์ด๋ ์์ ์์ ๋ค๋ฅธ ์์ ์ผ๋ก ์ ํํ ๋, ํ์ฌ ์ํ๋ฅผ ๊ธฐ์ตํด์ผ ํ๋ค. ์ด ์ฒ๋ผ ์ฝ๋ฃจํด์ ์ค๋จ๋ ์ง์ ์ ๊ธฐ์ตํ๊ณ , ๋์ค์ ๊ทธ ์ง์ ๋ถํฐ ์์ ์ ์ฌ๊ฐํ๋ค.
- 효율적인 작업 관리: 여러 작업을 번갈아 가며 동시적으로 처리함으로써, 기다리는 시간을 낭비하지 않고 시간을 절약할 수 있다.
ํ์ด์ฌ์ async
์ await
๋ฅผ ์ฌ์ฉํด ์ฝ๋ฃจํด์ ๋ง๋ค๊ณ , async
๋ก ์ ์๋ ํจ์๋ โํ ์ผ ๋ชฉ๋กโ, await
๋ โ์ด ํ ์ผ์ด ๋๋ ๋๊น์ง ๊ธฐ๋ค๋ฆฌ๋ ๊ฒโ ์ด๋ผ๊ณ ์๊ฐํ ์ ์๋ค!
์ฆ, ์ฝ๋ฃจํด์ ์ฌ๋ฌ ์ผ์ ๋์์ ํ๋ฉด์, ๊ฐ๊ฐ์ ์ผ์ด ์๋ก ๋ฐฉํด๋ฐ์ง ์๋๋ก ํ๋ ๋ฐฉ๋ฒ์ด๋ค!
utils ํด๋
๋๋ controller์์ ์ฌ์ฉ๋๋ ๋ก์ง์ด ๊ธธ๊ณ ๋ณต์กํ ๊ฒฝ์ฐ, utils
ํ์ผ์ ์์ฑํ์ฌ ๋ฉ์๋๋ก ๊ด๋ฆฌํ๋ ๊ฒ์ ์ข์ํ๋ค.
AOP ๋ฐฉ์์ด๋ผ๊ณ ํ ์ ์์์ง๋ ๋ชจ๋ฅด๊ฒ ์ง๋ง, ์ฐ์ ์์ฃผ ์ฌ์ฉ๋๋ ๋ก์ง ํน์ ํ๋ก์ ํธ ์ ์ฒด์ ๊ฑธ์ณ ์ฌ์ฉ๋๋ ๋ก์ง, ๋์ค(Depth)๊ฐ ๊น์ด ๊ฐ๋ ์ฑ์ ์ ํดํ๋ ์ฝ๋ ๋ฑ์ ๋ฐ๋ก ๋ชจ๋ํํ์ฌ ๊ด๋ฆฌํ๋ ๋ฐฉ์์ด๋ค.
๋จผ์ ChatGPT API๋ฅผ ์ฌ์ฉํ์ฌ ๋ชจ๋ธ์ ์ ์ ํ๋ ๋ฑ์ ์ค์ ์ ์ ์ํ๋ git_config.py
, pdf๋ฅผ loadํ์ฌ ์ฝ๋ load_pdf.py
, ์ฒจ๋ถ๋ pdf๋ฅผ ์ง์ ๋ ๊ฒฝ๋ก์ ์ ์ฅํ๋ save_pdf.py
, ChatGPT์ ๋ต๋ณ์ ์ง์ ๋ ๊ฒฝ๋ก์ ์ ์ฅํ๋ save_answer.py
๊ฐ ์๋ค.
gpt_config.py
import os
from openai import OpenAI
from dotenv import load_dotenv
# Name of the chat model used for completion requests.
MODEL = "gpt-3.5-turbo"

# Load environment variables (python-dotenv), then read the API key from them.
load_dotenv()
OPENAI_KEY = os.environ.get("OPENAI_KEY")

# Shared OpenAI client for this module.
client = OpenAI(api_key=OPENAI_KEY)
def get_answer(prompt: str):
    """Send ``prompt`` to the chat model and return the reply text.

    The request consists of a fixed system message followed by ``prompt`` as
    the user message. The content of the first returned choice is returned.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model=MODEL,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
load_pdf.py
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from utils.gpt_config import OPENAI_KEY
async def load_pdf(pdf_path: str, question: str) -> str:
    """Return the PDF passages most similar to ``question``.

    The PDF at ``pdf_path`` is loaded and split, embedded into a FAISS index,
    and the 4 chunks closest to ``question`` are joined with blank lines.

    Args:
        pdf_path: Path of the PDF file on disk.
        question: Query used for the similarity search.

    Returns:
        The joined text of the matching chunks, or an empty string when the
        PDF produced no chunks.
    """
    import asyncio

    def _build_context() -> str:
        # Everything in here is synchronous (file parsing, embedding calls,
        # index construction), so it is run off the event loop below.
        pages = PyPDFLoader(pdf_path).load_and_split()
        if not pages:
            # Nothing to index; presumably FAISS.from_documents cannot build
            # an index from an empty list -- TODO confirm. Return no context.
            return ""
        embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)
        faiss_index = FAISS.from_documents(pages, embeddings)
        docs = faiss_index.similarity_search(question, k=4)
        return "\n\n".join(doc.page_content for doc in docs)

    # Run the blocking pipeline in the default executor so other requests
    # are not stalled while this one is being processed.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _build_context)
save_answer.py
import os
from fastapi import File, UploadFile
def save_pdf_answer(user_id: str, pdf_path: str, answer: str, pdf: UploadFile = None, time: str = None) -> bool:
    """Write ``answer`` to ``answers/pdfs/<name>_<time>.txt``.

    ``<name>`` is the base name of ``pdf_path`` when a PDF was supplied and a
    path is available; otherwise it is the literal ``no_pdf``.

    Args:
        user_id: Accepted for interface compatibility; not used here.
        pdf_path: Path of the stored PDF the answer refers to.
        answer: Text to write.
        pdf: The uploaded file; only its truthiness is checked. Defaults to
            ``None`` (a FastAPI ``File(...)`` marker has no meaning outside
            an endpoint signature and would always be truthy).
        time: Suffix appended to the file name.

    Returns:
        True when the file was written, False when writing failed.
    """
    stem = os.path.basename(pdf_path) if pdf and pdf_path else "no_pdf"
    answer_filename = f"answers/pdfs/{stem}_{time}.txt"
    try:
        os.makedirs(os.path.dirname(answer_filename), exist_ok=True)
        # Explicit UTF-8: answers may contain non-ASCII text and the
        # platform default encoding is not guaranteed to handle it.
        with open(answer_filename, "w", encoding="utf-8") as f:
            f.write(answer)
    except (OSError, TypeError) as e:
        # Best-effort persistence: report the failure and let the caller go on.
        # TypeError covers a non-string ``answer``.
        print(e)
        return False
    return True
def save_none_pdf_answer(user_id: str, answer: str, pdf: UploadFile = None, time: str = None) -> bool:
    """Write ``answer`` to ``answers/nonepdfs/<user_id>_none_pdf_<time>.txt``.

    Args:
        user_id: Used as the file-name prefix. Only its final path component
            is kept so a value such as ``../../x`` cannot escape the
            ``answers/nonepdfs`` directory.
        answer: Text to write.
        pdf: Accepted for interface compatibility; not used here. Defaults to
            ``None`` (a FastAPI ``File(...)`` marker has no meaning outside
            an endpoint signature).
        time: Suffix appended to the file name.

    Returns:
        True when the file was written, False when writing failed.
    """
    # user_id comes from a request form field, so strip any directory part.
    safe_user = os.path.basename(str(user_id).replace("\\", "/"))
    answer_filename = f"answers/nonepdfs/{safe_user}_none_pdf_{time}.txt"
    try:
        os.makedirs(os.path.dirname(answer_filename), exist_ok=True)
        # Explicit UTF-8: answers may contain non-ASCII text and the
        # platform default encoding is not guaranteed to handle it.
        with open(answer_filename, "w", encoding="utf-8") as f:
            f.write(answer)
    except (OSError, TypeError) as e:
        # Best-effort persistence: report the failure and let the caller go on.
        # TypeError covers a non-string ``answer``.
        print(e)
        return False
    return True
save_pdf.py
from fastapi import UploadFile
async def save_pdf(pdf: UploadFile, user_id: str, time: str) -> str:
    """Store the uploaded PDF under ``pdfs/`` and return the path written.

    The file is saved as ``pdfs/<user_id>_<filename>_<time>``.

    Args:
        pdf: The uploaded file; its ``filename`` and ``read()`` are used.
        user_id: File-name prefix.
        time: File-name suffix.

    Returns:
        The path of the stored file, or ``None`` when no file was given or
        the file could not be written.
    """
    import os

    if pdf is None:
        print("save_pdf: no file was provided")
        return None

    def _leaf(value) -> str:
        # Keep only the last path component. Both values come from the
        # request, so directory parts ("../", absolute paths) are dropped to
        # keep the write inside pdfs/.
        return os.path.basename(str(value).replace("\\", "/"))

    pdf_path = f"pdfs/{_leaf(user_id)}_{_leaf(pdf.filename or '')}_{time}"
    try:
        # The target directory may not exist on a fresh checkout.
        os.makedirs("pdfs", exist_ok=True)
        # Read first so a failed read does not leave an empty file behind.
        contents = await pdf.read()
        with open(pdf_path, "wb") as buffer:
            buffer.write(contents)
    except (OSError, ValueError) as e:
        # Best-effort: report the failure and signal it with None.
        print(e)
        return None
    return pdf_path
static ํด๋
static ํด๋์์๋ html, css, javascript ํ์ผ์ ๊ด๋ฆฌํ๋ค.
index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>PDF Q&A System</title>
    <link rel="stylesheet" href="/static/styles.css">
</head>
<body>
    <div class="container">
        <h1>PDF Q&A System</h1>
        <!-- Submission is handled by /static/script.js; the element ids below are looked up there. -->
        <form id="uploadForm">
            <!-- aria-label gives each control an accessible name; a placeholder alone is not one. -->
            <input type="text" id="userId" placeholder="User ID" aria-label="User ID" required>
            <!-- The PDF is optional: script.js picks the endpoint based on whether a file is chosen. -->
            <input type="file" id="pdfFile" accept=".pdf" aria-label="PDF file (optional)">
            <textarea id="question" placeholder="Enter your question" aria-label="Question" required></textarea>
            <button type="submit">Submit</button>
        </form>
        <!-- Filled in by script.js; aria-live lets assistive technology announce the answer when it arrives. -->
        <div id="answer" aria-live="polite"></div>
    </div>
    <script src="/static/script.js"></script>
</body>
</html>
script.js
// Submit the question to the PDF endpoint when a file is attached,
// otherwise to the no-PDF endpoint, and render the returned answer.
document.getElementById('uploadForm').addEventListener('submit', async (e) => {
    e.preventDefault();

    const answerBox = document.getElementById('answer');
    const userId = document.getElementById('userId').value;
    const pdfFile = document.getElementById('pdfFile').files[0];
    const question = document.getElementById('question').value;

    const formData = new FormData();
    formData.append('user_id', userId);
    formData.append('question', question);

    // Paths match the routes exactly (no trailing slash), so the server does
    // not have to redirect the POST.
    let endpoint = '/upload/nonepdf';
    if (pdfFile) {
        formData.append('pdf', pdfFile);
        endpoint = '/upload';
    }

    try {
        const response = await fetch(endpoint, {
            method: 'POST',
            body: formData
        });
        // fetch only rejects on network failure; HTTP error statuses must be
        // checked explicitly or an error body would be rendered as an answer.
        if (!response.ok) {
            throw new Error(`Request failed with status ${response.status}`);
        }
        const data = await response.json();

        // Insert the answer as text, not markup: it is model output (possibly
        // echoing PDF content) and must not be interpreted as HTML.
        const heading = document.createElement('h2');
        heading.textContent = 'Answer:';
        const body = document.createElement('p');
        body.textContent = data.answer;
        answerBox.textContent = '';
        answerBox.append(heading, body);
    } catch (error) {
        console.error('Error:', error);
        answerBox.innerHTML = '<p>An error occurred. Please try again.</p>';
    }
});
- pdf๋ฅผ ์ฒจ๋ถํ๋์ง ์ฌ๋ถ์ ๋ฐ๋ผ ์๋ํฌ์ธํธ๋ฅผ ๋ค๋ฅด๊ฒ ์ค์ ํ์ฌ POST ์์ฒญ์๋ณด๋ธ๋ค!
styles.css
/* Page-wide defaults. */
body {
    font-family: Arial, sans-serif;
    line-height: 1.6;
    margin: 0;
    padding: 0;
    background-color: #f4f4f4;
}

/* Centered content column. */
.container {
    width: 80%;
    margin: auto;
    overflow: hidden;
    padding: 20px;
}

form {
    background: #fff;
    padding: 20px;
    margin-bottom: 20px;
}

/* border-box keeps padding inside the 100% width; without it the controls
   are wider than the form and spill past its right edge. */
input[type="text"], input[type="file"], textarea {
    box-sizing: border-box;
    width: 100%;
    padding: 10px;
    margin-bottom: 10px;
}

button {
    box-sizing: border-box;
    display: block;
    width: 100%;
    padding: 10px;
    background: #333;
    color: #fff;
    border: none;
    cursor: pointer;
}

button:hover {
    background: #555;
}

/* Panel that script.js fills with the answer. */
#answer {
    background: #fff;
    padding: 20px;
    margin-top: 20px;
}
์ฐธ๊ณ ์๋ฃ
๋์์ฑ๊ณผ async / await - FastAPI
ํ์ด์ฌ ๋น๋๊ธฐ(async)ํจ์์ ์ฝ๋ฃจํด(coroutine) ํ๋ฆ ์ดํดํ๊ธฐ
๋๊ธ๋จ๊ธฐ๊ธฐ