| import os |
| from langchain.embeddings.openai import OpenAIEmbeddings |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from langchain.vectorstores import Chroma |
| from langchain.document_loaders import PyPDFium2Loader |
| from langchain.chains.question_answering import load_qa_chain |
| |
| from langchain.chat_models import ChatOpenAI |
|
|
|
|
class PDFQuery:
    """Answer questions about a PDF using OpenAI embeddings and a chat model.

    Call :meth:`ingest` with a PDF path to index the document, then
    :meth:`ask` to query it. Until a document has been ingested, ``ask``
    returns a fixed prompt asking for one.
    """

    def __init__(self, openai_api_key: "str | None" = None) -> None:
        """Create the embedding model, text splitter and chat model.

        Args:
            openai_api_key: Optional OpenAI API key. When given, it is
                exported as the ``OPENAI_API_KEY`` environment variable
                before the OpenAI clients are built. When omitted, the
                environment is left untouched and ``OPENAI_API_KEY`` is
                expected to be configured already.
        """
        # SECURITY: credentials must never be committed to source. The key
        # comes from the caller or from the process environment only.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key

        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=200,
        )
        self.llm = ChatOpenAI(temperature=0)

        # Both stay None until ingest() has indexed a document.
        self.chain = None
        self.db = None

    def ask(self, question: str) -> str:
        """Answer ``question`` from the ingested document.

        Args:
            question: The question to answer.

        Returns:
            The value returned by the QA chain's ``run``, or the string
            ``"Please, add a document."`` when nothing has been ingested.
        """
        # Both the retriever and the chain are needed; guard on both so a
        # missing retriever cannot surface as an AttributeError.
        if self.chain is None or self.db is None:
            return "Please, add a document."

        docs = self.db.get_relevant_documents(question)
        return self.chain.run(input_documents=docs, question=question)

    def ingest(self, file_path: os.PathLike) -> None:
        """Load a PDF, split it into chunks and index it for retrieval.

        Replaces the retriever and QA chain held by this instance.

        Args:
            file_path: Path to the PDF; a ``str`` or any ``os.PathLike``.
        """
        # Normalize path-like objects to the plain path the loader receives.
        loader = PyPDFium2Loader(os.fspath(file_path))
        documents = loader.load()
        chunks = self.text_splitter.split_documents(documents)

        # Build everything first and assign last, so a failure part-way
        # through leaves the previous retriever/chain pair intact.
        retriever = Chroma.from_documents(chunks, self.embeddings).as_retriever()
        # Reuse the chat model created in __init__ instead of constructing
        # a second, identically configured one.
        chain = load_qa_chain(self.llm, chain_type="stuff")

        self.db = retriever
        self.chain = chain
|
|