martoh committed on
Commit
0eaf4b8
·
1 Parent(s): e6cd267

Additional Commits

Browse files
Files changed (2) hide show
  1. app.py +10 -2
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,6 +5,14 @@ import streamlit as st
5
  from dotenv import load_dotenv
6
  import os
7
  import time
 
 
 
 
 
 
 
 
8
 
9
  load_dotenv()
10
  jina_api_key = os.getenv('JINA_API_KEY')
@@ -42,7 +50,7 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
42
 
43
  def get_chunks(text):
44
  text_splitter = RecursiveCharacterTextSplitter(
45
- chunk_size = 1000,
46
  chunk_overlap = 100
47
  )
48
  chunks = text_splitter.split_text(text)
@@ -52,7 +60,7 @@ def get_chunks(text):
52
  from langchain_community.embeddings import JinaEmbeddings,HuggingFaceEmbeddings
53
  from langchain_community.vectorstores import FAISS
54
 
55
- def get_vectorestore(chunks,batch_size=512):
56
  embeddings = JinaEmbeddings(
57
  model_name='jina-embeddings-v4',
58
  jina_api_key=jina_api_key
 
5
  from dotenv import load_dotenv
6
  import os
7
  import time
8
+ import warnings
9
+
10
+ # Suppress TensorFlow, PyTorch, and protobuf warnings
11
+ warnings.filterwarnings('ignore', category=UserWarning)
12
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow info and warning messages
13
+ import logging
14
+ logging.getLogger('tensorflow').setLevel(logging.ERROR)
15
+ logging.getLogger('torch').setLevel(logging.ERROR)
16
 
17
  load_dotenv()
18
  jina_api_key = os.getenv('JINA_API_KEY')
 
50
 
51
  def get_chunks(text):
52
  text_splitter = RecursiveCharacterTextSplitter(
53
+ chunk_size = 800,
54
  chunk_overlap = 100
55
  )
56
  chunks = text_splitter.split_text(text)
 
60
  from langchain_community.embeddings import JinaEmbeddings,HuggingFaceEmbeddings
61
  from langchain_community.vectorstores import FAISS
62
 
63
+ def get_vectorestore(chunks,batch_size=16):
64
  embeddings = JinaEmbeddings(
65
  model_name='jina-embeddings-v4',
66
  jina_api_key=jina_api_key
requirements.txt CHANGED
@@ -10,3 +10,4 @@ langchain-text-splitters
10
  langchain>=0.1.0
11
  langchain-community>=0.0.30
12
  google-generativeai
 
 
10
  langchain>=0.1.0
11
  langchain-community>=0.0.30
12
  google-generativeai
13
+ protobuf>=5.26.1,<6.0.0