rishisriv-bh committed on
Commit
5cdda35
·
verified ·
1 Parent(s): e900a6d

Update potential_app.py

Browse files
Files changed (1) hide show
  1. potential_app.py +18 -105
potential_app.py CHANGED
@@ -1,113 +1,26 @@
1
  import gradio as gr
2
- import openpyxl
3
- import PyPDF2
4
- import pandas as pd
5
  from PIL import Image
6
- import pytesseract # Replaced EasyOCR
7
  import io
8
- import os
9
- from huggingface_hub import InferenceClient
10
 
11
- # Access the Hugging Face token from the environment variable
12
- hf_token = os.environ.get("HF_TOKEN")
13
-
14
def _extract_statement_text(statement_file, label):
    """Return the text content of one uploaded statement file.

    Args:
        statement_file: Either a filesystem path string or an object that
            exposes the path as ``.name``; ``None`` when nothing was uploaded.
        label: Human-readable name of the statement ("ERP" or "bank"), used
            only in error messages.

    Returns:
        The statement rendered as plain text.

    Raises:
        ValueError: If no file was supplied or the extension is unsupported.
    """
    if statement_file is None:
        raise ValueError(f"No {label} file uploaded.")

    # Work from the path so the same code handles both plain path strings
    # and file wrappers; nothing here relies on a ``.read()`` method.
    path = getattr(statement_file, "name", statement_file)
    # Lower-case once so ".PDF" / ".Xlsx" etc. are recognised too.
    lowered = str(path).lower()

    if lowered.endswith((".xlsx", ".xls")):
        # NOTE(review): openpyxl targets the .xlsx family; a legacy .xls file
        # is expected to raise here and surface through the caller's error
        # path -- confirm whether .xls needs a different reader.
        workbook = openpyxl.load_workbook(path)
        # One line per row, each cell followed by a tab (original layout).
        return "".join(
            "".join(str(cell.value) + "\t" for cell in row) + "\n"
            for row in workbook.active.iter_rows()
        )

    if lowered.endswith(".pdf"):
        pdf_reader = PyPDF2.PdfReader(path)
        # extract_text() may yield nothing for a page; treat that as empty.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

    if lowered.endswith((".jpg", ".jpeg", ".png")):
        # Context manager releases the image file handle after OCR.
        with Image.open(path) as image:
            return pytesseract.image_to_string(image)  # Tesseract OCR

    if lowered.endswith(".csv"):
        return pd.read_csv(path).to_string()

    raise ValueError(f"Unsupported {label} file format.")


def reconcile_statements(erp_file, bank_file, status_text):
    """Extract two uploaded statements and ask the model to reconcile them.

    Args:
        erp_file: Uploaded ERP statement (path string or object with ``.name``).
        bank_file: Uploaded bank statement (same forms as ``erp_file``).
        status_text: Current value of the status component. Kept for
            interface compatibility; it is not read, because an event handler
            receives component values rather than components, so the status
            is reported through the first return value instead.

    Returns:
        A ``(status_message, html)`` pair: a short status line and an HTML
        fragment holding either the reconciliation result or the error.
        Failures are reported through this pair rather than raised.
    """
    # Local import keeps this block self-contained; needed for escaping below.
    import html

    try:
        erp_statement = _extract_statement_text(erp_file, "ERP")
        bank_statement = _extract_statement_text(bank_file, "bank")

        # Prepare prompt for the model
        prompt = f"Reconcile these statements:\nERP:\n{erp_statement}\nBank:\n{bank_statement}"

        # Call the model
        client = InferenceClient(provider="together", api_key=hf_token)
        completion = client.chat.completions.create(
            model="deepseek-ai/DeepSeek-R1",
            messages=[{"role": "user", "content": prompt}],
        )

        if completion.choices:
            reconciliation_results = completion.choices[0].message.get('content', '') or ""
        else:
            reconciliation_results = "⚠️ No response received from the model."

        # Escape before embedding: model output may contain angle brackets
        # (e.g. reasoning tags) that a browser would otherwise treat as markup.
        safe_results = html.escape(reconciliation_results)

        # Format output
        output = f"""
<div style="font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px; border-radius: 10px; box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);">
<h2 style="color: #4a90e2; text-align: center;">🔍 Reconciliation Results</h2>
<div style="background-color: white; padding: 15px; border-radius: 8px; border: 1px solid #ddd;">
<pre style="white-space: pre-wrap; font-size: 14px; line-height: 1.5; color: #333;">{safe_results}</pre>
</div>
</div>
"""

        return "✅ Processing complete! Here are your results:", output

    except Exception as e:
        # Top-level UI boundary: report any failure to the user instead of
        # crashing the handler. The message is escaped for the HTML pane.
        return f"❌ Error: {e}", f"<h1>Error</h1><p>{html.escape(str(e))}</p>"
97
 
98
# Reconciliation UI: two statement uploads, a trigger button, a status line
# and an HTML pane for the result.
with gr.Blocks() as iface:
    status_text = gr.Markdown("👋 Upload your files to begin reconciliation.")
    with gr.Row():
        # type="filepath" hands the handler a path rather than file contents.
        erp_input = gr.File(label="📂 Upload ERP Statement", type="filepath")
        bank_input = gr.File(label="📂 Upload Bank Statement", type="filepath")
    submit_btn = gr.Button("🔄 Start Reconciliation")
    result_output = gr.HTML()

    # The handler returns (status, html), mapped onto the two outputs in order.
    submit_btn.click(
        fn=reconcile_statements,
        inputs=[erp_input, bank_input, status_text],
        outputs=[status_text, result_output],
    )

if __name__ == "__main__":
    iface.launch(debug=True)
 
1
  import gradio as gr
2
+ import pytesseract
 
 
3
  from PIL import Image
 
4
  import io
 
 
5
 
6
def extract_text(file):
    """Run Tesseract OCR on an uploaded image and return the recognised text.

    Args:
        file: The uploaded file, given either as a path string or as an
            object exposing the path as ``.name``; ``None`` when nothing has
            been uploaded.

    Returns:
        The OCR text, or a short user-facing message when no file was
        uploaded or the upload cannot be opened as an image.
    """
    if file is None:
        return "Please upload an invoice."

    # Accept both file wrappers (with .name) and plain path strings.
    path = getattr(file, "name", file)

    # Only the open call is guarded: Pillow signals a missing or non-image
    # file (for example a PDF) with an OSError subclass. OCR-engine failures
    # are left to propagate so they are not mislabelled as a bad upload.
    try:
        image = Image.open(path)
    except OSError as exc:
        return f"Could not read the uploaded file as an image: {exc}"

    # Context manager closes the underlying file handle once OCR is done.
    with image:
        text = pytesseract.image_to_string(image)

    # Echo the result to stdout (kept from the original for log inspection).
    print(text)
    return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
# Invoice OCR UI: one upload, one button, one text box for the OCR result.
with gr.Blocks() as demo:
    gr.Markdown("## Invoice OCR Extractor")

    with gr.Row():
        # The handler opens the upload with PIL only, so the picker is
        # limited to images and the label no longer advertises PDF support.
        file_input = gr.File(label="Upload Invoice (Image)", file_types=["image"])
        extract_button = gr.Button("Extract Text")

    text_output = gr.Textbox(label="Extracted Text", lines=10)

    # Clicking the button sends the uploaded file to extract_text and shows
    # the returned string in the text box.
    extract_button.click(extract_text, inputs=file_input, outputs=text_output)

demo.launch()