Spaces:

rishisriv-bh
/

Statements-Reconciliation

Running

App Files Community

rishisriv-bh committed on Mar 20, 2025

Commit

5cdda35

verified ·

1 Parent(s): e900a6d

Update potential_app.py

Browse files

Files changed (1) hide show

potential_app.py +18 -105

potential_app.py CHANGED Viewed

@@ -1,113 +1,26 @@
 import gradio as gr
-import openpyxl
-import PyPDF2
-import pandas as pd
 from PIL import Image
-import pytesseract  # Replaced EasyOCR
 import io
-import os
-from huggingface_hub import InferenceClient
-# Access the Hugging Face token from the environment variable
-hf_token = os.environ.get("HF_TOKEN")
-def reconcile_statements(erp_file, bank_file, status_text):
-    status_text.set_value("⏳ Processing your request... Please wait.")
-    try:
-        # Extract ERP statement
-        erp_statement = ""
-        erp_filename = erp_file.name
-        if erp_filename.endswith((".xlsx", ".xls")):
-            workbook = openpyxl.load_workbook(erp_filename)
-            sheet = workbook.active
-            for row in sheet.iter_rows():
-                for cell in row:
-                    erp_statement += str(cell.value) + "\t"
-                erp_statement += "\n"
-        elif erp_filename.endswith(".pdf"):
-            pdf_reader = PyPDF2.PdfReader(erp_filename)
-            for page in pdf_reader.pages:
-                erp_statement += page.extract_text() or ""
-        elif erp_filename.endswith((".jpg", ".jpeg", ".png")):
-            image = Image.open(io.BytesIO(erp_file.read()))
-            erp_statement = pytesseract.image_to_string(image)  # Tesseract OCR
-        elif erp_filename.endswith(".csv"):
-            df = pd.read_csv(erp_filename)
-            erp_statement = df.to_string()
-        else:
-            raise ValueError("Unsupported ERP file format.")
-        # Extract bank statement (similar logic as above)
-        bank_statement = ""
-        bank_filename = bank_file.name
-        if bank_filename.endswith((".xlsx", ".xls")):
-            workbook = openpyxl.load_workbook(bank_filename)
-            sheet = workbook.active
-            for row in sheet.iter_rows():
-                for cell in row:
-                    bank_statement += str(cell.value) + "\t"
-                bank_statement += "\n"
-        elif bank_filename.endswith(".pdf"):
-            pdf_reader = PyPDF2.PdfReader(bank_filename)
-            for page in pdf_reader.pages:
-                bank_statement += page.extract_text() or ""
-        elif bank_filename.endswith((".jpg", ".jpeg", ".png")):
-            image = Image.open(io.BytesIO(bank_file.read()))
-            bank_statement = pytesseract.image_to_string(image)  # Tesseract OCR
-        elif bank_filename.endswith(".csv"):
-            df = pd.read_csv(bank_filename)
-            bank_statement = df.to_string()
-        else:
-            raise ValueError("Unsupported bank file format.")
-        # Prepare prompt for the model
-        prompt = f"Reconcile these statements:\nERP:\n{erp_statement}\nBank:\n{bank_statement}"
-        # Call the model
-        client = InferenceClient(provider="together", api_key=hf_token)
-        completion = client.chat.completions.create(
-            model="deepseek-ai/DeepSeek-R1",
-            messages=[{"role": "user", "content": prompt}],
-        )
-        if completion.choices:
-            reconciliation_results = completion.choices[0].message.get('content', '')
-        else:
-            reconciliation_results = "⚠️ No response received from the model."
-        # Format output
-        output = f"""
-        <div style="font-family: Arial, sans-serif; background-color: #f8f9fa; padding: 20px; border-radius: 10px; box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);">
-            <h2 style="color: #4a90e2; text-align: center;">🔍 Reconciliation Results</h2>
-            <div style="background-color: white; padding: 15px; border-radius: 8px; border: 1px solid #ddd;">
-                <pre style="white-space: pre-wrap; font-size: 14px; line-height: 1.5; color: #333;">{reconciliation_results}</pre>
-            </div>
-        </div>
-        """
-        status_text.set_value("✅ Processing complete! Here are your results:")
-        return status_text, output
-    except Exception as e:
-        status_text.set_value(f"❌ Error: {e}")
-        return status_text, f"<h1>Error</h1><p>{e}</p>"
-with gr.Blocks() as iface:
-    status_text = gr.Markdown("👋 Upload your files to begin reconciliation.")
-    with gr.Row():
-        erp_input = gr.File(label="📂 Upload ERP Statement", type="filepath")
-        bank_input = gr.File(label="📂 Upload Bank Statement", type="filepath")
-    submit_btn = gr.Button("🔄 Start Reconciliation")
-    result_output = gr.HTML()
-    submit_btn.click(
-        fn=reconcile_statements,
-        inputs=[erp_input, bank_input, status_text],
-        outputs=[status_text, result_output]
-    )
-if __name__ == "__main__":
-    iface.launch(debug=True)

 import gradio as gr
+import pytesseract
 from PIL import Image
 import io
+def extract_text(file):
+    if file is None:
+        return "Please upload an invoice."
+    image = Image.open(file.name)
+    text = pytesseract.image_to_string(image)
+    print(text)
+    return text
+with gr.Blocks() as demo:
+    gr.Markdown("## Invoice OCR Extractor")
+    with gr.Row():
+        file_input = gr.File(label="Upload Invoice (PDF or Image)")
+        extract_button = gr.Button("Extract Text")
+    text_output = gr.Textbox(label="Extracted Text", lines=10)
+    extract_button.click(extract_text, inputs=file_input, outputs=text_output)
+demo.launch()