Commit 53b44b3 · visual tweaks
Committed by augray
1 Parent(s): 57f2e80
README.md
CHANGED
@@ -1,13 +1,15 @@
 ---
 title: Text To SQL Hub Datasets
-emoji: π₯
+emoji: π₯ π π€
 colorFrom: blue
 colorTo: blue
 sdk: gradio
 sdk_version: 4.44.0
 app_file: app.py
-pinned:
+pinned: false
 license: apache-2.0
 ---
 
-
+A simple space to use an LLM to iteratively refine SQL queries for HuggingFace datasets.
+A fork of
+[davidberenstein1957/text-to-sql-hub-datasets](https://huggingface.co/spaces/davidberenstein1957/text-to-sql-hub-datasets)
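The new README line sums up the Space: an LLM drafts DuckDB SQL against Hub datasets and you refine it in chat. As a rough illustration only (not code from this commit), a generated query can be run over a dataset's auto-converted parquet files; the datasets-server `/parquet` endpoint, its `parquet_files`/`url` fields, and the example dataset name below are assumptions taken from the public docs, not from app.py.

```python
import duckdb
import requests

# Rough sketch: fetch the first auto-converted parquet file for a dataset and
# run a DuckDB query over it via the httpfs extension.
dataset = "rajpurkar/squad"  # example dataset, not used by the Space
resp = requests.get(
    "https://datasets-server.huggingface.co/parquet",
    params={"dataset": dataset},
    timeout=10,
)
resp.raise_for_status()
first_url = resp.json()["parquet_files"][0]["url"]

con = duckdb.connect()
con.execute("INSTALL httpfs; LOAD httpfs;")  # needed to read parquet over HTTPS
print(con.execute(f"SELECT COUNT(*) AS n FROM read_parquet('{first_url}')").fetchall())
```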
app.py
CHANGED
@@ -7,12 +7,26 @@ from typing import Any
 import gradio as gr
 import requests
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
-from huggingface_hub.repocard import CardData, RepoCard
 
 
 logger = logging.getLogger(__name__)
 example = HuggingfaceHubSearch().example_value()
 
+HEADER_CONTENT = "# π€ Dataset DuckDB Query Chatbot"
+ABOUT_CONTENT = """
+This is a basic text to SQL tool that allows you to query datasets on Huggingface Hub.
+It uses [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
+via [together.ai](https://together.ai)
+Also, it uses the
+[dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
+
+Query history is saved and given to the chat model so you can chat to refine your query as you go.
+
+When the DuckDB modal is presented, you may need to click on the name of the
+config/split at the base of the modal to get the table loaded for DuckDB's use.
+
+Search for and select a dataset to begin.
+"""
 
 SYSTEM_PROMPT_TEMPLATE = (
     "You are a SQL query expert assistant that returns a DuckDB SQL queries "
@@ -119,7 +133,11 @@ def get_split_choices(card_data: dict[str, Any]) -> list[str]:
 
 def query_dataset(hub_repo_id, card_data, query, config, split, history):
     if card_data is None or len(card_data) == 0:
-
+        if hub_repo_id:
+            iframe = get_iframe(hub_repo_id)
+        else:
+            iframe = "<p>No dataset selected.</p>"
+        return "", iframe, [], ""
     card_data = json.loads(card_data)
     system_prompt = get_system_prompt(card_data, config, split)
     messages = [{"role": "system", "content": system_prompt}]
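The rewritten early return now yields four values ("", iframe, [], ""), one per output component wired up in the final hunk (sql_out, search_out, chatbot, query); the trailing empty string is what clears the query textbox. A stripped-down sketch of that Gradio contract, with illustrative component names rather than the Space's real layout:

```python
import gradio as gr

with gr.Blocks() as demo:
    query = gr.Textbox(label="Query")
    sql_out = gr.Textbox(label="Generated DuckDB SQL")
    chatbot = gr.Chatbot()
    run_btn = gr.Button("Run")

    def run_query(question, history):
        # A callback attached to several outputs must return one value per
        # output, in order; "" for the textbox clears the query box.
        history = history or []
        sql = f"SELECT * FROM train LIMIT 10  -- for: {question}"  # placeholder
        history.append((question, sql))
        return sql, history, ""  # sql_out, chatbot, query (cleared)

    run_btn.click(run_query, inputs=[query, chatbot], outputs=[sql_out, chatbot, query])

if __name__ == "__main__":
    demo.launch()
```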
@@ -166,7 +184,7 @@ def query_dataset(hub_repo_id, card_data, query, config, split, history):
     duck_query = response_dict["choices"][0]["message"]["content"]
     duck_query = _sanitize_duck_query(duck_query)
     history.append((query, duck_query))
-    return duck_query, get_iframe(hub_repo_id, duck_query), history
+    return duck_query, get_iframe(hub_repo_id, duck_query), history, ""
 
 
 def _sanitize_duck_query(duck_query: str) -> str:
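The history.append((query, duck_query)) line is what drives the refinement loop: as the new About text says, query history is given back to the chat model. A hedged sketch of how such (question, SQL) pairs might be folded into OpenAI-style chat messages; the exact folding used by app.py is not shown in this diff.

```python
def build_messages(system_prompt, history, new_question):
    # Replay each (question, generated SQL) pair as a user/assistant turn so
    # the model refines its previous query instead of starting over.
    messages = [{"role": "system", "content": system_prompt}]
    for question, sql in history:
        messages.append({"role": "user", "content": question})
        messages.append({"role": "assistant", "content": sql})
    messages.append({"role": "user", "content": new_question})
    return messages

# Second turn: ask the model to adjust the query it just produced.
msgs = build_messages(
    "You are a SQL query expert assistant that returns DuckDB SQL queries.",
    [("How many rows are there?", "SELECT COUNT(*) FROM train;")],
    "Only count rows where the answer is non-empty.",
)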
@@ -186,12 +204,9 @@ def _sanitize_duck_query(duck_query: str) -> str:
 
 
 with gr.Blocks() as demo:
-    gr.Markdown(
-
-
-        It is built with [DuckDB](https://duckdb.org/), [Huggingface's Inference API](https://huggingface.co/docs/api-inference/index), and [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
-        Also, it uses the [dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
-        """)
+    gr.Markdown(HEADER_CONTENT)
+    with gr.Accordion("About/Help", open=False):
+        gr.Markdown(ABOUT_CONTENT)
     with gr.Row():
         search_in = HuggingfaceHubSearch(
             label="Search Huggingface Hub",
@@ -269,7 +284,7 @@ with gr.Blocks() as demo:
             split_selection,
             chatbot,
         ],
-        outputs=[sql_out, search_out, chatbot],
+        outputs=[sql_out, search_out, chatbot, query],
     )
     gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])
 
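Here the outputs list gains the query textbox so it can be cleared after each run, and the existing gr.on listener pops the results accordion open whenever the query button is clicked. A self-contained sketch of that gr.on / gr.update(open=True) pattern, with a hypothetical accordion in place of the Space's results panel:

```python
import gradio as gr

with gr.Blocks() as demo:
    query_btn = gr.Button("Query dataset")
    with gr.Accordion("Results", open=False) as accordion:
        gr.Markdown("Results will appear here.")

    # gr.on attaches one callback to a list of triggers; returning
    # gr.update(open=True) to the accordion output flips it open on click.
    gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])

if __name__ == "__main__":
    demo.launch()
```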