Commit 53b44b3 · visual tweaks
Committed by augray
1 Parent(s): 57f2e80
README.md
CHANGED
@@ -1,13 +1,15 @@
 ---
 title: Text To SQL Hub Datasets
-emoji: π₯
+emoji: π₯ π π€
 colorFrom: blue
 colorTo: blue
 sdk: gradio
 sdk_version: 4.44.0
 app_file: app.py
-pinned:
+pinned: false
 license: apache-2.0
 ---
 
-
+A simple space to use an LLM to iteratively refine SQL queries for HuggingFace datasets.
+A fork of
+[davidberenstein1957/text-to-sql-hub-datasets](https://huggingface.co/spaces/davidberenstein1957/text-to-sql-hub-datasets)
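The new README line sums up the Space: an LLM drafts DuckDB SQL against Hub datasets and you refine it in chat. As a rough illustration only (not code from this commit), a generated query can be run over a dataset's auto-converted parquet files; the datasets-server `/parquet` endpoint, its `parquet_files`/`url` fields, and the example dataset name below are assumptions taken from the public docs, not from app.py.

```python
import duckdb
import requests

# Rough sketch: fetch the first auto-converted parquet file for a dataset and
# run a DuckDB query over it via the httpfs extension.
dataset = "rajpurkar/squad"  # example dataset, not used by the Space
resp = requests.get(
    "https://datasets-server.huggingface.co/parquet",
    params={"dataset": dataset},
    timeout=10,
)
resp.raise_for_status()
first_url = resp.json()["parquet_files"][0]["url"]

con = duckdb.connect()
con.execute("INSTALL httpfs; LOAD httpfs;")  # needed to read parquet over HTTPS
print(con.execute(f"SELECT COUNT(*) AS n FROM read_parquet('{first_url}')").fetchall())
```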
app.py
CHANGED
@@ -7,12 +7,26 @@ from typing import Any
 import gradio as gr
 import requests
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
-from huggingface_hub.repocard import CardData, RepoCard
 
 
 logger = logging.getLogger(__name__)
 example = HuggingfaceHubSearch().example_value()
 
+HEADER_CONTENT = "# π€ Dataset DuckDB Query Chatbot"
+ABOUT_CONTENT = """
+This is a basic text to SQL tool that allows you to query datasets on Huggingface Hub.
+It uses [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
+via [together.ai](https://together.ai)
+Also, it uses the
+[dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
+
+Query history is saved and given to the chat model so you can chat to refine your query as you go.
+
+When the DuckDB modal is presented, you may need to click on the name of the
+config/split at the base of the modal to get the table loaded for DuckDB's use.
+
+Search for and select a dataset to begin.
+"""
 
 SYSTEM_PROMPT_TEMPLATE = (
     "You are a SQL query expert assistant that returns a DuckDB SQL queries "
@@ -119,7 +133,11 @@ def get_split_choices(card_data: dict[str, Any]) -> list[str]:
 
 def query_dataset(hub_repo_id, card_data, query, config, split, history):
     if card_data is None or len(card_data) == 0:
-
+        if hub_repo_id:
+            iframe = get_iframe(hub_repo_id)
+        else:
+            iframe = "<p>No dataset selected.</p>"
+        return "", iframe, [], ""
     card_data = json.loads(card_data)
     system_prompt = get_system_prompt(card_data, config, split)
     messages = [{"role": "system", "content": system_prompt}]
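The rewritten early return now yields four values ("", iframe, [], ""), one per output component wired up in the final hunk (sql_out, search_out, chatbot, query); the trailing empty string is what clears the query textbox. A stripped-down sketch of that Gradio contract, with illustrative component names rather than the Space's real layout:

```python
import gradio as gr

with gr.Blocks() as demo:
    query = gr.Textbox(label="Query")
    sql_out = gr.Textbox(label="Generated DuckDB SQL")
    chatbot = gr.Chatbot()
    run_btn = gr.Button("Run")

    def run_query(question, history):
        # A callback attached to several outputs must return one value per
        # output, in order; "" for the textbox clears the query box.
        history = history or []
        sql = f"SELECT * FROM train LIMIT 10  -- for: {question}"  # placeholder
        history.append((question, sql))
        return sql, history, ""  # sql_out, chatbot, query (cleared)

    run_btn.click(run_query, inputs=[query, chatbot], outputs=[sql_out, chatbot, query])

if __name__ == "__main__":
    demo.launch()
```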
@@ -166,7 +184,7 @@ def query_dataset(hub_repo_id, card_data, query, config, split, history):
     duck_query = response_dict["choices"][0]["message"]["content"]
     duck_query = _sanitize_duck_query(duck_query)
     history.append((query, duck_query))
-    return duck_query, get_iframe(hub_repo_id, duck_query), history
+    return duck_query, get_iframe(hub_repo_id, duck_query), history, ""
 
 
 def _sanitize_duck_query(duck_query: str) -> str:
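The history.append((query, duck_query)) line is what drives the refinement loop: as the new About text says, query history is given back to the chat model. A hedged sketch of how such (question, SQL) pairs might be folded into OpenAI-style chat messages; the exact folding used by app.py is not shown in this diff.

```python
def build_messages(system_prompt, history, new_question):
    # Replay each (question, generated SQL) pair as a user/assistant turn so
    # the model refines its previous query instead of starting over.
    messages = [{"role": "system", "content": system_prompt}]
    for question, sql in history:
        messages.append({"role": "user", "content": question})
        messages.append({"role": "assistant", "content": sql})
    messages.append({"role": "user", "content": new_question})
    return messages

# Second turn: ask the model to adjust the query it just produced.
msgs = build_messages(
    "You are a SQL query expert assistant that returns DuckDB SQL queries.",
    [("How many rows are there?", "SELECT COUNT(*) FROM train;")],
    "Only count rows where the answer is non-empty.",
)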
@@ -186,12 +204,9 @@ def _sanitize_duck_query(duck_query: str) -> str:
 
 
 with gr.Blocks() as demo:
-    gr.Markdown(
-
-
-        It is built with [DuckDB](https://duckdb.org/), [Huggingface's Inference API](https://huggingface.co/docs/api-inference/index), and [LLama 3.1 70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct).
-        Also, it uses the [dataset-server API](https://redocly.github.io/redoc/?url=https://datasets-server.huggingface.co/openapi.json#operation/isValidDataset).
-        """)
+    gr.Markdown(HEADER_CONTENT)
+    with gr.Accordion("About/Help", open=False):
+        gr.Markdown(ABOUT_CONTENT)
     with gr.Row():
         search_in = HuggingfaceHubSearch(
             label="Search Huggingface Hub",
@@ -269,7 +284,7 @@ with gr.Blocks() as demo:
             split_selection,
             chatbot,
         ],
-        outputs=[sql_out, search_out, chatbot],
+        outputs=[sql_out, search_out, chatbot, query],
     )
     gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])
 
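Here the outputs list gains the query textbox so it can be cleared after each run, and the existing gr.on listener pops the results accordion open whenever the query button is clicked. A self-contained sketch of that gr.on / gr.update(open=True) pattern, with a hypothetical accordion in place of the Space's results panel:

```python
import gradio as gr

with gr.Blocks() as demo:
    query_btn = gr.Button("Query dataset")
    with gr.Accordion("Results", open=False) as accordion:
        gr.Markdown("Results will appear here.")

    # gr.on attaches one callback to a list of triggers; returning
    # gr.update(open=True) to the accordion output flips it open on click.
    gr.on([query_btn.click], fn=lambda: gr.update(open=True), outputs=[accordion])

if __name__ == "__main__":
    demo.launch()
```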