DaggrGenerator / daggr_gen /daggr_gen.py
Csplk's picture
Testing generative daggr script output
1ba6b3f
"""
HF Space to Daggr Node Generator
================================
Automatically generate daggr nodes from Hugging Face Gradio Spaces.
Extensible architecture supporting future InferenceNode and FnNode generators.
Usage:
python daggr_gen.py "username/space-name" [--api-name /predict] [--output node.py]
"""
import argparse
import json
import re
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse
try:
from gradio_client import Client, handle_file
import gradio as gr
except ImportError:
print("Error: Required packages not installed.")
print("Run: pip install gradio gradio-client daggr")
sys.exit(1)
@dataclass
class PortSchema:
    """Represents an input or output port schema.

    Attributes:
        name: Port name, used as the dictionary key in generated code.
        python_type: Short type name used to pick a gradio component.
        component_type: Lower-cased gradio component name, when known.
        label: Human-readable label copied into the generated component.
        default: Default value copied into the generated component.
        description: Free-form description of the port.
        choices: Allowed values for selection-style components.
    """

    name: str
    python_type: str
    component_type: Optional[str] = None
    label: Optional[str] = None
    default: Any = None
    description: Optional[str] = None
    choices: Optional[List] = None

    def to_gradio_component(self) -> str:
        """Convert to gradio component code string.

        Returns:
            Source code for a component *instance*, e.g.
            ``gr.Textbox(label="Prompt")``. String arguments are emitted as
            escaped literals so quotes or newlines in a label/default cannot
            break the generated code.
        """
        type_mapping = {
            "str": "gr.Textbox",
            "int": "gr.Number",
            "float": "gr.Number",
            "bool": "gr.Checkbox",
            "filepath": "gr.File",
            "file": "gr.File",
            "image": "gr.Image",
            "audio": "gr.Audio",
            "video": "gr.Video",
            "dict": "gr.JSON",
            "list": "gr.JSON",
            "dataframe": "gr.Dataframe",
            "model3d": "gr.Model3D",
            "downloadbutton": "gr.File",  # Maps to File for download
            "annotatedimage": "gr.AnnotatedImage",
        }
        comp_base = type_mapping.get(self.python_type, "gr.Textbox")

        if self.choices:
            # Only selection components take ``choices``; emitting it on a
            # Textbox/JSON would make the generated code raise TypeError.
            if comp_base == "gr.Textbox":
                comp_base = "gr.Dropdown"
            elif self.python_type == "list":
                comp_base = "gr.CheckboxGroup"

        params = []
        if self.label:
            # json.dumps yields a double-quoted literal with ", \ and control
            # characters escaped; the result is also a valid Python literal.
            params.append(f"label={json.dumps(str(self.label), ensure_ascii=False)}")
        if self.default is not None and self.default != "":
            if isinstance(self.default, str):
                params.append(f"value={json.dumps(self.default, ensure_ascii=False)}")
            else:
                params.append(f"value={self.default!r}")
        if self.choices and comp_base in ("gr.Dropdown", "gr.CheckboxGroup"):
            params.append(f"choices={self.choices!r}")
        if comp_base == "gr.Textbox" and self.python_type == "str":
            # Long default text reads better in a multi-line box.
            if len(str(self.default or "")) > 50:
                params.append("lines=3")

        # Always emit a call so the generated code holds an instance rather
        # than a bare class reference.
        return f"{comp_base}({', '.join(params)})"
@dataclass
class APIEndpoint:
    """One callable API route exposed by a Gradio Space.

    Attributes:
        name: Display name of the endpoint.
        route: API route string identifying the endpoint.
        inputs: Input port schemas, in declaration order.
        outputs: Output port schemas, in declaration order.
        description: Optional free-form description of the endpoint.
    """

    name: str
    route: str
    # Each instance gets its own list (never a shared mutable default).
    inputs: List[PortSchema] = field(default_factory=list)
    outputs: List[PortSchema] = field(default_factory=list)
    description: Optional[str] = None
@dataclass
class NodeTemplate:
    """Result of a generator run: the rendered node plus its context.

    Attributes:
        space_id: Identifier of the Space the node was generated from.
        endpoint: The endpoint the node targets.
        all_endpoints: Routes of every endpoint that was discovered.
        imports: Import statements the generated code needs.
        node_code: Rendered source code of the node definition.
        wiring_placeholders: Comment lines documenting inputs and outputs.
        metadata: Extra generator-specific information.
    """

    space_id: str
    endpoint: APIEndpoint
    all_endpoints: List[str]
    imports: List[str]
    node_code: str
    wiring_placeholders: List[str]
    # Fresh dict per instance.
    metadata: Dict = field(default_factory=dict)
class NodeGenerator(ABC):
    """Interface every daggr node generator implements.

    Subclasses must provide both ``can_handle`` and ``generate``.
    """

    @abstractmethod
    def can_handle(self, space_info: Dict) -> bool:
        """Return whether this generator supports the described space."""

    @abstractmethod
    def generate(self, *args, **kwargs) -> NodeTemplate:
        """Build and return a node template."""
class GradioNodeGenerator(NodeGenerator):
    """
    Generator for daggr.GradioNode from HF Space URLs.

    Connects to a Space with ``gradio_client.Client``, reads its named API
    endpoints, maps each parameter/return to a ``PortSchema`` and renders the
    source code of a ``GradioNode`` definition.
    """

    # Mapping of gradio component types to Python types
    COMPONENT_TYPE_MAP = {
        "textbox": "str",
        "number": "float",
        "slider": "float",
        "checkbox": "bool",
        "checkboxgroup": "list",
        "radio": "str",
        "dropdown": "str",
        "image": "filepath",
        "file": "filepath",
        "audio": "filepath",
        "video": "filepath",
        "dataframe": "dataframe",
        "json": "dict",
        "gallery": "list",
        "chatbot": "list",
        "code": "str",
        "colorpicker": "str",
        "timer": "float",
        "model3d": "model3d",
        "downloadbutton": "filepath",
        "annotatedimage": "annotatedimage",
    }

    # Scalar type names accepted inside dict-style type descriptions, mapped
    # to the short names PortSchema understands. Both the Python spellings
    # ("int", "bool") and the JSON-schema spellings ("integer", "boolean",
    # "number") are recognised.
    _TYPE_ALIASES = {
        "filepath": "filepath",
        "integer": "int",
        "int": "int",
        "float": "float",
        "number": "float",
        "boolean": "bool",
        "bool": "bool",
    }

    @staticmethod
    def _one_line(text: Any) -> str:
        """Collapse *text* onto a single line so it is safe inside a ``#`` comment."""
        return " ".join(str(text).splitlines())

    def can_handle(self, space_info: Dict) -> bool:
        """Check if space has Gradio API."""
        return space_info.get("sdk") == "gradio"

    def _extract_space_id(self, url_or_id: str) -> str:
        """Extract space ID from URL or return as-is if already ID.

        ``https://huggingface.co/spaces/<user>/<name>[/...]`` becomes
        ``<user>/<name>``. Any other http(s) URL is returned unchanged (minus a
        trailing slash) so it can be passed on as a direct app URL; previously
        such URLs were reduced to their first path segment, which is empty for
        a bare host.
        """
        if not url_or_id.startswith("http"):
            return url_or_id
        parsed = urlparse(url_or_id)
        path_parts = [part for part in parsed.path.split("/") if part]
        # Handle https://huggingface.co/spaces/username/space-name
        if "huggingface.co" in parsed.netloc:
            if len(path_parts) >= 3 and path_parts[0] == "spaces":
                return "/".join(path_parts[1:3])
        # Handle direct space URL
        return url_or_id.rstrip("/")

    def _normalize_type(self, type_val) -> str:
        """Normalize Python type from API (handles both strings and dicts).

        Strings are lower-cased and returned as-is. Dicts are resolved through
        their ``"type"`` entry using ``_TYPE_ALIASES``; anything unrecognised
        (including complex types such as unions) falls back to ``"str"``.
        """
        if type_val is None:
            return "str"
        if isinstance(type_val, str):
            return type_val.lower()
        if isinstance(type_val, dict):
            inner = type_val.get("type")
            if isinstance(inner, str):
                return self._TYPE_ALIASES.get(inner.lower(), "str")
        return "str"

    def _parse_type_info(self, param: Dict) -> str:
        """Extract python_type and handle Union/Optional types.

        Returns:
            The normalized type name; for a union, the first non-``None``
            member is used.
        """
        raw_type = param.get("python_type")
        python_type = self._normalize_type(raw_type)
        # Check if it's optional (Union with None)
        if isinstance(raw_type, dict) and raw_type.get("type") == "union":
            # Try to find non-null type
            choices = raw_type.get("choices", [])
            non_none = [c for c in choices if self._normalize_type(c) != "none"]
            if non_none:
                python_type = self._normalize_type(non_none[0])
        return python_type

    def _inspect_endpoints(self, client: Client) -> List[APIEndpoint]:
        """Extract all API endpoints from Gradio Client.

        Returns:
            One ``APIEndpoint`` per entry under ``"named_endpoints"``; an empty
            list when the client reports no named endpoints.
        """
        endpoints = []
        # Get API info from client
        api_info = client.view_api(return_format="dict")
        if not api_info or "named_endpoints" not in api_info:
            return endpoints

        for route, info in api_info["named_endpoints"].items():
            endpoint = APIEndpoint(
                name=info.get("fn", route),
                route=route,
                description=info.get("description", ""),
            )

            # Parse inputs
            for param in info.get("parameters", []):
                comp_type = self._detect_component_type(param)
                python_type = self._parse_type_info(param)
                # NOTE(review): recent gradio_client versions appear to report
                # defaults under "parameter_default" rather than "default" -
                # confirm against the client version in use before relying on
                # the "Fixed value" rendering path.
                endpoint.inputs.append(
                    PortSchema(
                        name=param.get("parameter_name", "input"),
                        python_type=self.COMPONENT_TYPE_MAP.get(comp_type, python_type),
                        component_type=comp_type,
                        label=param.get("label"),
                        default=param.get("default"),
                        description=param.get("description"),
                        choices=param.get("choices"),
                    )
                )

            # Parse outputs
            returns = info.get("returns", [])
            used_names = set()
            for i, ret in enumerate(returns):
                comp_type = self._detect_component_type(ret)
                python_type = self._parse_type_info(ret)
                # Try to get a meaningful name
                ret_name = ret.get("label", "")
                if not ret_name:
                    ret_name = "result" if len(returns) == 1 else f"output_{i}"
                # Clean name for Python attribute
                ret_name = re.sub(r'[^a-zA-Z0-9_]', '_', ret_name).lower()
                if ret_name[0].isdigit():
                    ret_name = "out_" + ret_name
                # Two outputs sharing a label would otherwise become duplicate
                # keys in the generated ``outputs`` dict and silently collapse.
                base_name, suffix = ret_name, i
                while ret_name in used_names:
                    ret_name = f"{base_name}_{suffix}"
                    suffix += 1
                used_names.add(ret_name)

                endpoint.outputs.append(
                    PortSchema(
                        name=ret_name,
                        python_type=self.COMPONENT_TYPE_MAP.get(comp_type, python_type),
                        component_type=comp_type,
                        # ``or`` also covers a label that is present but empty/None.
                        label=ret.get("label") or f"Output {i+1}",
                        description=ret.get("description"),
                    )
                )

            endpoints.append(endpoint)
        return endpoints

    def _detect_component_type(self, param: Dict) -> str:
        """Detect Gradio component type from parameter info.

        Precedence: an explicit ``"component"`` string, then label keywords
        that suggest a file, then hints in the normalized python type, and
        finally ``"textbox"``.
        """
        label = (param.get("label", "") or "").lower()
        # Check explicit component field first
        component = param.get("component", "")
        if component and isinstance(component, str):
            return component.lower()
        # Check for file paths based on label
        if "path" in label or "file" in label:
            if "image" in label:
                return "image"
            elif "audio" in label:
                return "audio"
            elif "video" in label:
                return "video"
            elif "3d" in label or "model" in label or "mesh" in label:
                return "model3d"
            return "file"
        # Check python_type for hints
        python_type = self._parse_type_info(param)
        if "image" in python_type or "pil" in python_type:
            return "image"
        elif "dataframe" in python_type:
            return "dataframe"
        elif "filepath" in python_type:
            if "image" in label:
                return "image"
            return "file"
        return "textbox"

    def generate(
        self,
        space_url: str,
        api_name: Optional[str] = None,
        node_name: Optional[str] = None
    ) -> NodeTemplate:
        """
        Generate GradioNode template from space URL.

        Args:
            space_url: HF Space URL or ID (e.g., 'black-forest-labs/FLUX.1-schnell')
            api_name: Specific API endpoint to use (auto-selected if None).
                The leading slash is optional.
            node_name: Custom variable name for the node (auto-generated if None)

        Returns:
            A ``NodeTemplate`` holding the rendered node code and wiring docs.

        Raises:
            ValueError: If the space exposes no endpoints, or ``api_name`` does
                not match any of them.
        """
        space_id = self._extract_space_id(space_url)
        var_name = node_name or self._to_snake_case(space_id.split("/")[-1])

        # Connect and inspect
        print(f"🔍 Inspecting space: {space_id}")
        client = Client(space_id)
        endpoints = self._inspect_endpoints(client)
        if not endpoints:
            raise ValueError(f"No API endpoints found for space: {space_id}")

        # Select endpoint
        if api_name:
            # Accept both "/predict" and "predict".
            slashed = api_name if api_name.startswith("/") else f"/{api_name}"
            selected = next(
                (e for e in endpoints if e.route in (api_name, slashed)), None
            )
            if not selected:
                available = ", ".join([e.route for e in endpoints])
                raise ValueError(f"API endpoint '{api_name}' not found. Available: {available}")
        else:
            # Find best endpoint (one with inputs or outputs, not lambda)
            candidates = [e for e in endpoints if e.inputs or e.outputs]
            candidates = [e for e in candidates if not e.route.startswith("/lambda")]
            selected = candidates[0] if candidates else endpoints[0]

        def preview(ports: List[PortSchema]) -> str:
            # First three port names, with an ellipsis when more exist.
            shown = ', '.join([p.name for p in ports[:3]])
            return f"{shown}{'...' if len(ports) > 3 else ''}"

        print(f"✓ Found {len(endpoints)} endpoint(s), using: {selected.route}")
        if selected.inputs:
            print(f" Inputs: {len(selected.inputs)} ({preview(selected.inputs)})")
        if selected.outputs:
            print(f" Outputs: {len(selected.outputs)} ({preview(selected.outputs)})")

        # Build wiring placeholders
        wiring = self._generate_wiring_docs(selected, var_name)
        # Generate code
        code = self._render_node_code(space_id, var_name, selected)
        return NodeTemplate(
            space_id=space_id,
            endpoint=selected,
            all_endpoints=[e.route for e in endpoints],
            imports=["from daggr import GradioNode", "import gradio as gr"],
            node_code=code,
            wiring_placeholders=wiring,
            metadata={"generator": "GradioNodeGenerator", "client_kwargs": {}}
        )

    def _to_snake_case(self, name: str) -> str:
        """Convert space name to valid Python variable name.

        Word boundaries are inserted at lower-to-upper transitions and at the
        end of an acronym, so ``"FLUX.1-schnell"`` becomes ``"flux_1_schnell"``
        rather than being split letter by letter. A result that starts with a
        digit or is a Python keyword is prefixed with ``node_``; an empty
        result becomes ``"node"``.
        """
        import keyword

        # Remove special chars, convert to snake_case
        clean = re.sub(r'[^a-zA-Z0-9]', '_', name)
        clean = re.sub(r'(?<=[a-z0-9])(?=[A-Z])', '_', clean)
        clean = re.sub(r'(?<=[A-Z])(?=[A-Z][a-z])', '_', clean)
        clean = re.sub(r'_+', '_', clean).strip('_').lower()
        if not clean:
            return "node"
        if clean[0].isdigit() or keyword.iskeyword(clean):
            clean = f"node_{clean}"
        return clean

    def _generate_wiring_docs(self, endpoint: APIEndpoint, var_name: str) -> List[str]:
        """Generate documentation for wiring inputs/outputs.

        Returns:
            Comment lines (each starting with ``#``) describing every input
            and output of *endpoint*. Values are flattened to one line so a
            multi-line default cannot spill out of its comment.
        """
        docs = []
        docs.append(f"# === WIRING GUIDE for {var_name} ===")
        if endpoint.inputs:
            docs.append("# Inputs (what this node expects):")
            for inp in endpoint.inputs:
                if inp.python_type != "str":
                    example = f"upstream_node.{inp.name}"
                else:
                    example = json.dumps(str(inp.default or "value"), ensure_ascii=False)
                default_info = (
                    f" [default: {self._one_line(inp.default)}]"
                    if inp.default is not None
                    else ""
                )
                docs.append(f"# - {inp.name}: {inp.python_type}{default_info}")
                docs.append(f"# Wire: {var_name}.inputs['{inp.name}'] = {example}")
        else:
            docs.append("# Inputs: None (no parameters required)")
        if endpoint.outputs:
            docs.append("#")
            docs.append("# Outputs (what this node produces):")
            for out in endpoint.outputs:
                docs.append(f"# - {out.name}: {out.python_type}")
                docs.append(f"# Access: {var_name}.{out.name}")
                docs.append(f"# Usage: next_node.inputs['{out.name}'] = {var_name}.{out.name}")
        else:
            docs.append("# Outputs: None")
        docs.append("# ===========================================")
        return docs

    def _render_node_code(self, space_id: str, var_name: str, endpoint: APIEndpoint) -> str:
        """Render the actual Python code for the GradioNode.

        String values are written as escaped literals, free text placed in
        comments is flattened to one line, and the separating comma of every
        dict entry is placed *before* its trailing comment so the generated
        code stays syntactically valid.
        """
        lines = []
        # Node definition
        lines.append(f'{var_name} = GradioNode(')
        lines.append(f' space_or_url={json.dumps(space_id, ensure_ascii=False)}, # Space ID')
        lines.append(f' api_name={json.dumps(endpoint.route, ensure_ascii=False)}, # API endpoint')
        if endpoint.description:
            lines.append(f' # Description: {self._one_line(endpoint.description)}')
        lines.append('')

        # Inputs section
        if endpoint.inputs:
            lines.append(' inputs={')
            for inp in endpoint.inputs:
                # Determine default value representation
                if inp.default is not None:
                    if isinstance(inp.default, str):
                        value_code = json.dumps(inp.default, ensure_ascii=False)
                    else:
                        value_code = repr(inp.default)
                    note = "Fixed value"
                else:
                    # Suggest gradio component for UI input
                    value_code = inp.to_gradio_component()
                    note = "UI input - connect to upstream node or provide value"
                # Clean up multiline descriptions
                comment = ""
                if inp.description:
                    desc = self._one_line(inp.description)[:50]
                    comment = f" # {desc}"
                # The comma must precede the comment; after it, the comma
                # would be commented out and the dict would not parse.
                lines.append(f' "{inp.name}": {value_code}, # {note}{comment}')
            lines.append(' },')
        else:
            lines.append(' inputs={}, # No inputs required')
        lines.append('')

        # Outputs section
        if endpoint.outputs:
            lines.append(' outputs={')
            for out in endpoint.outputs:
                comp = out.to_gradio_component()
                lines.append(f' "{out.name}": {comp}, # Display in node card')
            lines.append(' # Use None to hide outputs: "hidden_output": None')
            lines.append(' },')
        else:
            lines.append(' outputs={}, # No outputs')

        # Optional flags
        lines.append('')
        lines.append(' # Optional: Transform outputs before downstream flow')
        lines.append(' # postprocess=lambda outputs, final: final,')
        lines.append(')')
        return "\n".join(lines)
class InferenceNodeGenerator(NodeGenerator):
    """
    Stub generator reserved for daggr.InferenceNode (HF Inference Providers).

    Declines every space and raises when asked to generate.
    """

    def can_handle(self, space_info: Dict) -> bool:
        """Always decline: nothing is supported yet."""
        supported = False
        return supported

    def generate(self, model_id: str, **kwargs) -> NodeTemplate:
        """Not implemented; always raises ``NotImplementedError``."""
        message = "InferenceNode generator coming in next revision"
        raise NotImplementedError(message)
class FnNodeGenerator(NodeGenerator):
    """
    Stub generator reserved for daggr.FnNode (custom Python functions).

    Declines every space and raises when asked to generate.
    """

    def can_handle(self, space_info: Dict) -> bool:
        """Always decline: nothing is supported yet."""
        supported = False
        return supported

    def generate(self, func: Callable, **kwargs) -> NodeTemplate:
        """Not implemented; always raises ``NotImplementedError``."""
        message = "FnNode generator coming in next revision"
        raise NotImplementedError(message)
class DaggrGenerator:
    """
    Main orchestrator for generating daggr workflows.

    Holds a name -> generator registry and assembles the final script from
    the template a generator returns.
    """

    def __init__(self):
        """Create the registry with the built-in generators."""
        self.generators: Dict[str, NodeGenerator] = {
            "gradio": GradioNodeGenerator(),
            "inference": InferenceNodeGenerator(),
            "function": FnNodeGenerator(),
        }

    def generate_from_space(
        self,
        space_url: str,
        output_file: Optional[str] = None,
        api_name: Optional[str] = None,
        node_name: Optional[str] = None,
        include_boilerplate: bool = True
    ) -> str:
        """
        Generate daggr node from HF Space.

        Args:
            space_url: HF Space URL or ID
            output_file: Optional file to write; nothing is written if None
            api_name: Specific API endpoint to use
            node_name: Custom variable name for node
            include_boilerplate: Include imports and example usage

        Returns:
            Generated Python code as string

        Raises:
            Exception: Any error from generation or writing is reported on
                stdout and then re-raised unchanged.
        """
        generator = self.generators["gradio"]
        try:
            template = generator.generate(space_url, api_name, node_name)
            code = self._assemble_code(template, include_boilerplate)
            if output_file:
                # Explicit UTF-8: the generated source may contain non-ASCII
                # labels, and the platform default encoding is locale-dependent.
                Path(output_file).write_text(code, encoding="utf-8")
                print(f"\nGenerated node written to: {output_file}")
            return code
        except Exception as e:
            print(f"\nError generating node: {e}")
            raise

    def _assemble_code(self, template: NodeTemplate, include_boilerplate: bool) -> str:
        """Assemble final Python script.

        With boilerplate, the script is: header docstring, imports, wiring
        guide, node definition, example ``Graph`` usage. Without it, only the
        wiring guide and node definition are emitted.
        """
        lines = []
        if include_boilerplate:
            shown = ", ".join(template.all_endpoints[:5])
            more = "..." if len(template.all_endpoints) > 5 else ""
            lines.append("'''")
            lines.append('Auto-generated Daggr Node')
            lines.append(f'Space: {template.space_id}')
            lines.append(f'API: {template.endpoint.route}')
            lines.append(f'Endpoints available: {shown}{more}')
            lines.append("'''")
            lines.append('')
            lines.extend(template.imports)
            lines.append('from daggr import Graph')
            lines.append('')

        # Add wiring documentation
        lines.extend(template.wiring_placeholders)
        lines.append('')
        # Add the node code
        lines.append(template.node_code)
        lines.append('')

        if include_boilerplate:
            # Extract variable name from first line of node code
            # (the text before the first "=").
            var_line = template.node_code.split('\n')[0]
            var_name = var_line.split('=')[0].strip()
            # Add example graph setup
            space_short = template.space_id.split("/")[-1]
            lines.append('# Example usage')
            lines.append('if __name__ == "__main__":')
            lines.append(' graph = Graph(')
            lines.append(f' name="{space_short} Workflow",')
            lines.append(f' nodes=[{var_name}]')
            lines.append(' )')
            lines.append(' graph.launch()')
            lines.append('')
            lines.append(' # Or run with: daggr this_file.py')
        return "\n".join(lines)

    def register_generator(self, name: str, generator: NodeGenerator):
        """Register a new generator for extensibility.

        An existing entry under the same *name* is replaced.
        """
        self.generators[name] = generator
        print(f"Registered new generator: {name}")
def main():
    """Command-line entry point: parse arguments, generate, and print.

    The generated code is echoed to stdout under a banner only when no
    ``--output`` file was requested.
    """
    cli = argparse.ArgumentParser(
        description="Generate daggr nodes from Hugging Face Gradio Spaces"
    )
    cli.add_argument("space", help="HF Space URL or ID (e.g., 'user/space-name')")
    cli.add_argument(
        "--api-name", "-a",
        help="Specific API endpoint (default: first substantial endpoint)",
    )
    cli.add_argument("--output", "-o", help="Output Python file (default: stdout)")
    cli.add_argument("--node-name", "-n", help="Variable name for node (default: auto)")
    cli.add_argument(
        "--no-boilerplate",
        action="store_true",
        help="Generate only node definition",
    )
    opts = cli.parse_args()

    wants_boilerplate = not opts.no_boilerplate
    code = DaggrGenerator().generate_from_space(
        opts.space,
        output_file=opts.output,
        api_name=opts.api_name,
        node_name=opts.node_name,
        include_boilerplate=wants_boilerplate,
    )

    if opts.output:
        return
    rule = "=" * 60
    print("\n" + rule)
    print("GENERATED DAGGR NODE")
    print(rule)
    print(code)


if __name__ == "__main__":
    main()