import hashlib import os from dataclasses import dataclass import pandas as pd def _project_root() -> str: return os.path.dirname(os.path.dirname(os.path.abspath(__file__))) def cache_root() -> str: # Priority: env var > /tmp (for HF Spaces/containers) > project root (local dev) env_cache = os.environ.get("CACHE_DIR") if env_cache: path = os.path.join(env_cache, "panel_app_v2") elif os.path.exists("/tmp") and os.access("/tmp", os.W_OK): # On Hugging Face Spaces and Linux containers, /tmp is always writable path = os.path.join("/tmp", "panel_app_v2_cache") else: # Fallback to project root for local development root = _project_root() path = os.path.join(root, ".cache", "panel_app_v2") os.makedirs(path, exist_ok=True) return path def _safe_str(value: object) -> str: try: return str(value or "") except Exception: return "" def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str: h = hashlib.blake2b(digest_size=16) if file_bytes: h.update(file_bytes) name = _safe_str(filename) if name: h.update(name.encode("utf-8", errors="ignore")) ex = _safe_str(extra) if ex: h.update(ex.encode("utf-8", errors="ignore")) return h.hexdigest() def _has_pyarrow() -> bool: try: import pyarrow # noqa: F401 return True except Exception: return False def _has_duckdb() -> bool: try: import duckdb # noqa: F401 return True except Exception: return False def write_table(df: pd.DataFrame, path_no_ext: str) -> str: if _has_pyarrow(): path = path_no_ext + ".parquet" df.to_parquet(path, index=False) return path path = path_no_ext + ".pkl" df.to_pickle(path) return path def read_table(path: str) -> pd.DataFrame: if not path or not os.path.exists(path): return pd.DataFrame() p = str(path).lower() if p.endswith(".parquet"): return pd.read_parquet(path) return pd.read_pickle(path) @dataclass(frozen=True) class CachedDataset: dataset_id: str rat: str granularity: str def base_dir(self) -> str: return os.path.join( cache_root(), self.dataset_id, f"rat={self.rat}", f"granularity={self.granularity}", ) def daily_table_base(self) -> str: return os.path.join(self.base_dir(), "daily") def meta_path(self) -> str: return os.path.join(self.base_dir(), "meta.json") def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None: base = dataset.daily_table_base() candidates = [base + ".parquet", base + ".pkl"] for p in candidates: if os.path.exists(p): try: df = read_table(p) return df if isinstance(df, pd.DataFrame) else pd.DataFrame() except Exception: return pd.DataFrame() return None def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str: os.makedirs(dataset.base_dir(), exist_ok=True) return write_table(daily, dataset.daily_table_base()) def ensure_duckdb_available() -> None: if not _has_duckdb(): raise RuntimeError( "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb" )