Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- server/env/__init__.py +7 -1
- server/env/data_gen.py +44 -14
- server/static/app.js +11 -5
- server/static/style.css +20 -1
- tests/test_graders.py +2 -1
server/env/__init__.py
CHANGED
|
@@ -1,4 +1,10 @@
|
|
| 1 |
-
from .environment import ContentGuardEnv
|
| 2 |
from .tasks import TASKS
|
| 3 |
|
| 4 |
__all__ = ["ContentGuardEnv", "TASKS"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from .tasks import TASKS
|
| 2 |
|
| 3 |
__all__ = ["ContentGuardEnv", "TASKS"]
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def __getattr__(name):
|
| 7 |
+
if name == "ContentGuardEnv":
|
| 8 |
+
from .environment import ContentGuardEnv
|
| 9 |
+
return ContentGuardEnv
|
| 10 |
+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
server/env/data_gen.py
CHANGED
|
@@ -4,26 +4,52 @@ Synthetic & Hugging Face content case generator for ContentGuardEnv.
|
|
| 4 |
Generates realistic social media posts with associated metadata,
|
| 5 |
user account history, and ground-truth moderation decisions.
|
| 6 |
Integrates natively with Hugging Face datasets.
|
|
|
|
|
|
|
|
|
|
| 7 |
"""
|
| 8 |
|
|
|
|
| 9 |
import random
|
| 10 |
import copy
|
| 11 |
-
from typing import Any, Dict,
|
| 12 |
import logging
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
try:
|
| 19 |
-
from datasets import load_dataset
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
# ── Post templates (Fallback / Structure) ──────────────────────────────────────
|
|
@@ -83,7 +109,10 @@ DEVICE_TYPES = ["mobile_ios", "mobile_android", "desktop_web", "desktop_app"]
|
|
| 83 |
|
| 84 |
def get_real_hf_case() -> Tuple[str, str, int, str]:
|
| 85 |
"""Pulls a random record from the real Hugging Face dataset."""
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
| 87 |
# SetFit/toxic_conversations uses 'text' and 'label' (0=safe, 1=toxic)
|
| 88 |
content = record.get("text", "")
|
| 89 |
is_toxic = record.get("label", 0) == 1
|
|
@@ -105,10 +134,11 @@ def generate_case(task_id: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
|
| 105 |
"""Generate a content moderation case leveraging real HF data if available."""
|
| 106 |
|
| 107 |
# 1. Base Meta structure
|
| 108 |
-
template = random.choice(POSTS)
|
|
|
|
| 109 |
|
| 110 |
# 2. Content & Ground Truth assignment
|
| 111 |
-
if
|
| 112 |
# Real AI Training Data Path
|
| 113 |
content, violation, severity, action = get_real_hf_case()
|
| 114 |
template["violation"] = violation
|
|
|
|
| 4 |
Generates realistic social media posts with associated metadata,
|
| 5 |
user account history, and ground-truth moderation decisions.
|
| 6 |
Integrates natively with Hugging Face datasets.
|
| 7 |
+
|
| 8 |
+
By default, external dataset loading is disabled to keep startup/test
|
| 9 |
+
paths deterministic. Enable with CONTENT_GUARD_USE_HF_DATA=1.
|
| 10 |
"""
|
| 11 |
|
| 12 |
+
import os
|
| 13 |
import random
|
| 14 |
import copy
|
| 15 |
+
from typing import Any, Dict, Tuple
|
| 16 |
import logging
|
| 17 |
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
| 20 |
+
HF_DATASET = None
|
| 21 |
+
_HF_DATA_ATTEMPTED = False
|
| 22 |
+
_HF_DATA_ENABLED = os.getenv("CONTENT_GUARD_USE_HF_DATA", "0").strip().lower() in {"1", "true", "yes", "on"}
|
| 23 |
+
|
| 24 |
try:
|
| 25 |
+
from datasets import load_dataset as _hf_load_dataset
|
| 26 |
+
except Exception:
|
| 27 |
+
_hf_load_dataset = None
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _maybe_load_hf_data() -> None:
|
| 31 |
+
"""Lazy one-time dataset initialization to avoid import-time blocking."""
|
| 32 |
+
global HF_DATASET, _HF_DATA_ATTEMPTED
|
| 33 |
+
|
| 34 |
+
if _HF_DATA_ATTEMPTED:
|
| 35 |
+
return
|
| 36 |
+
_HF_DATA_ATTEMPTED = True
|
| 37 |
+
|
| 38 |
+
if not _HF_DATA_ENABLED:
|
| 39 |
+
logger.info("Hugging Face dataset disabled (CONTENT_GUARD_USE_HF_DATA=0). Using synthetic cases.")
|
| 40 |
+
return
|
| 41 |
+
if _hf_load_dataset is None:
|
| 42 |
+
logger.warning("datasets package unavailable; using synthetic cases.")
|
| 43 |
+
return
|
| 44 |
+
|
| 45 |
+
try:
|
| 46 |
+
logger.info("Initializing Hugging Face Datasets connection...")
|
| 47 |
+
# Keep dataset bounded for memory and faster cold starts.
|
| 48 |
+
HF_DATASET = _hf_load_dataset("SetFit/toxic_conversations", split="train[:1000]")
|
| 49 |
+
logger.info(f"Loaded {len(HF_DATASET)} Hugging Face evaluation records.")
|
| 50 |
+
except Exception as e:
|
| 51 |
+
logger.warning(f"Could not load Hugging Face dataset (falling back to synthetic): {str(e)}")
|
| 52 |
+
HF_DATASET = None
|
| 53 |
|
| 54 |
|
| 55 |
# ── Post templates (Fallback / Structure) ──────────────────────────────────────
|
|
|
|
| 109 |
|
| 110 |
def get_real_hf_case() -> Tuple[str, str, int, str]:
|
| 111 |
"""Pulls a random record from the real Hugging Face dataset."""
|
| 112 |
+
if HF_DATASET is None or len(HF_DATASET) == 0:
|
| 113 |
+
raise ValueError("HF dataset is not initialized.")
|
| 114 |
+
|
| 115 |
+
record = HF_DATASET[random.randrange(len(HF_DATASET))]
|
| 116 |
# SetFit/toxic_conversations uses 'text' and 'label' (0=safe, 1=toxic)
|
| 117 |
content = record.get("text", "")
|
| 118 |
is_toxic = record.get("label", 0) == 1
|
|
|
|
| 134 |
"""Generate a content moderation case leveraging real HF data if available."""
|
| 135 |
|
| 136 |
# 1. Base Meta structure
|
| 137 |
+
template = copy.deepcopy(random.choice(POSTS))
|
| 138 |
+
_maybe_load_hf_data()
|
| 139 |
|
| 140 |
# 2. Content & Ground Truth assignment
|
| 141 |
+
if HF_DATASET is not None and len(HF_DATASET) > 0:
|
| 142 |
# Real AI Training Data Path
|
| 143 |
content, violation, severity, action = get_real_hf_case()
|
| 144 |
template["violation"] = violation
|
server/static/app.js
CHANGED
|
@@ -84,9 +84,7 @@ const app = {
|
|
| 84 |
return;
|
| 85 |
}
|
| 86 |
|
| 87 |
-
this.closeReward(true);
|
| 88 |
-
this.terminalPrint('LOG: Quick-cycle trigger received (/). Starting a new episode...');
|
| 89 |
-
this.startEpisode(this.currentTask);
|
| 90 |
},
|
| 91 |
|
| 92 |
connectWS: function() {
|
|
@@ -375,7 +373,7 @@ const app = {
|
|
| 375 |
if (!this.scrollPending) {
|
| 376 |
this.scrollPending = true;
|
| 377 |
requestAnimationFrame(() => {
|
| 378 |
-
this.scrollOversightToBottom(
|
| 379 |
this.scrollPending = false;
|
| 380 |
});
|
| 381 |
}
|
|
@@ -445,8 +443,16 @@ const app = {
|
|
| 445 |
}
|
| 446 |
},
|
| 447 |
|
| 448 |
-
closeReward: function(silent) {
|
| 449 |
document.getElementById('reward-overlay').style.display = 'none';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
if (!silent && !this.isAutoTraining) {
|
| 451 |
this.terminalPrint(`LOG: Alignment evaluation captured and dismissed.`);
|
| 452 |
}
|
|
|
|
| 84 |
return;
|
| 85 |
}
|
| 86 |
|
| 87 |
+
this.closeReward(true, true);
|
|
|
|
|
|
|
| 88 |
},
|
| 89 |
|
| 90 |
connectWS: function() {
|
|
|
|
| 373 |
if (!this.scrollPending) {
|
| 374 |
this.scrollPending = true;
|
| 375 |
requestAnimationFrame(() => {
|
| 376 |
+
this.scrollOversightToBottom(true);
|
| 377 |
this.scrollPending = false;
|
| 378 |
});
|
| 379 |
}
|
|
|
|
| 443 |
}
|
| 444 |
},
|
| 445 |
|
| 446 |
+
closeReward: function(silent, autoStartNext) {
|
| 447 |
document.getElementById('reward-overlay').style.display = 'none';
|
| 448 |
+
|
| 449 |
+
const shouldAutoStart = !!this.currentTask && (autoStartNext === true || (!this.isAutoTraining && this.episodeDone));
|
| 450 |
+
if (shouldAutoStart) {
|
| 451 |
+
this.terminalPrint('LOG: Dismiss received. Starting next episode...');
|
| 452 |
+
this.startEpisode(this.currentTask);
|
| 453 |
+
return;
|
| 454 |
+
}
|
| 455 |
+
|
| 456 |
if (!silent && !this.isAutoTraining) {
|
| 457 |
this.terminalPrint(`LOG: Alignment evaluation captured and dismissed.`);
|
| 458 |
}
|
server/static/style.css
CHANGED
|
@@ -413,6 +413,11 @@ body::after {
|
|
| 413 |
gap: 22px;
|
| 414 |
}
|
| 415 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
.landing-hero {
|
| 417 |
border: 1px solid rgba(15, 159, 155, 0.18);
|
| 418 |
background: linear-gradient(160deg, rgba(255, 255, 255, 0.9), rgba(255, 248, 238, 0.8));
|
|
@@ -509,6 +514,8 @@ body::after {
|
|
| 509 |
grid-template-columns: minmax(0, 1.12fr) minmax(360px, 0.88fr);
|
| 510 |
gap: 0;
|
| 511 |
min-height: 0;
|
|
|
|
|
|
|
| 512 |
overflow: hidden;
|
| 513 |
}
|
| 514 |
|
|
@@ -517,7 +524,7 @@ body::after {
|
|
| 517 |
flex-direction: column;
|
| 518 |
gap: var(--hud-gap);
|
| 519 |
padding: var(--hud-gap);
|
| 520 |
-
overflow-y:
|
| 521 |
min-width: 0;
|
| 522 |
min-height: 0;
|
| 523 |
}
|
|
@@ -669,10 +676,15 @@ body::after {
|
|
| 669 |
font-family: 'IBM Plex Mono', monospace;
|
| 670 |
color: var(--ink-800);
|
| 671 |
font-weight: 500;
|
|
|
|
|
|
|
| 672 |
}
|
| 673 |
|
| 674 |
#action-panel {
|
| 675 |
display: block;
|
|
|
|
|
|
|
|
|
|
| 676 |
}
|
| 677 |
|
| 678 |
#action-panel .stat-grid {
|
|
@@ -869,6 +881,7 @@ body::after {
|
|
| 869 |
color: rgba(219, 233, 239, 0.9);
|
| 870 |
position: relative;
|
| 871 |
min-height: 0;
|
|
|
|
| 872 |
}
|
| 873 |
|
| 874 |
.log-line {
|
|
@@ -972,6 +985,8 @@ body::after {
|
|
| 972 |
.text-sm {
|
| 973 |
font-size: 0.84rem;
|
| 974 |
line-height: 1.6;
|
|
|
|
|
|
|
| 975 |
}
|
| 976 |
|
| 977 |
::-webkit-scrollbar {
|
|
@@ -1138,6 +1153,10 @@ body::after {
|
|
| 1138 |
padding: 20px;
|
| 1139 |
gap: 18px;
|
| 1140 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1141 |
}
|
| 1142 |
|
| 1143 |
@media (max-width: 700px) {
|
|
|
|
| 413 |
gap: 22px;
|
| 414 |
}
|
| 415 |
|
| 416 |
+
#main-interface {
|
| 417 |
+
height: 100%;
|
| 418 |
+
min-height: 0;
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
.landing-hero {
|
| 422 |
border: 1px solid rgba(15, 159, 155, 0.18);
|
| 423 |
background: linear-gradient(160deg, rgba(255, 255, 255, 0.9), rgba(255, 248, 238, 0.8));
|
|
|
|
| 514 |
grid-template-columns: minmax(0, 1.12fr) minmax(360px, 0.88fr);
|
| 515 |
gap: 0;
|
| 516 |
min-height: 0;
|
| 517 |
+
height: 100%;
|
| 518 |
+
align-items: stretch;
|
| 519 |
overflow: hidden;
|
| 520 |
}
|
| 521 |
|
|
|
|
| 524 |
flex-direction: column;
|
| 525 |
gap: var(--hud-gap);
|
| 526 |
padding: var(--hud-gap);
|
| 527 |
+
overflow-y: hidden;
|
| 528 |
min-width: 0;
|
| 529 |
min-height: 0;
|
| 530 |
}
|
|
|
|
| 676 |
font-family: 'IBM Plex Mono', monospace;
|
| 677 |
color: var(--ink-800);
|
| 678 |
font-weight: 500;
|
| 679 |
+
overflow-wrap: anywhere;
|
| 680 |
+
word-break: break-word;
|
| 681 |
}
|
| 682 |
|
| 683 |
#action-panel {
|
| 684 |
display: block;
|
| 685 |
+
max-height: 30vh;
|
| 686 |
+
overflow-y: auto;
|
| 687 |
+
padding-right: 2px;
|
| 688 |
}
|
| 689 |
|
| 690 |
#action-panel .stat-grid {
|
|
|
|
| 881 |
color: rgba(219, 233, 239, 0.9);
|
| 882 |
position: relative;
|
| 883 |
min-height: 0;
|
| 884 |
+
overscroll-behavior: contain;
|
| 885 |
}
|
| 886 |
|
| 887 |
.log-line {
|
|
|
|
| 985 |
.text-sm {
|
| 986 |
font-size: 0.84rem;
|
| 987 |
line-height: 1.6;
|
| 988 |
+
overflow-wrap: anywhere;
|
| 989 |
+
word-break: break-word;
|
| 990 |
}
|
| 991 |
|
| 992 |
::-webkit-scrollbar {
|
|
|
|
| 1153 |
padding: 20px;
|
| 1154 |
gap: 18px;
|
| 1155 |
}
|
| 1156 |
+
|
| 1157 |
+
.view-col {
|
| 1158 |
+
overflow-y: auto;
|
| 1159 |
+
}
|
| 1160 |
}
|
| 1161 |
|
| 1162 |
@media (max-width: 700px) {
|
tests/test_graders.py
CHANGED
|
@@ -12,7 +12,8 @@ async def test_grade_easy_perfect_match():
|
|
| 12 |
action = {"violation": "hate_speech"}
|
| 13 |
ground_truth = {"violation": "hate_speech"}
|
| 14 |
reward, feedback, rationale = await grade_action_async(action, ground_truth, "easy", {})
|
| 15 |
-
|
|
|
|
| 16 |
assert "Perfect" in feedback
|
| 17 |
assert "Section 12" in rationale
|
| 18 |
|
|
|
|
| 12 |
action = {"violation": "hate_speech"}
|
| 13 |
ground_truth = {"violation": "hate_speech"}
|
| 14 |
reward, feedback, rationale = await grade_action_async(action, ground_truth, "easy", {})
|
| 15 |
+
# Rewards are intentionally bounded by _smooth_reward to [0.05, 0.95].
|
| 16 |
+
assert reward == 0.95
|
| 17 |
assert "Perfect" in feedback
|
| 18 |
assert "Section 12" in rationale
|
| 19 |
|