mj064 committed on
Commit
499375e
·
verified ·
1 Parent(s): 9ea2c4b

Upload folder using huggingface_hub

Browse files
server/env/__init__.py CHANGED
@@ -1,4 +1,10 @@
1
- from .environment import ContentGuardEnv
2
  from .tasks import TASKS
3
 
4
  __all__ = ["ContentGuardEnv", "TASKS"]
 
 
 
 
 
 
 
 
 
1
  from .tasks import TASKS
2
 
3
  __all__ = ["ContentGuardEnv", "TASKS"]
4
+
5
+
6
+ def __getattr__(name):
7
+ if name == "ContentGuardEnv":
8
+ from .environment import ContentGuardEnv
9
+ return ContentGuardEnv
10
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
server/env/data_gen.py CHANGED
@@ -4,26 +4,52 @@ Synthetic & Hugging Face content case generator for ContentGuardEnv.
4
  Generates realistic social media posts with associated metadata,
5
  user account history, and ground-truth moderation decisions.
6
  Integrates natively with Hugging Face datasets.
 
 
 
7
  """
8
 
 
9
  import random
10
  import copy
11
- from typing import Any, Dict, List, Tuple
12
  import logging
13
 
14
  logger = logging.getLogger(__name__)
15
 
16
- # Attempt to load Real World Data from Hugging Face
17
- HF_DATA = None
 
 
18
  try:
19
- from datasets import load_dataset
20
- logger.info("Initializing Hugging Face Datasets connection...")
21
- # Load a small, highly relevant dataset of flagged conversational text
22
- hf_dataset = load_dataset("SetFit/toxic_conversations", split="train")
23
- HF_DATA = list(hf_dataset)
24
- logger.info(f"Loaded {len(HF_DATA)} real evaluation cases from Hugging Face!")
25
- except Exception as e:
26
- logger.warning(f"Could not load Hugging Face dataset (will use synthetic): {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  # ── Post templates (Fallback / Structure) ──────────────────────────────────────
@@ -83,7 +109,10 @@ DEVICE_TYPES = ["mobile_ios", "mobile_android", "desktop_web", "desktop_app"]
83
 
84
  def get_real_hf_case() -> Tuple[str, str, int, str]:
85
  """Pulls a random record from the real Hugging Face dataset."""
86
- record = random.choice(HF_DATA)
 
 
 
87
  # SetFit/toxic_conversations uses 'text' and 'label' (0=safe, 1=toxic)
88
  content = record.get("text", "")
89
  is_toxic = record.get("label", 0) == 1
@@ -105,10 +134,11 @@ def generate_case(task_id: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
105
  """Generate a content moderation case leveraging real HF data if available."""
106
 
107
  # 1. Base Meta structure
108
- template = random.choice(POSTS)
 
109
 
110
  # 2. Content & Ground Truth assignment
111
- if HF_DATA:
112
  # Real AI Training Data Path
113
  content, violation, severity, action = get_real_hf_case()
114
  template["violation"] = violation
 
4
  Generates realistic social media posts with associated metadata,
5
  user account history, and ground-truth moderation decisions.
6
  Integrates natively with Hugging Face datasets.
7
+
8
+ By default, external dataset loading is disabled to keep startup/test
9
+ paths deterministic. Enable with CONTENT_GUARD_USE_HF_DATA=1.
10
  """
11
 
12
+ import os
13
  import random
14
  import copy
15
+ from typing import Any, Dict, Tuple
16
  import logging
17
 
18
  logger = logging.getLogger(__name__)
19
 
20
+ HF_DATASET = None
21
+ _HF_DATA_ATTEMPTED = False
22
+ _HF_DATA_ENABLED = os.getenv("CONTENT_GUARD_USE_HF_DATA", "0").strip().lower() in {"1", "true", "yes", "on"}
23
+
24
  try:
25
+ from datasets import load_dataset as _hf_load_dataset
26
+ except Exception:
27
+ _hf_load_dataset = None
28
+
29
+
30
+ def _maybe_load_hf_data() -> None:
31
+ """Lazy one-time dataset initialization to avoid import-time blocking."""
32
+ global HF_DATASET, _HF_DATA_ATTEMPTED
33
+
34
+ if _HF_DATA_ATTEMPTED:
35
+ return
36
+ _HF_DATA_ATTEMPTED = True
37
+
38
+ if not _HF_DATA_ENABLED:
39
+ logger.info("Hugging Face dataset disabled (CONTENT_GUARD_USE_HF_DATA=0). Using synthetic cases.")
40
+ return
41
+ if _hf_load_dataset is None:
42
+ logger.warning("datasets package unavailable; using synthetic cases.")
43
+ return
44
+
45
+ try:
46
+ logger.info("Initializing Hugging Face Datasets connection...")
47
+ # Keep dataset bounded for memory and faster cold starts.
48
+ HF_DATASET = _hf_load_dataset("SetFit/toxic_conversations", split="train[:1000]")
49
+ logger.info(f"Loaded {len(HF_DATASET)} Hugging Face evaluation records.")
50
+ except Exception as e:
51
+ logger.warning(f"Could not load Hugging Face dataset (falling back to synthetic): {str(e)}")
52
+ HF_DATASET = None
53
 
54
 
55
  # ── Post templates (Fallback / Structure) ──────────────────────────────────────
 
109
 
110
  def get_real_hf_case() -> Tuple[str, str, int, str]:
111
  """Pulls a random record from the real Hugging Face dataset."""
112
+ if HF_DATASET is None or len(HF_DATASET) == 0:
113
+ raise ValueError("HF dataset is not initialized.")
114
+
115
+ record = HF_DATASET[random.randrange(len(HF_DATASET))]
116
  # SetFit/toxic_conversations uses 'text' and 'label' (0=safe, 1=toxic)
117
  content = record.get("text", "")
118
  is_toxic = record.get("label", 0) == 1
 
134
  """Generate a content moderation case leveraging real HF data if available."""
135
 
136
  # 1. Base Meta structure
137
+ template = copy.deepcopy(random.choice(POSTS))
138
+ _maybe_load_hf_data()
139
 
140
  # 2. Content & Ground Truth assignment
141
+ if HF_DATASET is not None and len(HF_DATASET) > 0:
142
  # Real AI Training Data Path
143
  content, violation, severity, action = get_real_hf_case()
144
  template["violation"] = violation
server/static/app.js CHANGED
@@ -84,9 +84,7 @@ const app = {
84
  return;
85
  }
86
 
87
- this.closeReward(true);
88
- this.terminalPrint('LOG: Quick-cycle trigger received (/). Starting a new episode...');
89
- this.startEpisode(this.currentTask);
90
  },
91
 
92
  connectWS: function() {
@@ -375,7 +373,7 @@ const app = {
375
  if (!this.scrollPending) {
376
  this.scrollPending = true;
377
  requestAnimationFrame(() => {
378
- this.scrollOversightToBottom(false);
379
  this.scrollPending = false;
380
  });
381
  }
@@ -445,8 +443,16 @@ const app = {
445
  }
446
  },
447
 
448
- closeReward: function(silent) {
449
  document.getElementById('reward-overlay').style.display = 'none';
 
 
 
 
 
 
 
 
450
  if (!silent && !this.isAutoTraining) {
451
  this.terminalPrint(`LOG: Alignment evaluation captured and dismissed.`);
452
  }
 
84
  return;
85
  }
86
 
87
+ this.closeReward(true, true);
 
 
88
  },
89
 
90
  connectWS: function() {
 
373
  if (!this.scrollPending) {
374
  this.scrollPending = true;
375
  requestAnimationFrame(() => {
376
+ this.scrollOversightToBottom(true);
377
  this.scrollPending = false;
378
  });
379
  }
 
443
  }
444
  },
445
 
446
+ closeReward: function(silent, autoStartNext) {
447
  document.getElementById('reward-overlay').style.display = 'none';
448
+
449
+ const shouldAutoStart = !!this.currentTask && (autoStartNext === true || (!this.isAutoTraining && this.episodeDone));
450
+ if (shouldAutoStart) {
451
+ this.terminalPrint('LOG: Dismiss received. Starting next episode...');
452
+ this.startEpisode(this.currentTask);
453
+ return;
454
+ }
455
+
456
  if (!silent && !this.isAutoTraining) {
457
  this.terminalPrint(`LOG: Alignment evaluation captured and dismissed.`);
458
  }
server/static/style.css CHANGED
@@ -413,6 +413,11 @@ body::after {
413
  gap: 22px;
414
  }
415
 
 
 
 
 
 
416
  .landing-hero {
417
  border: 1px solid rgba(15, 159, 155, 0.18);
418
  background: linear-gradient(160deg, rgba(255, 255, 255, 0.9), rgba(255, 248, 238, 0.8));
@@ -509,6 +514,8 @@ body::after {
509
  grid-template-columns: minmax(0, 1.12fr) minmax(360px, 0.88fr);
510
  gap: 0;
511
  min-height: 0;
 
 
512
  overflow: hidden;
513
  }
514
 
@@ -517,7 +524,7 @@ body::after {
517
  flex-direction: column;
518
  gap: var(--hud-gap);
519
  padding: var(--hud-gap);
520
- overflow-y: auto;
521
  min-width: 0;
522
  min-height: 0;
523
  }
@@ -669,10 +676,15 @@ body::after {
669
  font-family: 'IBM Plex Mono', monospace;
670
  color: var(--ink-800);
671
  font-weight: 500;
 
 
672
  }
673
 
674
  #action-panel {
675
  display: block;
 
 
 
676
  }
677
 
678
  #action-panel .stat-grid {
@@ -869,6 +881,7 @@ body::after {
869
  color: rgba(219, 233, 239, 0.9);
870
  position: relative;
871
  min-height: 0;
 
872
  }
873
 
874
  .log-line {
@@ -972,6 +985,8 @@ body::after {
972
  .text-sm {
973
  font-size: 0.84rem;
974
  line-height: 1.6;
 
 
975
  }
976
 
977
  ::-webkit-scrollbar {
@@ -1138,6 +1153,10 @@ body::after {
1138
  padding: 20px;
1139
  gap: 18px;
1140
  }
 
 
 
 
1141
  }
1142
 
1143
  @media (max-width: 700px) {
 
413
  gap: 22px;
414
  }
415
 
416
+ #main-interface {
417
+ height: 100%;
418
+ min-height: 0;
419
+ }
420
+
421
  .landing-hero {
422
  border: 1px solid rgba(15, 159, 155, 0.18);
423
  background: linear-gradient(160deg, rgba(255, 255, 255, 0.9), rgba(255, 248, 238, 0.8));
 
514
  grid-template-columns: minmax(0, 1.12fr) minmax(360px, 0.88fr);
515
  gap: 0;
516
  min-height: 0;
517
+ height: 100%;
518
+ align-items: stretch;
519
  overflow: hidden;
520
  }
521
 
 
524
  flex-direction: column;
525
  gap: var(--hud-gap);
526
  padding: var(--hud-gap);
527
+ overflow-y: hidden;
528
  min-width: 0;
529
  min-height: 0;
530
  }
 
676
  font-family: 'IBM Plex Mono', monospace;
677
  color: var(--ink-800);
678
  font-weight: 500;
679
+ overflow-wrap: anywhere;
680
+ word-break: break-word;
681
  }
682
 
683
  #action-panel {
684
  display: block;
685
+ max-height: 30vh;
686
+ overflow-y: auto;
687
+ padding-right: 2px;
688
  }
689
 
690
  #action-panel .stat-grid {
 
881
  color: rgba(219, 233, 239, 0.9);
882
  position: relative;
883
  min-height: 0;
884
+ overscroll-behavior: contain;
885
  }
886
 
887
  .log-line {
 
985
  .text-sm {
986
  font-size: 0.84rem;
987
  line-height: 1.6;
988
+ overflow-wrap: anywhere;
989
+ word-break: break-word;
990
  }
991
 
992
  ::-webkit-scrollbar {
 
1153
  padding: 20px;
1154
  gap: 18px;
1155
  }
1156
+
1157
+ .view-col {
1158
+ overflow-y: auto;
1159
+ }
1160
  }
1161
 
1162
  @media (max-width: 700px) {
tests/test_graders.py CHANGED
@@ -12,7 +12,8 @@ async def test_grade_easy_perfect_match():
12
  action = {"violation": "hate_speech"}
13
  ground_truth = {"violation": "hate_speech"}
14
  reward, feedback, rationale = await grade_action_async(action, ground_truth, "easy", {})
15
- assert reward == 1.0
 
16
  assert "Perfect" in feedback
17
  assert "Section 12" in rationale
18
 
 
12
  action = {"violation": "hate_speech"}
13
  ground_truth = {"violation": "hate_speech"}
14
  reward, feedback, rationale = await grade_action_async(action, ground_truth, "easy", {})
15
+ # Rewards are intentionally bounded by _smooth_reward to [0.05, 0.95].
16
+ assert reward == 0.95
17
  assert "Perfect" in feedback
18
  assert "Section 12" in rationale
19