francischung222 committed on
Commit
58f8fc8
·
1 Parent(s): 9e30ee9

beefing up the emotion server by adding a DistilBERT classifier

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. servers/emotion_server.py +110 -12
requirements.txt CHANGED
@@ -5,3 +5,4 @@ mcp>=1.0.0
5
  anthropic>=0.37.0
6
  python-dotenv>=1.0.0
7
  plotly>=5.18.0
 
 
5
  anthropic>=0.37.0
6
  python-dotenv>=1.0.0
7
  plotly>=5.18.0
8
+ transformers>=4.44.0
servers/emotion_server.py CHANGED
@@ -116,24 +116,55 @@ app = FastMCP("emotion-server")
116
  # Lexicons & heuristics
117
  # ---------------------------
118
  EMO_LEX = {
119
- "happy": r"\b(happy|grateful|excited|joy(?:ful)?|delighted|content|optimistic|glad|thrilled|yay|better|good|great|fine)\b",
120
- "sad": r"\b(sad|down|depress(?:ed|ing)|cry(?:ing)?|lonely|alone|isolated|upset|miserable|heartbroken|devastat(?:ed|ing)|rejected|abandoned|invisible)\b",
121
- "angry": r"\b(angry|mad|furious|irritated|pissed|pissy|annoyed|resentful|rage|hate|infuriat(?:ed|ing)|frustrat(?:ed|ing)|boiling|trapped)\b",
122
- "anxious": r"\b(worried|anxious|nervous|stressed|overwhelmed|scared|uneasy|tense|on edge|freaking out|uncertain|afraid)\b",
123
- "tired": r"\b(tired|exhaust(?:ed|ing)|drained|burnt(?:\s*out)?|sleepy|fatigued|worn out|depleted|no energy)\b",
124
- "love": r"\b(love|affection|caring|fond|admire|cherish|adore)\b",
125
- "fear": r"\b(afraid|fear|terrified|panic(?:ky|ked)?|panicked|shaken|petrified)\b",
126
  }
127
 
128
  # Emojis contribute signals even without words
129
  EMOJI_SIGNAL = {
130
  "happy": ["πŸ˜€", "πŸ˜„", "😊", "πŸ™‚", "😁", "πŸ₯³", "✨"],
131
- "sad": ["😒", "😭", "😞", "πŸ˜”", "☹️"],
132
- "angry": ["😠", "😑", "🀬", "πŸ’’"],
133
- "anxious": ["😰", "😱", "😬", "😟", "😧"],
134
- "tired": ["πŸ₯±", "πŸ˜ͺ", "😴"],
135
  "love": ["❀️", "πŸ’–", "πŸ’•", "😍", "🀍", "πŸ’—", "πŸ’“", "😘"],
136
- "fear": ["🫣", "😨", "😱", "πŸ‘€"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
 
139
  NEGATORS = r"\b(no|not|never|hardly|barely|scarcely|isn['’]t|aren['’]t|can['’]t|don['’]t|doesn['’]t|won['’]t|without)\b"
@@ -233,6 +264,55 @@ def _softmax(d: Dict[str, float]) -> Dict[str, float]:
233
  return {k: exps[i] / s for i, k in enumerate(d.keys())}
234
 
235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  # ---------------------------
237
  # Per-user calibration (in-memory)
238
  # ---------------------------
@@ -368,6 +448,24 @@ def _analyze(text: str, user_id: Optional[str] = None) -> dict:
368
  labels_sorted = sorted(probs.items(), key=lambda kv: kv[1], reverse=True)
369
  top_labels = [k for k, v in labels_sorted[:3] if v > 0.05] or ["neutral"]
370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  return {
372
  "labels": top_labels,
373
  "scores": {k: round(v, 3) for k, v in probs.items()},
 
116
  # Lexicons & heuristics
117
  # ---------------------------
118
  EMO_LEX = {
119
+ "happy": r"\b(happy|grateful|excited|joy(?:ful)?|delighted|content|optimistic|glad|thrilled|yay|better|good|great|fine|okay-ish|doing ok)\b",
120
+ "sad": r"\b(sad|down|depress(?:ed|ing)|cry(?:ing)?|lonely|alone|isolated|upset|miserable|heartbroken|devastat(?:ed|ing)|rejected|abandoned|invisible|empty|numb|blah|hopeless|pointless|worthless)\b",
121
+ "angry": r"\b(angry|mad|furious|irritated|pissed|pissy|annoyed|resentful|rage|hate|infuriat(?:ed|ing)|frustrat(?:ed|ing)|boiling|trapped|fed up|sick of)\b",
122
+ "anxious": r"\b(worried|anxious|nervous|stressed|overwhelmed|scared|uneasy|tense|on edge|freaking out|uncertain|afraid|dread|panicky|jitters?|heart racing|shaky|knots? in (?:my )?stomach)\b",
123
+ "tired": r"\b(tired|exhaust(?:ed|ing)|drained|burnt(?:\s*out)?|sleepy|fatigued|worn out|depleted|no energy|burned out|burnt out|overstimulated|overloaded)\b",
124
+ "love": r"\b(love|affection|caring|fond|admire|cherish|adore|grateful for you)\b",
125
+ "fear": r"\b(afraid|fear|terrified|panic(?:ky|ked)?|panicked|shaken|petrified|scared stiff)\b",
126
  }
127
 
128
  # Emojis contribute signals even without words
129
  EMOJI_SIGNAL = {
130
  "happy": ["πŸ˜€", "πŸ˜„", "😊", "πŸ™‚", "😁", "πŸ₯³", "✨"],
131
+ "sad": ["😒", "😭", "😞", "πŸ˜”", "☹️", "😿"],
132
+ "angry": ["😠", "😑", "🀬", "πŸ’’", "😀"],
133
+ "anxious": ["😰", "😱", "😬", "😟", "😧", "πŸ˜΅β€πŸ’«", "🀯"],
134
+ "tired": ["πŸ₯±", "πŸ˜ͺ", "😴", "😡"],
135
  "love": ["❀️", "πŸ’–", "πŸ’•", "😍", "🀍", "πŸ’—", "πŸ’“", "😘"],
136
+ "fear": ["🫣", "😨", "😱", "πŸ‘€", "😳"],
137
+ }
138
+
139
+ # Heuristic valence/arousal for blending ML fallback
140
# Per-emotion valence weight. `_valence_arousal_from_probs` multiplies each
# weight by that emotion's probability, sums the products, and clamps the
# total to [-1.0, 1.0]. Labels missing from this table contribute 0.0.
VALENCE_HINT = {
    "happy": 0.8,
    "love": 0.7,
    "sad": -0.75,
    "angry": -0.6,
    "anxious": -0.45,
    "fear": -0.5,
    "tired": -0.3,
}
# Per-emotion arousal weight, combined the same way as VALENCE_HINT but
# clamped to [0.0, 1.0]. Labels missing from this table are weighted 0.5.
AROUSAL_HINT = {
    "happy": 0.55,
    "love": 0.5,
    "sad": 0.35,
    "angry": 0.65,
    "anxious": 0.65,
    "fear": 0.7,
    "tired": 0.2,
}

# Model identifier handed to `transformers.pipeline(model=...)` by
# `_ensure_ml_pipe`.
_ML_MODEL_ID = "bhadresh-savani/distilbert-base-uncased-emotion"
# Cached pipeline. Tri-state: None = load not attempted yet, False = a load was
# attempted and failed (never retried), anything else = the pipeline object.
_ML_PIPE = None
# Classifier label (looked up lower-cased) -> emotion keys used by this server.
# When a label maps to several emotions, `_ml_fallback_probs` adds the label's
# full probability to each of them; labels absent from this map add nothing.
_ML_LABEL_MAP = {
    "joy": ["happy"],
    "sadness": ["sad"],
    "anger": ["angry"],
    "fear": ["anxious", "fear"],
    "love": ["love"],
    "surprise": ["happy", "anxious"],
}
169
 
170
  NEGATORS = r"\b(no|not|never|hardly|barely|scarcely|isn['’]t|aren['’]t|can['’]t|don['’]t|doesn['’]t|won['’]t|without)\b"
 
264
  return {k: exps[i] / s for i, k in enumerate(d.keys())}
265
 
266
 
267
def _ensure_ml_pipe():
    """Lazily build and cache the optional DistilBERT emotion classifier.

    The first call tries to import ``transformers`` and construct a
    ``text-classification`` pipeline for ``_ML_MODEL_ID``. The outcome is
    stored in the module-level ``_ML_PIPE`` so later calls return at once.

    Returns:
        The pipeline object on success, or ``False`` when loading failed.
        The failure is cached and the load is never retried.
    """
    global _ML_PIPE
    if _ML_PIPE is not None:
        # Already resolved: either a live pipeline or the cached False.
        return _ML_PIPE
    try:
        # Imported here so the server still starts when transformers (or its
        # backend) is not installed.
        from transformers import pipeline

        _ML_PIPE = pipeline(
            "text-classification", model=_ML_MODEL_ID, top_k=None, truncation=True
        )
    except Exception as e:
        # Deliberate best-effort: any load problem disables the ML blend.
        # Diagnostics go to stderr, not stdout: this module is a FastMCP
        # server, and under the stdio transport stdout carries protocol
        # messages that stray text would corrupt.
        import sys

        print(
            f"[emotion-server] ML fallback unavailable: {type(e).__name__}: {e}",
            file=sys.stderr,
        )
        _ML_PIPE = False  # cache failure
    return _ML_PIPE
282
+
283
+
284
def _ml_fallback_probs(text: str) -> Optional[Dict[str, float]]:
    """Score ``text`` with the ML classifier, mapped onto this server's emotions.

    Args:
        text: The message to classify.

    Returns:
        ``None`` when the classifier is unavailable, raises during inference,
        or yields no probability mass for any mapped emotion. Otherwise a dict
        with one entry per ``EMO_LEX`` key, normalized to sum to 1.0, plus the
        extra key ``"__max_prob"`` holding the highest raw score the
        classifier reported for any label. Callers must remove that key
        before treating the dict as a distribution.
    """
    pipe = _ensure_ml_pipe()
    if not pipe:
        return None
    try:
        out = pipe(text)
        # The result is either a flat list of records or a list holding one
        # such list; accept both shapes.
        records = out[0] if out and isinstance(out[0], list) else out
        scores = {emo: 0.0 for emo in EMO_LEX}
        max_prob = 0.0
        for rec in records:
            lbl = rec.get("label", "").lower()
            prob = float(rec.get("score", 0.0))
            max_prob = max(max_prob, prob)
            # One classifier label may feed several local emotions; each one
            # receives the label's full probability before normalization.
            for mapped in _ML_LABEL_MAP.get(lbl, []):
                scores[mapped] += prob
        total = sum(scores.values())
        if total <= 0:
            return None
        scores = {k: v / total for k, v in scores.items()}
        scores["__max_prob"] = max_prob
        return scores
    except Exception as e:
        # Best-effort: an inference failure must not break analysis.
        # Diagnostics go to stderr, not stdout: this module is a FastMCP
        # server, and under the stdio transport stdout carries protocol
        # messages that stray text would corrupt.
        import sys

        print(
            f"[emotion-server] ML fallback error: {type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return None
308
+
309
+
310
def _valence_arousal_from_probs(probs: Dict[str, float]) -> Tuple[float, float]:
    """Collapse an emotion distribution into a ``(valence, arousal)`` pair.

    Each probability is weighted by its entry in ``VALENCE_HINT`` (default
    0.0) and ``AROUSAL_HINT`` (default 0.5). The weighted sums are rounded to
    three decimals, then valence is clamped to [-1.0, 1.0] and arousal to
    [0.0, 1.0].
    """

    def _clip(value, lower, upper):
        return max(lower, min(upper, value))

    weighted_valence = sum(
        VALENCE_HINT.get(label, 0.0) * weight for label, weight in probs.items()
    )
    weighted_arousal = sum(
        AROUSAL_HINT.get(label, 0.5) * weight for label, weight in probs.items()
    )

    valence = _clip(round(weighted_valence, 3), -1.0, 1.0)
    arousal = _clip(round(weighted_arousal, 3), 0.0, 1.0)
    return valence, arousal
314
+
315
+
316
  # ---------------------------
317
  # Per-user calibration (in-memory)
318
  # ---------------------------
 
448
  labels_sorted = sorted(probs.items(), key=lambda kv: kv[1], reverse=True)
449
  top_labels = [k for k, v in labels_sorted[:3] if v > 0.05] or ["neutral"]
450
 
451
+ # Always blend in ML classifier when available (gives broader coverage)
452
+ ml_scores = _ml_fallback_probs(t)
453
+ if ml_scores:
454
+ ml_max = ml_scores.pop("__max_prob", 0.0)
455
+ combined = {}
456
+ for emo in emo_scores.keys():
457
+ # bias toward ML (0.65) while keeping lexicon guardrails (0.35)
458
+ combined[emo] = 0.35 * probs.get(emo, 0.0) + 0.65 * ml_scores.get(
459
+ emo, 0.0
460
+ )
461
+ probs = _softmax(combined)
462
+ labels_sorted = sorted(probs.items(), key=lambda kv: kv[1], reverse=True)
463
+ top_labels = [k for k, v in labels_sorted[:3] if v > 0.05] or ["neutral"]
464
+ valence, arousal = _valence_arousal_from_probs(probs)
465
+ tone = quad_tone(valence, arousal)
466
+ conf = max(conf, round(0.45 + 0.4 * ml_max, 3))
467
+ reasons.append("ml blend (distilbert)")
468
+
469
  return {
470
  "labels": top_labels,
471
  "scores": {k: round(v, 3) for k, v in probs.items()},