kcrobot40 committed on
Commit
56c66e5
·
verified ·
1 Parent(s): 8ae7d46
Files changed (1) hide show
  1. app.py +204 -324
app.py CHANGED
@@ -1,97 +1,51 @@
1
 
2
- # app.py - KC Robot AI V4.1 (Full - FPT female TTS)
3
- # Full-feature Flask server:
4
- # - /ask (text) -> HF LLM
5
- # - /tts (text) -> HF TTS (default: NguyenManhTuan/VietnameseTTS_FPT_AI_Female)
6
- # - /stt (audio) -> HF STT (default: openai/whisper-small)
7
- # - /presence (radar event) -> greeting + Telegram notify
8
- # - /display -> OLED lines
9
- # - Web UI for quick test
10
- # - Telegram poller (background thread) to accept /ask, /say, /status
11
- #
12
- # Configuration via environment variables / Secrets in HF Space:
13
- # HF_API_TOKEN (required for HF inference)
14
- # HF_MODEL (optional, default google/flan-t5-large)
15
- # HF_TTS_MODEL (optional, default NguyenManhTuan/VietnameseTTS_FPT_AI_Female)
16
- # HF_STT_MODEL (optional, default openai/whisper-small)
17
- # TELEGRAM_TOKEN (optional)
18
- # TELEGRAM_CHATID (optional)
19
- #
20
- # Keep requirements minimal to improve HF Space stability:
21
- # flask, requests
22
- #
23
- # Important: set tokens in HF Space Settings -> Secrets (do not hardcode)
24
-
25
- import os
26
- import io
27
- import time
28
- import json
29
- import uuid
30
- import logging
31
- import threading
32
- from typing import Optional, List, Tuple
33
  from pathlib import Path
34
-
35
- import requests
36
  from flask import Flask, request, jsonify, send_file, render_template_string, abort
37
 
38
- # ----------------- Config & Logging -----------------
39
  logging.basicConfig(level=logging.INFO)
40
- logger = logging.getLogger("kcrobot.v4")
41
 
42
  app = Flask(__name__)
43
 
44
- # Directory for temporary files (tts audio)
45
  TMP_DIR = Path("/tmp/kcrobot") if os.name != "nt" else Path.cwd() / "tmp_kcrobot"
46
  TMP_DIR.mkdir(parents=True, exist_ok=True)
47
 
48
- # Environment / Secrets (set these in HF Space)
49
  HF_API_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
50
  HF_MODEL = os.getenv("HF_MODEL", "google/flan-t5-large").strip()
51
- # Default FPT female Vietnamese TTS
52
  HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "NguyenManhTuan/VietnameseTTS_FPT_AI_Female").strip()
53
  HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()
54
  TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
55
  TELEGRAM_CHATID = os.getenv("TELEGRAM_CHATID", "").strip()
56
-
57
- # Port (HF sets PORT env in runtime)
58
- PORT = int(os.getenv("PORT", os.getenv("SERVER_PORT", 7860)))
59
-
60
- if not HF_API_TOKEN:
61
- logger.warning("HF_API_TOKEN is not set — HF inference will fail until you add it in Secrets.")
62
 
63
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
 
 
64
 
65
- # ----------------- In-memory buffers -----------------
66
- CONV: List[Tuple[str, str]] = [] # list of (user, bot)
67
- DISPLAY_LINES: List[str] = [] # lines for OLED display
68
 
69
- def push_display(line: str, limit: int = 6):
70
- """Keep last `limit` lines for display."""
71
- global DISPLAY_LINES
72
  DISPLAY_LINES.append(line)
73
  if len(DISPLAY_LINES) > limit:
74
- DISPLAY_LINES = DISPLAY_LINES[-limit:]
75
 
76
- # ----------------- Helper: Hugging Face inference -----------------
77
  def hf_post_json(model_id: str, payload: dict, timeout: int = 120):
78
- """POST JSON to HF inference; return parsed JSON or raise."""
79
  if not HF_API_TOKEN:
80
  raise RuntimeError("HF_API_TOKEN missing (set in Secrets).")
81
  url = f"https://api-inference.huggingface.co/models/{model_id}"
82
- headers = dict(HF_HEADERS)
83
- headers["Content-Type"] = "application/json"
84
- r = requests.post(url, headers=headers, json=payload, timeout=timeout)
85
  if not r.ok:
86
  logger.error("HF POST JSON error %s: %s", r.status_code, r.text[:400])
87
  r.raise_for_status()
88
- try:
89
- return r.json()
90
- except Exception:
91
- return r.text
92
 
93
  def hf_post_bytes(model_id: str, data: bytes, content_type: str = "application/octet-stream", timeout: int = 180):
94
- """POST binary data (audio) to HF inference; return response object or raise."""
95
  if not HF_API_TOKEN:
96
  raise RuntimeError("HF_API_TOKEN missing (set in Secrets).")
97
  url = f"https://api-inference.huggingface.co/models/{model_id}"
@@ -103,341 +57,267 @@ def hf_post_bytes(model_id: str, data: bytes, content_type: str = "application/o
103
  r.raise_for_status()
104
  return r
105
 
106
- # ----------------- Text generation (LLM) -----------------
107
- def hf_text_generate(prompt: str, model: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
108
- model = model or HF_MODEL
109
- payload = {
110
- "inputs": prompt,
111
- "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)},
112
- "options": {"wait_for_model": True}
113
- }
114
- out = hf_post_json(model, payload, timeout=120)
115
  # parse common shapes
116
- if isinstance(out, list) and len(out) > 0:
117
- first = out[0]
118
- if isinstance(first, dict) and "generated_text" in first:
119
- return first["generated_text"]
120
- return str(first)
121
  if isinstance(out, dict):
122
- for k in ("generated_text", "text", "summary_text"):
123
  if k in out:
124
  return out[k]
125
- return json.dumps(out)
126
  return str(out)
127
 
128
- # ----------------- TTS (Text -> audio bytes) -----------------
129
- def hf_tts_get_audio_bytes(text: str, model: Optional[str] = None) -> bytes:
130
- """Call HF TTS model and return audio bytes (commonly mp3 or wav)."""
131
- model = model or HF_TTS_MODEL
132
  payload = {"inputs": text}
133
- r = requests.post(f"https://api-inference.huggingface.co/models/{model}", headers={**HF_HEADERS, "Content-Type": "application/json"}, json=payload, timeout=120)
 
134
  if not r.ok:
135
  logger.error("HF TTS error %s: %s", r.status_code, r.text[:400])
136
  r.raise_for_status()
137
  return r.content
138
 
139
- def save_tts_temp(audio_bytes: bytes, ext_hint: str = "mp3") -> str:
140
- """Save bytes to a temp file under TMP_DIR and return filename."""
141
- fname = f"tts_{int(time.time())}_{uuid.uuid4().hex}.{ext_hint}"
142
- p = TMP_DIR / fname
143
- p.write_bytes(audio_bytes)
144
- return fname
145
-
146
- # ----------------- STT (audio bytes -> text) -----------------
147
- def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str:
148
- model = model or HF_STT_MODEL
149
- r = hf_post_bytes(model, audio_bytes, content_type="application/octet-stream", timeout=180)
150
- # often returns {"text": "..."}
151
  try:
152
  j = r.json()
153
  if isinstance(j, dict) and "text" in j:
154
  return j["text"]
155
- if isinstance(j, list) and len(j) and isinstance(j[0], dict) and "text" in j[0]:
156
- return j[0]["text"]
157
  return str(j)
158
  except Exception:
159
  return r.text if hasattr(r, "text") else ""
160
 
161
- # ----------------- Endpoints for ESP32 / Web -----------------
 
 
 
 
 
 
 
 
 
 
 
 
162
  @app.route("/health", methods=["GET"])
163
  def health():
164
- return jsonify({
165
- "ok": True,
166
- "hf_api_token": bool(HF_API_TOKEN),
167
- "hf_model": HF_MODEL,
168
- "hf_tts_model": HF_TTS_MODEL,
169
- "hf_stt_model": HF_STT_MODEL,
170
- "telegram": bool(TELEGRAM_TOKEN and TELEGRAM_CHATID),
171
- "tmp_dir": str(TMP_DIR),
172
- })
173
 
174
  @app.route("/ask", methods=["POST"])
175
  def route_ask():
176
- """
177
- POST JSON: { "text": "...", "lang": "vi"|"en"|"auto" (optional) }
178
- Returns: { "answer": "..." }
179
- """
 
180
  try:
181
- data = request.get_json(force=True) or {}
182
- text = (data.get("text") or "").strip()
183
- lang = (data.get("lang") or "auto").lower()
184
- if not text:
185
- return jsonify({"error": "no text"}), 400
186
-
187
- # build bilingual instruction
188
- if lang == "vi":
189
- prompt = f"Bạn là trợ lý thông minh, trả lời bằng tiếng Việt, ngắn gọn và lịch sự:\n\n{text}"
190
- elif lang == "en":
191
- prompt = f"You are a helpful assistant. Answer in clear English, concise:\n\n{text}"
192
- else:
193
- prompt = f"Bạn là trợ lý thông minh song ngữ (Vietnamese/English). Trả lời bằng ngôn ngữ phù hợp với câu hỏi:\n\n{text}"
194
-
195
- answer = hf_text_generate(prompt)
196
- # store conversation and display preview
197
- CONV.append((text, answer))
198
- push_display("YOU: " + (text[:40]))
199
- push_display("BOT: " + (answer[:40]))
200
- return jsonify({"answer": answer})
201
  except Exception as e:
202
- logger.exception("route_ask failed")
203
- return jsonify({"error": str(e)}), 500
 
 
 
 
204
 
205
  @app.route("/tts", methods=["POST"])
206
  def route_tts():
207
- """
208
- POST JSON: { "text":"..." }
209
- Returns: audio bytes (audio/mpeg) - HF TTS output (mp3/wav)
210
- """
211
  try:
212
- data = request.get_json(force=True) or {}
213
- text = (data.get("text") or "").strip()
214
- if not text:
215
- return jsonify({"error": "no text"}), 400
216
- audio_bytes = hf_tts_get_audio_bytes(text)
217
- # Try to detect extension: if content-type present? HF sometimes returns mp3 bytes.
218
- # We'll send as audio/mpeg (mp3) which is widely supported by ESP32 players.
219
- return send_file(io.BytesIO(audio_bytes), mimetype="audio/mpeg", as_attachment=False, download_name="tts.mp3")
220
  except Exception as e:
221
- logger.exception("route_tts failed")
222
  return jsonify({"error": str(e)}), 500
 
223
 
224
  @app.route("/stt", methods=["POST"])
225
  def route_stt():
226
  """
227
- Accepts multipart 'file' or raw audio bytes in body.
228
- Returns JSON: { "text": "recognized text" }
229
  """
230
  try:
 
231
  if "file" in request.files:
232
  f = request.files["file"]
233
  audio_bytes = f.read()
234
  else:
235
  audio_bytes = request.get_data() or b""
236
  if not audio_bytes:
237
- return jsonify({"error": "no audio"}), 400
238
- text = hf_stt_from_bytes(audio_bytes)
239
- push_display("UserAudio: " + (text[:40]))
240
- return jsonify({"text": text})
241
  except Exception as e:
242
- logger.exception("route_stt failed")
243
  return jsonify({"error": str(e)}), 500
244
 
245
  @app.route("/presence", methods=["POST"])
246
  def route_presence():
247
- """
248
- ESP32 radar posts: JSON {"note": "..." }
249
- Server responds with greeting, and optionally sends Telegram alert.
250
- """
251
- try:
252
- data = request.get_json(force=True) or {}
253
- note = data.get("note", " người tới")
254
- greeting = f"Xin chào! {note}"
255
- CONV.append(("__presence__", greeting))
256
- push_display("RADAR: " + note[:40])
257
- # Telegram notify
258
- if TELEGRAM_TOKEN and TELEGRAM_CHATID:
259
- try:
260
- send_telegram_message(f"⚠️ Robot: Phát hiện: {note}")
261
- except Exception:
262
- logger.exception("Telegram notify failed")
263
- return jsonify({"greeting": greeting})
264
- except Exception as e:
265
- logger.exception("route_presence failed")
266
- return jsonify({"error": str(e)}), 500
 
267
 
268
  @app.route("/display", methods=["GET"])
269
  def route_display():
270
- return jsonify({"lines": DISPLAY_LINES[-6:], "conv_len": len(CONV)})
271
 
272
- # Serve tts files by filename if needed
273
- @app.route("/tts_file/<path:fname>", methods=["GET"])
274
- def serve_tts_file(fname):
275
- p = TMP_DIR / fname
276
- if not p.exists():
277
- return abort(404)
278
- # guess mime
279
- mime = "audio/mpeg" if str(fname).lower().endswith(".mp3") else "audio/wav"
280
- return send_file(str(p), mimetype=mime)
281
-
282
- # ----------------- Simple Web UI for testing -----------------
283
  INDEX_HTML = """
284
  <!doctype html>
285
- <html>
286
  <head>
287
- <meta charset="utf-8">
288
- <title>KC Robot AI V4.1</title>
289
- <meta name="viewport" content="width=device-width,initial-scale=1">
290
- <style>
291
- body{font-family:Arial,Helvetica, sans-serif; margin:12px; color:#111}
292
- textarea{width:100%; height:90px; padding:8px; font-size:16px}
293
- #chat{border:1px solid #ddd; padding:8px; height:260px; overflow:auto; background:#fbfbfb}
294
- button{padding:8px 12px; margin-top:8px; font-size:15px}
295
- </style>
 
 
 
 
 
 
 
 
 
 
296
  </head>
297
  <body>
298
- <h2>KC Robot AI V4.1 — Cloud Brain (FPT female)</h2>
 
299
  <div id="chat"></div>
300
- <textarea id="txt" placeholder="Nhập tiếng Việt hoặc English..."></textarea><br>
301
- <button onclick="ask()">Gửi (Ask)</button>
302
- <button onclick="playLast()">Phát TTS</button>
303
- <hr/>
304
- <input type="file" id="afile" accept="audio/*"><button onclick="uploadAudio()">Upload audio → STT</button>
305
- <hr/>
306
- <div id="log"></div>
307
- <script>
308
- window._lastAnswer = "";
309
- async function ask(){
310
- let t = document.getElementById('txt').value;
311
- if(!t) return;
312
- appendUser(t);
313
- let res = await fetch('/ask', {method:'POST', headers:{'Content-Type':'application/json'}, body: JSON.stringify({text:t})});
314
- let j = await res.json();
315
- if(j.answer){ appendBot(j.answer); window._lastAnswer = j.answer; }
316
- else appendBot('[Error] ' + JSON.stringify(j));
317
- }
318
- function appendUser(t){ document.getElementById('chat').innerHTML += '<div style="color:#006"><b>You:</b> '+escapeHtml(t)+'</div>'; scroll();}
319
- function appendBot(t){ document.getElementById('chat').innerHTML += '<div style="color:#080"><b>Robot:</b> '+escapeHtml(t)+'</div>'; scroll();}
320
- function escapeHtml(s){ return (s+'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }
321
- function scroll(){ let c = document.getElementById('chat'); c.scrollTop = c.scrollHeight; }
322
- async function playLast(){
323
- const txt = window._lastAnswer || document.getElementById('txt').value;
324
- if(!txt) return alert('Chưa câu trả lời');
325
- let r = await fetch('/tts',{method:'POST', headers:{'Content-Type':'application/json'}, body: JSON.stringify({text: txt})});
326
- if(!r.ok) return alert('TTS lỗi');
327
- const b = await r.blob();
328
- const url = URL.createObjectURL(b);
329
- const a = new Audio(url);
330
- a.play();
331
- }
332
- async function uploadAudio(){
333
- const f = document.getElementById('afile').files[0];
334
- if(!f) return alert('Chọn file audio');
335
- const fd = new FormData(); fd.append('file', f);
336
- const r = await fetch('/stt', {method:'POST', body: fd});
 
 
 
 
 
 
 
 
 
 
 
337
  const j = await r.json();
338
- if(j.text) appendUser('[voice] '+j.text);
339
- else appendUser('[stt error] '+JSON.stringify(j));
340
- }
341
- </script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  </body>
343
  </html>
344
  """
 
345
  @app.route("/", methods=["GET"])
346
  def index():
347
  return render_template_string(INDEX_HTML)
348
 
349
- # ----------------- Telegram helpers & poller -----------------
350
- def send_telegram_message(text: str) -> bool:
351
- if not TELEGRAM_TOKEN or not TELEGRAM_CHATID:
352
- logger.debug("Telegram not configured")
353
- return False
354
- try:
355
- url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
356
- r = requests.post(url, json={"chat_id": TELEGRAM_CHATID, "text": text}, timeout=10)
357
- if not r.ok:
358
- logger.warning("Telegram send failed: %s %s", r.status_code, r.text)
359
- return False
360
- return True
361
- except Exception:
362
- logger.exception("send_telegram_message exception")
363
- return False
364
-
365
- def telegram_poll_loop():
366
- """Long-polling loop to fetch updates and respond to simple commands."""
367
- if not TELEGRAM_TOKEN:
368
- logger.info("telegram_poll_loop: TELEGRAM_TOKEN not set, exiting poller.")
369
- return
370
- base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"
371
- offset = None
372
- logger.info("telegram_poll_loop: starting.")
373
- while True:
374
  try:
375
- params = {"timeout": 30}
376
- if offset:
377
- params["offset"] = offset
378
- r = requests.get(base + "/getUpdates", params=params, timeout=35)
379
- if not r.ok:
380
- logger.warning("telegram getUpdates failed: %s", r.status_code)
381
- time.sleep(2)
382
- continue
383
- j = r.json()
384
- for upd in j.get("result", []):
385
- offset = upd["update_id"] + 1
386
- msg = upd.get("message") or {}
387
- chat = msg.get("chat", {})
388
- chat_id = chat.get("id")
389
- text = (msg.get("text") or "").strip()
390
- if not text:
391
- continue
392
- logger.info("TG msg %s: %s", chat_id, text)
393
- lower = text.lower()
394
- if lower.startswith("/ask "):
395
- q = text[5:].strip()
396
- try:
397
- ans = hf_text_generate(q)
398
- except Exception as e:
399
- ans = f"[HF error] {e}"
400
- try:
401
- requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
402
- except Exception:
403
- logger.exception("tg reply failed")
404
- elif lower.startswith("/say "):
405
- tts_text = text[5:].strip()
406
- try:
407
- audio = hf_tts_get_audio_bytes(tts_text)
408
- files = {"audio": ("reply.mp3", audio, "audio/mpeg")}
409
- requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30)
410
- except Exception:
411
- logger.exception("tg say failed")
412
- elif lower.startswith("/status"):
413
- try:
414
- requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot AI is running."}, timeout=10)
415
- except Exception:
416
- pass
417
- else:
418
- try:
419
- requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "Commands: /ask <q> | /say <text> | /status"}, timeout=10)
420
- except Exception:
421
- pass
422
  except Exception:
423
- logger.exception("telegram_poll_loop exception, sleeping 3s")
424
- time.sleep(3)
425
-
426
- def start_background_tasks():
427
- # start telegram poller thread (if token provided)
428
- if TELEGRAM_TOKEN:
429
- t = threading.Thread(target=telegram_poll_loop, daemon=True)
430
- t.start()
431
- logger.info("Started Telegram poller thread.")
432
- else:
433
- logger.info("Telegram token not provided; poller disabled.")
434
 
435
  @app.before_first_request
436
- def _startup():
437
- start_background_tasks()
438
 
439
- # ----------------- Run -----------------
440
  if __name__ == "__main__":
441
- logger.info("Starting KC Robot AI V4.1 (FPT female TTS).")
442
- start_background_tasks()
443
  app.run(host="0.0.0.0", port=PORT)
 
1
 
2
+ # app.py - KC Robot AI V4.4 (Web STT from browser mic + TTS greeting + Telegram + Radar)
3
+ import os, io, time, json, threading, logging, requests, uuid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from pathlib import Path
 
 
5
  from flask import Flask, request, jsonify, send_file, render_template_string, abort
6
 
7
# ---------- Config ----------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kcrobot.v4.4")

app = Flask(__name__)

# Working directory for temporary files; created eagerly at import time.
if os.name == "nt":
    TMP_DIR = Path.cwd() / "tmp_kcrobot"
else:
    TMP_DIR = Path("/tmp/kcrobot")
TMP_DIR.mkdir(parents=True, exist_ok=True)


def _env(name: str, default: str = "") -> str:
    """Return environment variable *name* (or *default*) with surrounding whitespace removed."""
    return os.getenv(name, default).strip()


# Settings are read once from the environment when the module is imported.
HF_API_TOKEN = _env("HF_API_TOKEN")
HF_MODEL = _env("HF_MODEL", "google/flan-t5-large")
HF_TTS_MODEL = _env("HF_TTS_MODEL", "NguyenManhTuan/VietnameseTTS_FPT_AI_Female")
HF_STT_MODEL = _env("HF_STT_MODEL", "openai/whisper-small")
TELEGRAM_TOKEN = _env("TELEGRAM_TOKEN")
TELEGRAM_CHATID = _env("TELEGRAM_CHATID")
PORT = int(os.getenv("PORT", 7860))

# Authorization header shared by every Hugging Face call; empty when no token is set.
HF_HEADERS = {}
if HF_API_TOKEN:
    HF_HEADERS["Authorization"] = f"Bearer {HF_API_TOKEN}"
else:
    logger.warning("⚠️ HF_API_TOKEN not set — STT/TTS/LLM calls will fail without it.")
27
 
28
# ---------- In-memory ----------
# Conversation history as (user, bot) tuples.
CONV = []
# Most recent status lines exposed through /display (for the ESP32 OLED).
DISPLAY_LINES = []


def push_display(line: str, limit=8):
    """Append *line* to ``DISPLAY_LINES`` and drop the oldest entries beyond *limit*.

    The list is trimmed in place, so other references to ``DISPLAY_LINES``
    keep seeing the same object.
    """
    DISPLAY_LINES.append(line)
    overflow = len(DISPLAY_LINES) - limit
    if overflow > 0:
        del DISPLAY_LINES[:overflow]
36
 
37
+ # ---------- HF helpers ----------
38
def hf_post_json(model_id: str, payload: dict, timeout: int = 120):
    """POST *payload* as JSON to the HF inference endpoint of *model_id*.

    Returns the decoded JSON body of the response.

    Raises:
        RuntimeError: when ``HF_API_TOKEN`` is empty.
        requests.HTTPError: when the endpoint answers with an error status
            (the status and the first 400 characters of the body are logged).
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN missing (set in Secrets).")
    endpoint = f"https://api-inference.huggingface.co/models/{model_id}"
    request_headers = dict(HF_HEADERS)
    request_headers["Content-Type"] = "application/json"
    response = requests.post(endpoint, headers=request_headers, json=payload, timeout=timeout)
    if not response.ok:
        logger.error("HF POST JSON error %s: %s", response.status_code, response.text[:400])
        response.raise_for_status()
    return response.json()
 
 
 
47
 
48
  def hf_post_bytes(model_id: str, data: bytes, content_type: str = "application/octet-stream", timeout: int = 180):
 
49
  if not HF_API_TOKEN:
50
  raise RuntimeError("HF_API_TOKEN missing (set in Secrets).")
51
  url = f"https://api-inference.huggingface.co/models/{model_id}"
 
57
  r.raise_for_status()
58
  return r
59
 
60
def hf_text_generate(prompt: str):
    """Send *prompt* to the model named by ``HF_MODEL`` and return the generated text.

    The reply is unwrapped from the shapes handled below; any other shape
    is returned as ``str(out)``.
    """
    request_body = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": 256, "temperature": 0.7},
        "options": {"wait_for_model": True},
    }
    out = hf_post_json(HF_MODEL, request_body)

    # Shape 1: non-empty list whose first item is a dict.
    if isinstance(out, list):
        if len(out) and isinstance(out[0], dict):
            head = out[0]
            return head.get("generated_text", "") or str(head)
    # Shape 2: a dict carrying the text under one of the known keys.
    elif isinstance(out, dict):
        found = next((key for key in ("generated_text", "text") if key in out), None)
        if found is not None:
            return out[found]
    return str(out)
71
 
72
def hf_tts_bytes(text: str):
    """Synthesize *text* with the model named by ``HF_TTS_MODEL``.

    Returns the raw response body (the audio bytes produced by the model).

    Raises:
        RuntimeError: when ``HF_API_TOKEN`` is empty. This matches the other
            HF helpers and avoids sending a request with no Authorization
            header.
        requests.HTTPError: when the endpoint answers with an error status
            (the status and the first 400 characters of the body are logged).
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN missing (set in Secrets).")
    payload = {"inputs": text}
    url = f"https://api-inference.huggingface.co/models/{HF_TTS_MODEL}"
    r = requests.post(url, headers={**HF_HEADERS, "Content-Type": "application/json"}, json=payload, timeout=120)
    if not r.ok:
        logger.error("HF TTS error %s: %s", r.status_code, r.text[:400])
        r.raise_for_status()
    return r.content
80
 
81
def hf_stt_from_bytes(audio_bytes: bytes):
    """Transcribe *audio_bytes* with the model named by ``HF_STT_MODEL``.

    Returns the ``"text"`` field when the JSON reply is a dict containing it,
    otherwise the stringified JSON. If the reply cannot be handled as JSON,
    the raw response text is returned (or ``""`` when the response has none).
    """
    response = hf_post_bytes(
        HF_STT_MODEL,
        audio_bytes,
        content_type="application/octet-stream",
        timeout=180,
    )
    try:
        parsed = response.json()
        has_text = isinstance(parsed, dict) and "text" in parsed
        return parsed["text"] if has_text else str(parsed)
    except Exception:
        # Fall back to whatever text the response exposes.
        return getattr(response, "text", "")
91
 
92
+ # ---------- Telegram ----------
93
def send_telegram_message(text: str):
    """Send *text* to the configured Telegram chat.

    Best-effort: never raises. Returns ``True`` when Telegram answered with a
    success status, ``False`` when Telegram is not configured, the request
    failed, or the API rejected the message.
    """
    if not TELEGRAM_TOKEN or not TELEGRAM_CHATID:
        return False
    try:
        url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
        r = requests.post(url, json={"chat_id": TELEGRAM_CHATID, "text": text}, timeout=10)
    except Exception:
        # Deliberately broad: a notification failure must not break the caller.
        logger.exception("Telegram send failed")
        return False
    if not r.ok:
        # Log rejected messages (bad chat id, revoked token, ...) instead of
        # returning False without a trace.
        logger.warning("Telegram send failed: %s %s", r.status_code, r.text[:400])
    return r.ok
103
+
104
+ # ---------- Routes ----------
105
@app.route("/health", methods=["GET"])
def health():
    """Report liveness, whether an HF token is configured, and the TTS/STT model ids."""
    status = {
        "ok": True,
        "hf": bool(HF_API_TOKEN),
        "tts_model": HF_TTS_MODEL,
        "stt_model": HF_STT_MODEL,
    }
    return status
 
 
 
 
 
 
 
 
108
 
109
@app.route("/ask", methods=["POST"])
def route_ask():
    """Answer a text question through the LLM.

    Expects a JSON object ``{"text": "..."}``. Responds with
    ``{"answer": "..."}``, or ``{"error": "no text"}`` and status 400 when the
    body is not a JSON object or ``text`` is missing, empty or not a string.
    A failing LLM call is reported inside ``answer`` as ``"[HF error] ..."``.
    """
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, number, ...) has no .get().
        data = {}
    raw_text = data.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"error":"no text"}), 400
    prompt = f"You are KC Robot AI, bilingual assistant (Vietnamese/English). Answer concisely in the same language as the user.\n\nUser: {text}\nAssistant:"
    try:
        ans = hf_text_generate(prompt)
    except Exception as e:
        logger.exception("hf_text_generate failed")
        ans = f"[HF error] {e}"
    if not isinstance(ans, str):
        # hf_text_generate may pass through a non-string field from the model
        # reply; the concatenation below needs a string.
        ans = str(ans)
    CONV.append((text, ans))
    push_display("YOU: " + text[:80])
    push_display("BOT: " + ans[:80])
    return jsonify({"answer": ans})
125
 
126
@app.route("/tts", methods=["POST"])
def route_tts():
    """Convert text to speech.

    Expects a JSON object ``{"text": "..."}``. Responds with the synthesized
    audio sent as ``audio/mpeg``; ``{"error": "no text"}`` and status 400 when
    the body is not a JSON object or ``text`` is missing, empty or not a
    string; ``{"error": ...}`` and status 500 when synthesis fails.
    """
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, number, ...) has no .get().
        data = {}
    raw_text = data.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"error":"no text"}), 400
    try:
        audio = hf_tts_bytes(text)
    except Exception as e:
        logger.exception("hf_tts failed")
        return jsonify({"error": str(e)}), 500
    return send_file(io.BytesIO(audio), mimetype="audio/mpeg", as_attachment=False, download_name="tts.mp3")
138
 
139
@app.route("/stt", methods=["POST"])
def route_stt():
    """Transcribe uploaded audio.

    The audio is taken from the multipart field ``file`` when present,
    otherwise from the raw request body. Responds with ``{"text": ...}``,
    ``{"error": "no audio"}`` and status 400 when no bytes were received, or
    ``{"error": ...}`` and status 500 when anything else fails.
    """
    try:
        if "file" in request.files:
            audio_bytes = request.files["file"].read()
        else:
            audio_bytes = request.get_data() or b""
        if not audio_bytes:
            return jsonify({"error":"no audio"}), 400
        txt = hf_stt_from_bytes(audio_bytes)
        # Only slice when the transcript really is a string.
        preview = txt[:80] if isinstance(txt, str) else str(txt)
        push_display("STT: " + preview)
        return jsonify({"text": txt})
    except Exception as e:
        logger.exception("stt failed")
        return jsonify({"error": str(e)}), 500
160
 
161
@app.route("/presence", methods=["POST"])
def route_presence():
    """Handle a presence event and return a bilingual greeting.

    Expects a JSON object with an optional string ``note``; a default note is
    used when the body is not a JSON object or ``note`` is missing, blank or
    not a string. The greeting is recorded in ``CONV``, the note is pushed to
    the display buffer, a Telegram alert is sent when Telegram is configured,
    and ``{"greeting": "<vi>\\n<en>"}`` is returned.
    """
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, number, ...) has no .get().
        data = {}
    raw_note = data.get("note")
    note = raw_note.strip() if isinstance(raw_note, str) else ""
    note = note or "Có người tới"
    greeting_vi = f"Xin chào! {note}"
    greeting_en = "Hello! Someone is near the robot."
    combined = f"{greeting_vi}\n{greeting_en}"
    CONV.append(("__presence__", combined))
    push_display("RADAR: " + note[:80])

    def gen():
        # Request both greetings from the TTS model in the background; the
        # audio is discarded here, clients fetch it through /tts when needed.
        for label, phrase in (("vi", greeting_vi), ("en", greeting_en)):
            try:
                hf_tts_bytes(phrase)
            except Exception:
                logger.exception("generate greeting %s failed", label)

    # Without a token the calls can only fail, so skip the thread entirely
    # (the same guard startup_actions applies to its warm-up call).
    if HF_API_TOKEN:
        threading.Thread(target=gen, daemon=True).start()
    if TELEGRAM_TOKEN and TELEGRAM_CHATID:
        send_telegram_message("⚠️ Robot: Phát hiện người - " + note)
    return jsonify({"greeting": combined})
184
 
185
@app.route("/display", methods=["GET"])
def route_display():
    """Return the last eight display lines and the number of stored conversation entries."""
    recent_lines = DISPLAY_LINES[-8:]
    conversation_count = len(CONV)
    return jsonify({"lines": recent_lines, "conv_len": conversation_count})
188
 
189
+ # ---------- Web UI (enhanced: record mic, STT, ask, TTS playback) ----------
 
 
 
 
 
 
 
 
 
 
190
# Single-page test UI: text chat, microphone recording -> /stt -> /ask -> /tts playback.
# The script restores the record button label and releases the microphone when
# recording stops, revokes superseded audio object URLs, and reports failed
# requests in the chat instead of leaving unhandled promise rejections.
INDEX_HTML = """
<!doctype html>
<html lang="vi">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<title>KC Robot AI V4.4</title>
<style>
body{font-family:Arial,Helvetica,sans-serif;background:#0f1720;color:#e6eef6;margin:0;padding:12px}
.wrap{max-width:980px;margin:auto}
header{display:flex;align-items:center;gap:12px}
h1{margin:0;font-size:20px;color:#7ee787}
#chat{background:#0b1220;border-radius:8px;padding:10px;height:420px;overflow:auto;border:1px solid #223344}
.msg{margin:6px 0;padding:8px;border-radius:8px;display:inline-block}
.user{background:#163a9b22;color:#cfe7ff;float:right;clear:both}
.bot{background:#1b6b2b22;color:#dfffdc;float:left;clear:both}
.controls{margin-top:10px;display:flex;gap:8px;align-items:center}
button{background:#0ea5a4;border:none;padding:10px 12px;border-radius:8px;color:#fff;cursor:pointer}
button.secondary{background:#334155}
input[type=text]{flex:1;padding:10px;border-radius:8px;border:1px solid #334155;background:#021427;color:#e6eef6}
audio{width:100%;margin-top:8px}
.small{font-size:12px;color:#99a0b0}
</style>
</head>
<body>
<div class="wrap">
<header><h1>🤖 KC Robot AI V4.4 — Web STT (mic) + TTS</h1></header>
<div id="chat"></div>
<div class="controls">
<input id="txt" type="text" placeholder="Gõ câu hỏi (VN/EN) hoặc bấm Ghi để nói..." />
<button id="sendBtn">Gửi</button>
<button id="recBtn">🎙 Ghi</button>
<button id="playGreeting" class="secondary">▶ Chào (TTS)</button>
</div>
<audio id="player" controls></audio>
<p class="small">Lưu ý: cần đặt HF_API_TOKEN trong Secrets để STT/TTS/LLM hoạt động.</p>
</div>

<script>
let mediaRecorder, audioChunks=[], lastAudioUrl=null;
const chat = document.getElementById('chat');
const player = document.getElementById('player');
const recBtn = document.getElementById('recBtn');
const REC_LABEL = recBtn.textContent;

function appendUser(t){ chat.innerHTML += `<div class="msg user">${escapeHtml(t)}</div>`; chat.scrollTop = chat.scrollHeight; }
function appendBot(t){ chat.innerHTML += `<div class="msg bot">${escapeHtml(t)}</div>`; chat.scrollTop = chat.scrollHeight; }
function escapeHtml(s){ return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }

// POST a JSON payload and return the fetch Response
function postJson(path, payload){
  return fetch(path,{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify(payload)});
}

// request TTS for the text and play it
async function speak(text){
  const tts = await postJson('/tts',{text: text});
  if(!tts.ok) return;
  const blob = await tts.blob();
  // free the previous clip before replacing it
  if(lastAudioUrl) URL.revokeObjectURL(lastAudioUrl);
  lastAudioUrl = URL.createObjectURL(blob);
  player.src = lastAudioUrl;
  try{ await player.play(); }catch(err){ /* playback blocked: the audio controls remain usable */ }
}

// ask the question, show the answer, then speak it
async function askAndSpeak(text){
  const res = await postJson('/ask',{text: text});
  const j = await res.json();
  const ans = j.answer || j.error || 'No answer';
  appendBot(ans);
  await speak(ans);
}

document.getElementById('sendBtn').onclick = async ()=>{
  const box = document.getElementById('txt');
  const v = box.value.trim();
  if(!v) return;
  appendUser(v); box.value='';
  try{
    await askAndSpeak(v);
  }catch(err){
    appendBot('[Lỗi] ' + err);
  }
};

recBtn.onclick = async ()=>{
  if(mediaRecorder && mediaRecorder.state === 'recording'){
    mediaRecorder.stop();
    return;
  }
  // start recording
  if(!navigator.mediaDevices) return alert('Trình duyệt không hỗ trợ microphone.');
  try{
    const stream = await navigator.mediaDevices.getUserMedia({audio:true});
    mediaRecorder = new MediaRecorder(stream);
    audioChunks = [];
    mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
    mediaRecorder.onstop = async () => {
      // release the microphone and restore the button label
      stream.getTracks().forEach(track => track.stop());
      recBtn.textContent = REC_LABEL;
      try{
        const blob = new Blob(audioChunks, {type:'audio/webm'});
        // send blob to /stt (multipart)
        const fd = new FormData(); fd.append('file', blob, 'rec.webm');
        const r = await fetch('/stt',{method:'POST', body: fd});
        const j = await r.json();
        if(j.text){
          appendUser('[voice] ' + j.text);
          // auto ask with transcribed text
          await askAndSpeak(j.text);
        } else {
          appendBot('[STT lỗi] ' + JSON.stringify(j));
        }
      }catch(err){
        appendBot('[STT lỗi] ' + err);
      }
    };
    mediaRecorder.start();
    recBtn.textContent = '■ Dừng';
  }catch(err){
    alert('Không thể truy cập microphone: ' + err);
  }
};

document.getElementById('playGreeting').onclick = async ()=>{
  try{
    // call /presence with sample note to get the combined greeting text
    const r = await postJson('/presence',{note: 'Xin chào chủ nhân'});
    const j = await r.json();
    const txt = j.greeting || '';
    if(!txt) return;
    // play TTS of full greeting via /tts
    await speak(txt);
  }catch(err){
    appendBot('[Lỗi] ' + err);
  }
};
</script>
</body>
</html>
"""
301
+
302
@app.route("/", methods=["GET"])
def index():
    """Serve the test page rendered from ``INDEX_HTML``."""
    page = render_template_string(INDEX_HTML)
    return page
305
 
306
+ # ---------- Startup greeting ----------
307
def startup_actions():
    """Log the startup message and issue one TTS request from a daemon thread.

    The worker thread is always started; it only calls the TTS model when
    ``HF_API_TOKEN`` is set, and a failing call is logged rather than raised.
    """
    logger.info("KC Robot AI V4.4 starting up — generating greeting (async).")

    def _warm_tts():
        if not HF_API_TOKEN:
            return
        try:
            # The audio is discarded; the call is made only for its warm-up effect.
            hf_tts_bytes("Xin chào chủ nhân! Em là KC Robot và đã sẵn sàng!")
        except Exception:
            logger.exception("warmup tts failed")

    worker = threading.Thread(target=_warm_tts, daemon=True)
    worker.start()
 
 
 
 
 
 
 
 
 
316
 
317
def _before_first():
    """Run the one-time startup actions."""
    startup_actions()


# ``Flask.before_first_request`` was removed in Flask 2.3, where using it as a
# decorator raises AttributeError at import time. Register the hook where it
# still exists; otherwise run the startup actions once, now.
if hasattr(app, "before_first_request"):
    app.before_first_request(_before_first)
else:
    _before_first()

# ---------- Run ----------
if __name__ == "__main__":
    # Bind to all interfaces on the configured port.
    app.run(host="0.0.0.0", port=PORT)