feat: voix mains-libres + wake-word CHLOVA, fin Phase 6 v1 (v0.29.0)

Mode mains-libres : écoute en boucle déclenchée par le wake-word « CHLOVA … » (extractCommand), micro en pause pendant le TTS pour éviter l'auto-écoute ; réponses lues d'office. Bouton Libre + indicateur. 100 % navigateur. Build OK. Palier de risque : réversible (front). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 07:24:41 +02:00
parent 76ad3b62fd
commit 476c89ce3d
4 changed files with 127 additions and 14 deletions
@@ -23,5 +23,14 @@ Le backend doit tourner avec l'auth configurée (`CHLOVA_ADMIN_*`, voir
 | `src/pages/Chat.tsx` | Conversation agent (v0.23.0). |
 | `src/pages/Review.tsx` | Need-review : approuver/refuser (v0.24.0). |

-## Périmètre v1
-Login → Chat → Review. Voix + app RN : phases ultérieures (API commune réutilisée).
+## Voix (Phase 6)
+100 % navigateur (Web Speech API), zéro backend/GPU :
+- **Parler** : dictée push-to-talk (fr-FR) → envoyée à l'agent.
+- **Voix ON/OFF** : lecture vocale des réponses (TTS), réglage persistant.
+- **Libre** : mains-libres, déclenché par le wake-word « CHLOVA … » ; le micro se
+  met en pause pendant la synthèse pour éviter l'auto-écoute.
+
+STT = Chrome/Edge (webkit) ; TTS = largement pris en charge. Se dégrade proprement sinon.
+
+## Périmètre
+Login → Chat (+ voix) → Review. App RN : phase ultérieure (API commune réutilisée).
@@ -42,7 +42,7 @@ export function Chat() {
      try {
        const { reply } = await api.chat(token, t);
        setMessages((m) => [...m, { role: "assistant", text: reply }]);
-        if (speakReplies) speech.speak(reply);
+        if (speakReplies || speech.handsFree) speech.speak(reply);
      } catch (err) {
        if (err instanceof ApiError && err.status === 401) {
          logout();
@@ -62,13 +62,18 @@ export function Chat() {
  };

  const mic = (): void => {
-    if (speech.listening) {
+    if (speech.listening && !speech.handsFree) { // the stop branch only applies outside hands-free mode
      speech.stopListening();
      return;
    }
    speech.listen((text) => void sendText(text)); // dictate → send
  };

+  const toggleHandsFree = (): void => { // flips hands-free mode on or off
+    if (speech.handsFree) speech.stopHandsFree();
+    else speech.startHandsFree((text) => void sendText(text)); // "CHLOVA …" command → send
+  };
+
  return (
    <div className="flex h-full flex-col">
      <div className="flex-1 overflow-y-auto px-4 py-4 space-y-3">
@@ -86,6 +91,9 @@ export function Chat() {
          </div>
        ))}
        {busy && <p className="text-muted text-sm animate-pulse">CHLOVA réfléchit…</p>}
+        {speech.handsFree && !busy && !speech.speaking && (
+          <p className="text-accent text-sm">Mains libres — dis « CHLOVA … »</p>
+        )}
        {speech.speaking && <p className="text-accent text-sm">Lecture vocale…</p>}
        {error && <p role="alert" className="text-danger text-sm">{error}</p>}
        <div ref={bottom} />
@@ -110,7 +118,7 @@ export function Chat() {
          onChange={(e) => setInput(e.target.value)}
          disabled={busy}
        />
-        {speech.sttSupported && (
+        {speech.sttSupported && !speech.handsFree && (
          <button
            type="button"
            onClick={mic}
@@ -120,6 +128,17 @@ export function Chat() {
            {speech.listening ? "Stop" : "Parler"}
          </button>
        )}
+        {speech.sttSupported && (
+          <button
+            type="button"
+            onClick={toggleHandsFree}
+            aria-label={speech.handsFree ? "Couper les mains libres" : "Activer les mains libres"}
+            title="Mains libres (wake-word CHLOVA)"
+            className={`rounded-md border px-3 py-2 text-sm cursor-pointer ring-accent ${speech.handsFree ? "border-accent text-accent" : "border-border text-muted"}`}
+          >
+            {speech.handsFree ? "Libre ON" : "Libre"}
+          </button>
+        )}
        <button
          type="submit"
          disabled={busy || !input.trim()}
@@ -4,9 +4,10 @@ import { useCallback, useEffect, useRef, useState } from "react";
 * Voix (Phase 6) — 100 % navigateur (Web Speech API), aucun backend ni GPU.
 * - STT : SpeechRecognition (webkit) — Chrome/Edge. Dégrade proprement ailleurs.
 * - TTS : speechSynthesis — large support.
+ * - Mains-libres : écoute en boucle, déclenchée par le wake-word « CHLOVA ».
+ *   Met le micro en pause pendant la synthèse vocale (évite l'auto-écoute).
 */

-// Typages minimaux (Web Speech API non standardisée dans lib.dom).
 interface SREvent {
  results: ArrayLike<ArrayLike<{ transcript: string }>>;
 }
@@ -40,23 +41,41 @@ function makeRecognition(): SR | null {
  return r;
 }

+const WAKE = /\b(chlova|clova|klova)\b[\s,:.]*/i; // wake-word (three spellings, case-insensitive) plus any trailing whitespace or , : . separators
+
+/** Returns the command that follows the first wake-word match, or null when there is no wake-word or nothing after it. */
+export function extractCommand(transcript: string): string | null {
+  const m = WAKE.exec(transcript);
+  if (!m) return null;
+  const cmd = transcript.slice(m.index + m[0].length).trim(); // everything after the wake-word and its separators
+  return cmd.length > 0 ? cmd : null;
+}
+
 export interface UseSpeech {
  sttSupported: boolean;
  ttsSupported: boolean;
  listening: boolean;
  speaking: boolean;
+  handsFree: boolean; // true while the hands-free (wake-word) loop is enabled
  listen: (onText: (text: string) => void) => void;
  stopListening: () => void;
  speak: (text: string) => void;
  cancelSpeak: () => void;
+  startHandsFree: (onCommand: (text: string) => void) => void; // onCommand receives the text spoken after the wake-word
+  stopHandsFree: () => void; // disables the loop and aborts the current recognition
 }

 export function useSpeech(): UseSpeech {
  const recRef = useRef<SR | null>(null);
+  const handsFreeRef = useRef(false); // hands-free flag kept in a ref so speech callbacks read the live value
+  const onCommandRef = useRef<(t: string) => void>(() => {}); // callback passed to startHandsFree; called with each extracted command
+  const armRef = useRef<() => void>(() => {}); // starts one hands-free listening session; assigned in the hook body
  const [listening, setListening] = useState(false);
  const [speaking, setSpeaking] = useState(false);
+  const [handsFree, setHandsFree] = useState(false);

-  const sttSupported = typeof window !== "undefined" && !!(window.SpeechRecognition ?? window.webkitSpeechRecognition);
+  const sttSupported =
+    typeof window !== "undefined" && !!(window.SpeechRecognition ?? window.webkitSpeechRecognition);
  const ttsSupported = typeof window !== "undefined" && "speechSynthesis" in window;

  const stopListening = useCallback((): void => {
@@ -89,18 +108,77 @@ export function useSpeech(): UseSpeech {
      window.speechSynthesis.cancel();
      const u = new SpeechSynthesisUtterance(text);
      u.lang = "fr-FR";
-      u.onend = (): void => setSpeaking(false);
-      u.onerror = (): void => setSpeaking(false);
+      const done = (): void => {
+        setSpeaking(false);
+        // En mains-libres, on réarme l'écoute APRÈS avoir parlé (anti auto-écoute).
+        if (handsFreeRef.current) armRef.current();
+      };
+      u.onend = done;
+      u.onerror = done;
      setSpeaking(true);
      window.speechSynthesis.speak(u);
    },
    [ttsSupported],
  );

-  useEffect(() => () => {
-    recRef.current?.abort();
-    if (ttsSupported) window.speechSynthesis.cancel();
-  }, [ttsSupported]);
+  // Hands-free loop: one recognition session → wake-word detection → command → re-arm.
+  // Assigned in the hook body, so the closure is refreshed on every render.
+  armRef.current = (): void => {
+    if (!handsFreeRef.current) return; // hands-free was switched off in the meantime
+    if (ttsSupported && window.speechSynthesis.speaking) return; // wait for TTS to finish
+    const r = makeRecognition();
+    if (!r) return; // no recognizer could be created
+    recRef.current = r;
+    r.onresult = (e): void => {
+      // Only the first alternative of the first result is inspected.
+      const text = e.results?.[0]?.[0]?.transcript ?? "";
+      const cmd = extractCommand(text);
+      if (cmd) onCommandRef.current(cmd); // transcripts without "wake-word + command" are dropped
+    };
+    // The Web Speech API fires `end` after every `error`, so both handlers run for a
+    // failed session. Settle only once: otherwise each failure schedules two re-arms
+    // and the number of concurrent recognizers doubles on every error.
+    let settled = false;
+    const rearm = (): void => {
+      if (settled) return;
+      settled = true;
+      setListening(false);
+      // While TTS is speaking, no re-arm is scheduled from here.
+      if (handsFreeRef.current && !(ttsSupported && window.speechSynthesis.speaking)) {
+        setTimeout(() => armRef.current(), 400); // short pause before the next session
+      }
+    };
+    r.onend = rearm;
+    r.onerror = rearm;
+    setListening(true);
+    r.start();
+  };

-  return { sttSupported, ttsSupported, listening, speaking, listen, stopListening, speak, cancelSpeak };
+  const startHandsFree = useCallback((onCommand: (text: string) => void): void => { // enables hands-free mode and starts listening
+    onCommandRef.current = onCommand;
+    handsFreeRef.current = true; // set before arming: armRef returns early while this is false
+    setHandsFree(true);
+    armRef.current(); // start the first listening session
+  }, []);
+
+  const stopHandsFree = useCallback((): void => { // leaves hands-free mode
+    handsFreeRef.current = false; // cleared first so the session's end/error handler does not re-arm
+    setHandsFree(false);
+    recRef.current?.abort();
+    setListening(false);
+  }, []);
+
+  useEffect(
+    () => () => { // cleanup only: runs on unmount, or when ttsSupported changes
+      handsFreeRef.current = false; // keeps the hands-free loop from re-arming afterwards
+      recRef.current?.abort();
+      if (ttsSupported) window.speechSynthesis.cancel();
+    },
+    [ttsSupported],
+  );
+
+  return {
+    sttSupported,
+    ttsSupported,
+    listening,
+    speaking,
+    handsFree,
+    listen,
+    stopListening,
+    speak,
+    cancelSpeak,
+    startHandsFree,
+    stopHandsFree,
+  };
 }