feat: voix navigateur (STT push-to-talk + TTS) (v0.28.0)

Hook useSpeech (Web Speech API, fr-FR) : micro dicter→envoyer + lecture vocale des réponses (bascule persistée). 100% navigateur, zéro backend/GPU, dégrade si non supporté. Build OK. Palier de risque : reversible (front). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 07:22:18 +02:00
parent 2bfa58f440
commit 76ad3b62fd
3 changed files with 186 additions and 27 deletions
@@ -6,6 +6,13 @@ incompatibles. Chaque ligne renvoie à un commit dédié (un artefact = un commi

 ## [Unreleased]

+## [0.28.0] — 2026-06-23  — début Phase 6 (voix)
+### Added
+- `web/src/useSpeech.ts` : hook voix 100 % navigateur (Web Speech API), STT
+  (push-to-talk, fr-FR) + TTS, dégrade proprement si non supporté. Zéro backend/GPU.
+- Chat : bouton micro (dicter → envoyer), bascule "Voix ON/OFF" (persistée) qui lit
+  les réponses à voix haute. Pas d'emoji comme icône (texte). Build OK.
+
 ## [0.27.0] — 2026-06-23  — fin Phase 5 (auto-extension v1)
 ### Added
 - Outil local **`chlova.propose_asset`** (`src/autoext/tool.ts`) exposé à l'agent :
@@ -1,6 +1,7 @@
-import { useEffect, useRef, useState, type FormEvent } from "react";
+import { useCallback, useEffect, useRef, useState, type FormEvent } from "react";
 import { useAuth } from "../auth";
 import { api, ApiError } from "../api";
+import { useSpeech } from "../useSpeech";

 interface Msg {
  role: "user" | "assistant";
@@ -9,27 +10,39 @@ interface Msg {

 export function Chat() {
  const { token, logout } = useAuth();
+  const speech = useSpeech();
  const [messages, setMessages] = useState<Msg[]>([]);
  const [input, setInput] = useState("");
  const [busy, setBusy] = useState(false);
  const [error, setError] = useState<string | null>(null);
+  const [speakReplies, setSpeakReplies] = useState(() => localStorage.getItem("chlova.speak") === "1");
  const bottom = useRef<HTMLDivElement>(null);

  useEffect(() => {
    bottom.current?.scrollIntoView({ behavior: "smooth" });
  }, [messages, busy]);

-  const send = async (e: FormEvent): Promise<void> => {
-    e.preventDefault();
-    const text = input.trim();
-    if (!text || busy || !token) return;
+  /**
+   * Flips the "speak replies" preference, persists it under the
+   * "chlova.speak" localStorage key, and stops any ongoing playback when the
+   * preference is switched off.
+   */
+  const toggleSpeak = (): void => {
+    // Side effects are kept out of the state updater: React expects updater
+    // functions to be pure and may invoke them more than once (StrictMode).
+    const next = !speakReplies;
+    localStorage.setItem("chlova.speak", next ? "1" : "0");
+    if (!next) speech.cancelSpeak();
+    setSpeakReplies(next);
+  };
+
+  const sendText = useCallback(
+    async (text: string): Promise<void> => {
+      const t = text.trim();
+      if (!t || busy || !token) return;
      setInput("");
      setError(null);
-    setMessages((m) => [...m, { role: "user", text }]);
+      setMessages((m) => [...m, { role: "user", text: t }]);
      setBusy(true);
      try {
-      const { reply } = await api.chat(token, text);
+        const { reply } = await api.chat(token, t);
        setMessages((m) => [...m, { role: "assistant", text: reply }]);
+        if (speakReplies) speech.speak(reply);
      } catch (err) {
        if (err instanceof ApiError && err.status === 401) {
          logout();
@@ -39,22 +52,33 @@ export function Chat() {
      } finally {
        setBusy(false);
      }
+    },
+    [busy, token, speakReplies, speech, logout],
+  );
+
+  /** Form submit handler: blocks the native submission and sends the typed input. */
+  const submit = (event: FormEvent): void => {
+    event.preventDefault();
+    const pending = sendText(input);
+    void pending; // fire-and-forget: sendText handles its own errors
+  };
+
+  /**
+   * Microphone button handler (push-to-talk toggle): starts a recognition
+   * session when idle, stops the running one otherwise.
+   */
+  const mic = (): void => {
+    if (!speech.listening) {
+      // dictate → send: the recognized transcript is submitted directly
+      speech.listen((text) => void sendText(text));
+      return;
+    }
+    speech.stopListening();
   };

  return (
    <div className="flex h-full flex-col">
      <div className="flex-1 overflow-y-auto px-4 py-4 space-y-3">
-        {messages.length === 0 && (
-          <p className="text-muted text-sm">Pose une question à CHLOVA…</p>
-        )}
+        {messages.length === 0 && <p className="text-muted text-sm">Pose une question à CHLOVA…</p>}
        {messages.map((m, i) => (
          <div key={i} className={m.role === "user" ? "flex justify-end" : "flex justify-start"}>
            <div
              className={
                "max-w-[80%] whitespace-pre-wrap rounded-lg px-3 py-2 text-sm " +
-                (m.role === "user"
-                  ? "bg-surface-2 border border-accent/40"
-                  : "bg-surface border border-border font-mono")
+                (m.role === "user" ? "bg-surface-2 border border-accent/40" : "bg-surface border border-border font-mono")
              }
            >
              {m.text}
@@ -62,18 +86,40 @@ export function Chat() {
          </div>
        ))}
        {busy && <p className="text-muted text-sm animate-pulse">CHLOVA réfléchit…</p>}
+        {speech.speaking && <p className="text-accent text-sm">Lecture vocale…</p>}
        {error && <p role="alert" className="text-danger text-sm">{error}</p>}
        <div ref={bottom} />
      </div>

-      <form onSubmit={send} className="flex gap-2 border-t border-border bg-surface p-3">
+      <form onSubmit={submit} className="flex items-center gap-2 border-t border-border bg-surface p-3">
+        {speech.ttsSupported && (
+          <button
+            type="button"
+            onClick={toggleSpeak}
+            aria-label={speakReplies ? "Couper la voix" : "Activer la voix"}
+            title={speakReplies ? "Voix activée" : "Voix coupée"}
+            className={`rounded-md border px-3 py-2 text-sm cursor-pointer ring-accent ${speakReplies ? "border-accent text-accent" : "border-border text-muted"}`}
+          >
+            {speakReplies ? "Voix ON" : "Voix OFF"}
+          </button>
+        )}
        <input
          className="flex-1 rounded-md bg-surface-2 border border-border px-3 py-2 text-fg placeholder:text-muted ring-accent"
-          placeholder="Message…"
+          placeholder={speech.listening ? "Écoute…" : "Message…"}
          value={input}
          onChange={(e) => setInput(e.target.value)}
          disabled={busy}
        />
+        {speech.sttSupported && (
+          <button
+            type="button"
+            onClick={mic}
+            aria-label={speech.listening ? "Arrêter le micro" : "Parler"}
+            className={`rounded-md border px-3 py-2 text-sm cursor-pointer ring-accent ${speech.listening ? "border-accent text-accent animate-pulse" : "border-border text-muted"}`}
+          >
+            {speech.listening ? "Stop" : "Parler"}
+          </button>
+        )}
        <button
          type="submit"
          disabled={busy || !input.trim()}
@@ -0,0 +1,106 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+
+/**
+ * Voix (Phase 6) — 100 % navigateur (Web Speech API), aucun backend ni GPU.
+ * - STT : SpeechRecognition (webkit) — Chrome/Edge. Dégrade proprement ailleurs.
+ * - TTS : speechSynthesis — large support.
+ */
+
+// Minimal typings (the Web Speech API is not standardized in lib.dom).
+/**
+ * Shape of the recognition result event as consumed by this module: a
+ * two-level indexable list whose innermost entries carry a `transcript`.
+ * Only `results[0][0].transcript` is read.
+ */
+interface SREvent {
+  results: ArrayLike<ArrayLike<{ transcript: string }>>;
+}
+/**
+ * Minimal structural type for a speech-recognition instance, limited to the
+ * members this module reads, assigns, or calls.
+ */
+interface SR {
+  // Configuration fields, assigned once in makeRecognition().
+  lang: string;
+  interimResults: boolean;
+  continuous: boolean;
+  maxAlternatives: number;
+  // Session control.
+  start(): void;
+  stop(): void;
+  abort(): void;
+  // Event callbacks; null when no handler is attached.
+  onresult: ((e: SREvent) => void) | null;
+  onend: (() => void) | null;
+  onerror: ((e: unknown) => void) | null;
+}
+// Augments the global Window type with the unprefixed and webkit-prefixed
+// recognition constructors. Both are optional: either may be absent at runtime.
+declare global {
+  interface Window {
+    SpeechRecognition?: { new (): SR };
+    webkitSpeechRecognition?: { new (): SR };
+  }
+}
+
+/**
+ * Builds a recognition instance configured for a single French (fr-FR)
+ * utterance with final results only and one alternative.
+ *
+ * @returns The configured instance, or `null` when the browser exposes
+ *   neither `SpeechRecognition` nor `webkitSpeechRecognition`.
+ */
+function makeRecognition(): SR | null {
+  const RecognitionCtor = window.SpeechRecognition ?? window.webkitSpeechRecognition;
+  if (RecognitionCtor) {
+    const recognition = new RecognitionCtor();
+    recognition.lang = "fr-FR";
+    recognition.interimResults = false;
+    recognition.continuous = false;
+    recognition.maxAlternatives = 1;
+    return recognition;
+  }
+  return null;
+}
+
+/** Public surface returned by the `useSpeech` hook. */
+export interface UseSpeech {
+  /** True when a recognition constructor (standard or webkit) exists on `window`. */
+  sttSupported: boolean;
+  /** True when `speechSynthesis` exists on `window`. */
+  ttsSupported: boolean;
+  /** True while a recognition session is considered active. */
+  listening: boolean;
+  /** True while an utterance started by `speak` is considered playing. */
+  speaking: boolean;
+  /**
+   * Starts a recognition session; `onText` receives the first transcript of
+   * the result when it is non-empty. Does nothing when recognition is unavailable.
+   */
+  listen: (onText: (text: string) => void) => void;
+  /** Asks the current recognition session to stop and clears `listening`. */
+  stopListening: () => void;
+  /** Reads `text` aloud in fr-FR, replacing any queued speech; ignores blank text. */
+  speak: (text: string) => void;
+  /** Cancels queued/ongoing speech and clears `speaking`. */
+  cancelSpeak: () => void;
+}
+
+/**
+ * React hook exposing browser speech recognition (STT) and speech synthesis
+ * (TTS) through the Web Speech API, with feature flags so callers can hide
+ * controls when a capability is missing.
+ *
+ * State flags (`listening`, `speaking`) are only driven by the most recent
+ * recognition session / utterance: events from a superseded or cancelled one
+ * are ignored, so they cannot reset the flag of the one that replaced it.
+ *
+ * @returns Capability flags, live state flags, and the control callbacks.
+ */
+export function useSpeech(): UseSpeech {
+  const recRef = useRef<SR | null>(null);
+  // Utterance currently owned by the hook; used to discard stale end/error events.
+  const utteranceRef = useRef<SpeechSynthesisUtterance | null>(null);
+  const [listening, setListening] = useState(false);
+  const [speaking, setSpeaking] = useState(false);
+
+  const sttSupported = typeof window !== "undefined" && !!(window.SpeechRecognition ?? window.webkitSpeechRecognition);
+  const ttsSupported = typeof window !== "undefined" && "speechSynthesis" in window;
+
+  const stopListening = useCallback((): void => {
+    recRef.current?.stop();
+    setListening(false);
+  }, []);
+
+  const listen = useCallback((onText: (text: string) => void): void => {
+    const r = makeRecognition();
+    if (!r) return;
+    // A session that is still winding down must not overlap the new one.
+    recRef.current?.abort();
+    recRef.current = r;
+    // Shared end/error handler; a no-op once this session has been replaced.
+    const finish = (): void => {
+      if (recRef.current !== r) return;
+      recRef.current = null;
+      setListening(false);
+    };
+    r.onresult = (e): void => {
+      const text = e.results?.[0]?.[0]?.transcript ?? "";
+      if (text) onText(text);
+    };
+    r.onend = finish;
+    r.onerror = finish;
+    try {
+      r.start();
+      setListening(true);
+    } catch (err) {
+      // start() can throw synchronously; degrade instead of leaving the
+      // `listening` flag stuck on.
+      console.warn("useSpeech: speech recognition failed to start", err);
+      recRef.current = null;
+      setListening(false);
+    }
+  }, []);
+
+  const cancelSpeak = useCallback((): void => {
+    // Forget the utterance first so its cancellation event is ignored.
+    utteranceRef.current = null;
+    if (ttsSupported) window.speechSynthesis.cancel();
+    setSpeaking(false);
+  }, [ttsSupported]);
+
+  const speak = useCallback(
+    (text: string): void => {
+      if (!ttsSupported || !text.trim()) return;
+      window.speechSynthesis.cancel();
+      const u = new SpeechSynthesisUtterance(text);
+      u.lang = "fr-FR";
+      // The utterance cancelled just above reports end/error asynchronously;
+      // only the utterance still registered in the ref may clear `speaking`.
+      const finish = (): void => {
+        if (utteranceRef.current !== u) return;
+        utteranceRef.current = null;
+        setSpeaking(false);
+      };
+      u.onend = finish;
+      u.onerror = finish;
+      utteranceRef.current = u;
+      setSpeaking(true);
+      window.speechSynthesis.speak(u);
+    },
+    [ttsSupported],
+  );
+
+  // On unmount: release the microphone and silence playback. Refs are cleared
+  // first so late events from the aborted session/utterance do not set state.
+  useEffect(
+    () => () => {
+      const active = recRef.current;
+      recRef.current = null;
+      utteranceRef.current = null;
+      active?.abort();
+      if (ttsSupported) window.speechSynthesis.cancel();
+    },
+    [ttsSupported],
+  );
+
+  return { sttSupported, ttsSupported, listening, speaking, listen, stopListening, speak, cancelSpeak };
+}