feat: voix mains-libres + wake-word CHLOVA, fin Phase 6 v1 (v0.29.0)
Mode mains-libres : écoute en boucle déclenchée par le wake-word « CHLOVA … » (extractCommand), micro en pause pendant le TTS pour éviter l'auto-écoute ; réponses lues d'office. Bouton Libre + indicateur. 100% navigateur. Build OK. Palier de risque : reversible (front). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,13 @@ incompatibles. Chaque ligne renvoie à un commit dédié (un artefact = un commi
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.29.0] — 2026-06-23 — fin Phase 6 (voix v1)
|
||||||
|
### Added
|
||||||
|
- `useSpeech` : mode **mains-libres** + wake-word « CHLOVA » (`extractCommand`),
|
||||||
|
écoute en boucle, micro en pause pendant le TTS (anti auto-écoute).
|
||||||
|
- Chat : bouton "Libre" (mains-libres) ; en mains-libres les réponses sont lues
|
||||||
|
d'office. Indicateur d'écoute. Build OK. (README web : section voix.)
|
||||||
|
|
||||||
## [0.28.0] — 2026-06-23 — début Phase 6 (voix)
|
## [0.28.0] — 2026-06-23 — début Phase 6 (voix)
|
||||||
### Added
|
### Added
|
||||||
- `web/src/useSpeech.ts` : hook voix 100 % navigateur (Web Speech API), STT
|
- `web/src/useSpeech.ts` : hook voix 100 % navigateur (Web Speech API), STT
|
||||||
|
|||||||
+11
-2
@@ -23,5 +23,14 @@ Le backend doit tourner avec l'auth configurée (`CHLOVA_ADMIN_*`, voir
|
|||||||
| `src/pages/Chat.tsx` | Conversation agent (v0.23.0). |
|
| `src/pages/Chat.tsx` | Conversation agent (v0.23.0). |
|
||||||
| `src/pages/Review.tsx` | Need-review : approuver/refuser (v0.24.0). |
|
| `src/pages/Review.tsx` | Need-review : approuver/refuser (v0.24.0). |
|
||||||
|
|
||||||
## Périmètre v1
|
## Voix (Phase 6)
|
||||||
Login → Chat → Review. Voix + app RN : phases ultérieures (API commune réutilisée).
|
100 % navigateur (Web Speech API), zéro backend/GPU :
|
||||||
|
- **Parler** : dictée push-to-talk (fr-FR) → envoyée à l'agent.
|
||||||
|
- **Voix ON/OFF** : lecture vocale des réponses (TTS), réglage persistant.
|
||||||
|
- **Libre** : mains-libres, déclenché par le wake-word « CHLOVA … » ; le micro se
|
||||||
|
met en pause pendant la synthèse pour éviter l'auto-écoute.
|
||||||
|
|
||||||
|
STT = Chrome/Edge (webkit) ; TTS = large support. Dégrade proprement sinon.
|
||||||
|
|
||||||
|
## Périmètre
|
||||||
|
Login → Chat (+ voix) → Review. App RN : phase ultérieure (API commune réutilisée).
|
||||||
|
|||||||
+22
-3
@@ -42,7 +42,7 @@ export function Chat() {
|
|||||||
try {
|
try {
|
||||||
const { reply } = await api.chat(token, t);
|
const { reply } = await api.chat(token, t);
|
||||||
setMessages((m) => [...m, { role: "assistant", text: reply }]);
|
setMessages((m) => [...m, { role: "assistant", text: reply }]);
|
||||||
if (speakReplies) speech.speak(reply);
|
if (speakReplies || speech.handsFree) speech.speak(reply);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (err instanceof ApiError && err.status === 401) {
|
if (err instanceof ApiError && err.status === 401) {
|
||||||
logout();
|
logout();
|
||||||
@@ -62,13 +62,18 @@ export function Chat() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const mic = (): void => {
|
const mic = (): void => {
|
||||||
if (speech.listening) {
|
if (speech.listening && !speech.handsFree) {
|
||||||
speech.stopListening();
|
speech.stopListening();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
speech.listen((text) => void sendText(text)); // dicter → envoyer
|
speech.listen((text) => void sendText(text)); // dicter → envoyer
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const toggleHandsFree = (): void => {
|
||||||
|
if (speech.handsFree) speech.stopHandsFree();
|
||||||
|
else speech.startHandsFree((text) => void sendText(text)); // « CHLOVA … » → envoyer
|
||||||
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex h-full flex-col">
|
<div className="flex h-full flex-col">
|
||||||
<div className="flex-1 overflow-y-auto px-4 py-4 space-y-3">
|
<div className="flex-1 overflow-y-auto px-4 py-4 space-y-3">
|
||||||
@@ -86,6 +91,9 @@ export function Chat() {
|
|||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
{busy && <p className="text-muted text-sm animate-pulse">CHLOVA réfléchit…</p>}
|
{busy && <p className="text-muted text-sm animate-pulse">CHLOVA réfléchit…</p>}
|
||||||
|
{speech.handsFree && !busy && !speech.speaking && (
|
||||||
|
<p className="text-accent text-sm">Mains libres — dis « CHLOVA … »</p>
|
||||||
|
)}
|
||||||
{speech.speaking && <p className="text-accent text-sm">Lecture vocale…</p>}
|
{speech.speaking && <p className="text-accent text-sm">Lecture vocale…</p>}
|
||||||
{error && <p role="alert" className="text-danger text-sm">{error}</p>}
|
{error && <p role="alert" className="text-danger text-sm">{error}</p>}
|
||||||
<div ref={bottom} />
|
<div ref={bottom} />
|
||||||
@@ -110,7 +118,7 @@ export function Chat() {
|
|||||||
onChange={(e) => setInput(e.target.value)}
|
onChange={(e) => setInput(e.target.value)}
|
||||||
disabled={busy}
|
disabled={busy}
|
||||||
/>
|
/>
|
||||||
{speech.sttSupported && (
|
{speech.sttSupported && !speech.handsFree && (
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
onClick={mic}
|
onClick={mic}
|
||||||
@@ -120,6 +128,17 @@ export function Chat() {
|
|||||||
{speech.listening ? "Stop" : "Parler"}
|
{speech.listening ? "Stop" : "Parler"}
|
||||||
</button>
|
</button>
|
||||||
)}
|
)}
|
||||||
|
{speech.sttSupported && (
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={toggleHandsFree}
|
||||||
|
aria-label={speech.handsFree ? "Couper les mains libres" : "Activer les mains libres"}
|
||||||
|
title="Mains libres (wake-word CHLOVA)"
|
||||||
|
className={`rounded-md border px-3 py-2 text-sm cursor-pointer ring-accent ${speech.handsFree ? "border-accent text-accent" : "border-border text-muted"}`}
|
||||||
|
>
|
||||||
|
{speech.handsFree ? "Libre ON" : "Libre"}
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
<button
|
<button
|
||||||
type="submit"
|
type="submit"
|
||||||
disabled={busy || !input.trim()}
|
disabled={busy || !input.trim()}
|
||||||
|
|||||||
+85
-7
@@ -4,9 +4,10 @@ import { useCallback, useEffect, useRef, useState } from "react";
|
|||||||
* Voix (Phase 6) — 100 % navigateur (Web Speech API), aucun backend ni GPU.
|
* Voix (Phase 6) — 100 % navigateur (Web Speech API), aucun backend ni GPU.
|
||||||
* - STT : SpeechRecognition (webkit) — Chrome/Edge. Dégrade proprement ailleurs.
|
* - STT : SpeechRecognition (webkit) — Chrome/Edge. Dégrade proprement ailleurs.
|
||||||
* - TTS : speechSynthesis — large support.
|
* - TTS : speechSynthesis — large support.
|
||||||
|
* - Mains-libres : écoute en boucle, déclenchée par le wake-word « CHLOVA ».
|
||||||
|
* Met le micro en pause pendant la synthèse vocale (évite l'auto-écoute).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Typages minimaux (Web Speech API non standardisée dans lib.dom).
|
|
||||||
interface SREvent {
|
interface SREvent {
|
||||||
results: ArrayLike<ArrayLike<{ transcript: string }>>;
|
results: ArrayLike<ArrayLike<{ transcript: string }>>;
|
||||||
}
|
}
|
||||||
@@ -40,23 +41,41 @@ function makeRecognition(): SR | null {
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Accepted spellings of the wake-word as produced by speech-to-text engines.
const WAKE_WORD = "(?:chlova|clova|klova)";
// Whitespace and punctuation that may separate the wake-word from the command
// (includes "!", "?", ";", dashes and the ellipsis, which STT may insert).
const WAKE_SEPARATORS = "[\\s,;:.!?…–—-]*";
// One wake-word, optionally repeated ("chlova, chlova …"), then its trailing
// separators. Case-insensitive; only whole words match.
const WAKE = new RegExp(
  `\\b${WAKE_WORD}\\b(?:${WAKE_SEPARATORS}${WAKE_WORD}\\b)*${WAKE_SEPARATORS}`,
  "i",
);

/**
 * Extracts the spoken command that follows the wake-word.
 *
 * The first occurrence of the wake-word (any accepted spelling, any case) marks
 * the start; immediately repeated wake-words and separating punctuation are
 * dropped, and the remaining text is trimmed.
 *
 * @param transcript Raw text returned by the speech recognizer.
 * @returns The command text, or `null` when the wake-word is absent or nothing
 *   meaningful follows it.
 */
export function extractCommand(transcript: string): string | null {
  const match = WAKE.exec(transcript);
  if (!match) return null;
  const command = transcript.slice(match.index + match[0].length).trim();
  return command.length > 0 ? command : null;
}
|
||||||
|
|
||||||
export interface UseSpeech {
|
export interface UseSpeech {
|
||||||
sttSupported: boolean;
|
sttSupported: boolean;
|
||||||
ttsSupported: boolean;
|
ttsSupported: boolean;
|
||||||
listening: boolean;
|
listening: boolean;
|
||||||
speaking: boolean;
|
speaking: boolean;
|
||||||
|
handsFree: boolean;
|
||||||
listen: (onText: (text: string) => void) => void;
|
listen: (onText: (text: string) => void) => void;
|
||||||
stopListening: () => void;
|
stopListening: () => void;
|
||||||
speak: (text: string) => void;
|
speak: (text: string) => void;
|
||||||
cancelSpeak: () => void;
|
cancelSpeak: () => void;
|
||||||
|
startHandsFree: (onCommand: (text: string) => void) => void;
|
||||||
|
stopHandsFree: () => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function useSpeech(): UseSpeech {
|
export function useSpeech(): UseSpeech {
|
||||||
const recRef = useRef<SR | null>(null);
|
const recRef = useRef<SR | null>(null);
|
||||||
|
const handsFreeRef = useRef(false);
|
||||||
|
const onCommandRef = useRef<(t: string) => void>(() => {});
|
||||||
|
const armRef = useRef<() => void>(() => {});
|
||||||
const [listening, setListening] = useState(false);
|
const [listening, setListening] = useState(false);
|
||||||
const [speaking, setSpeaking] = useState(false);
|
const [speaking, setSpeaking] = useState(false);
|
||||||
|
const [handsFree, setHandsFree] = useState(false);
|
||||||
|
|
||||||
const sttSupported = typeof window !== "undefined" && !!(window.SpeechRecognition ?? window.webkitSpeechRecognition);
|
const sttSupported =
|
||||||
|
typeof window !== "undefined" && !!(window.SpeechRecognition ?? window.webkitSpeechRecognition);
|
||||||
const ttsSupported = typeof window !== "undefined" && "speechSynthesis" in window;
|
const ttsSupported = typeof window !== "undefined" && "speechSynthesis" in window;
|
||||||
|
|
||||||
const stopListening = useCallback((): void => {
|
const stopListening = useCallback((): void => {
|
||||||
@@ -89,18 +108,77 @@ export function useSpeech(): UseSpeech {
|
|||||||
window.speechSynthesis.cancel();
|
window.speechSynthesis.cancel();
|
||||||
const u = new SpeechSynthesisUtterance(text);
|
const u = new SpeechSynthesisUtterance(text);
|
||||||
u.lang = "fr-FR";
|
u.lang = "fr-FR";
|
||||||
u.onend = (): void => setSpeaking(false);
|
const done = (): void => {
|
||||||
u.onerror = (): void => setSpeaking(false);
|
setSpeaking(false);
|
||||||
|
// En mains-libres, on réarme l'écoute APRÈS avoir parlé (anti auto-écoute).
|
||||||
|
if (handsFreeRef.current) armRef.current();
|
||||||
|
};
|
||||||
|
u.onend = done;
|
||||||
|
u.onerror = done;
|
||||||
setSpeaking(true);
|
setSpeaking(true);
|
||||||
window.speechSynthesis.speak(u);
|
window.speechSynthesis.speak(u);
|
||||||
},
|
},
|
||||||
[ttsSupported],
|
[ttsSupported],
|
||||||
);
|
);
|
||||||
|
|
||||||
useEffect(() => () => {
|
// Boucle mains-libres : une écoute → détecte le wake-word → commande → réarme.
|
||||||
|
armRef.current = (): void => {
|
||||||
|
if (!handsFreeRef.current) return;
|
||||||
|
if (ttsSupported && window.speechSynthesis.speaking) return; // attend la fin du TTS
|
||||||
|
const r = makeRecognition();
|
||||||
|
if (!r) return;
|
||||||
|
recRef.current = r;
|
||||||
|
r.onresult = (e): void => {
|
||||||
|
const text = e.results?.[0]?.[0]?.transcript ?? "";
|
||||||
|
const cmd = extractCommand(text);
|
||||||
|
if (cmd) onCommandRef.current(cmd);
|
||||||
|
};
|
||||||
|
const rearm = (): void => {
|
||||||
|
setListening(false);
|
||||||
|
if (handsFreeRef.current && !(ttsSupported && window.speechSynthesis.speaking)) {
|
||||||
|
setTimeout(() => armRef.current(), 400);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
r.onend = rearm;
|
||||||
|
r.onerror = rearm;
|
||||||
|
setListening(true);
|
||||||
|
r.start();
|
||||||
|
};
|
||||||
|
|
||||||
|
const startHandsFree = useCallback((onCommand: (text: string) => void): void => {
|
||||||
|
onCommandRef.current = onCommand;
|
||||||
|
handsFreeRef.current = true;
|
||||||
|
setHandsFree(true);
|
||||||
|
armRef.current();
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const stopHandsFree = useCallback((): void => {
|
||||||
|
handsFreeRef.current = false;
|
||||||
|
setHandsFree(false);
|
||||||
|
recRef.current?.abort();
|
||||||
|
setListening(false);
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
useEffect(
|
||||||
|
() => () => {
|
||||||
|
handsFreeRef.current = false;
|
||||||
recRef.current?.abort();
|
recRef.current?.abort();
|
||||||
if (ttsSupported) window.speechSynthesis.cancel();
|
if (ttsSupported) window.speechSynthesis.cancel();
|
||||||
}, [ttsSupported]);
|
},
|
||||||
|
[ttsSupported],
|
||||||
|
);
|
||||||
|
|
||||||
return { sttSupported, ttsSupported, listening, speaking, listen, stopListening, speak, cancelSpeak };
|
return {
|
||||||
|
sttSupported,
|
||||||
|
ttsSupported,
|
||||||
|
listening,
|
||||||
|
speaking,
|
||||||
|
handsFree,
|
||||||
|
listen,
|
||||||
|
stopListening,
|
||||||
|
speak,
|
||||||
|
cancelSpeak,
|
||||||
|
startHandsFree,
|
||||||
|
stopHandsFree,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user