//! Voice Integration - Transcription and TTS via liquid-rust
//!
//! Provides voice capabilities for the dashboard:
//! - Speech-to-text transcription with salience analysis
//! - Speaker recognition via Phoenix Protocol
//! - Text-to-speech with multiple voice personas
//!
//! Requires the `voice` feature flag and liquid-rust models.
//! Currently returns "not implemented" stubs until liquid-rust is integrated.

11use axum::{
12    extract::Multipart,
13    http::StatusCode,
14    Json,
15};
16use serde::{Deserialize, Serialize};
17
/// Transcription result with salience and speaker info.
///
/// JSON response body of `POST /api/voice/transcribe` (see [`transcribe`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionResult {
    /// Transcribed text
    pub text: String,
    /// Salience score (0.0 to 1.0) - how important/urgent the utterance is
    pub salience: f32,
    /// Identified speaker (if registered in Phoenix DB)
    pub speaker: Option<String>,
    /// Speaker identification confidence
    /// (presumably 0.0 to 1.0 like `salience` — confirm once liquid-rust lands)
    pub speaker_confidence: Option<f32>,
    /// Emotional profile from voice analysis, when available
    pub emotion: Option<EmotionProfile>,
}

/// Emotional profile from voice analysis.
///
/// Carried inside [`TranscriptionResult::emotion`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmotionProfile {
    /// Valence: positive (1.0) to negative (-1.0)
    pub valence: f32,
    /// Arousal: excited (1.0) to calm (0.0)
    pub arousal: f32,
    /// Voice stability (0.0 to 1.0)
    pub stability: f32,
}

/// TTS request.
///
/// JSON body of `POST /api/voice/speak`,
/// e.g. `{ "text": "Hello", "voice": "aye" }` (see [`speak`]).
#[derive(Debug, Deserialize)]
pub struct SpeakRequest {
    /// Text to speak
    pub text: String,
    /// Voice persona to use; defaults to "aye" when omitted from the request
    #[serde(default = "default_voice")]
    pub voice: String,
}

/// Default voice persona, used by serde when `SpeakRequest.voice` is omitted.
fn default_voice() -> String {
    String::from("aye")
}

/// Speaker registration request.
///
/// NOTE(review): the stub `register_speaker` handler currently consumes
/// multipart form data directly, so this JSON request type appears unused
/// until the real implementation lands — verify before removing.
#[derive(Debug, Deserialize)]
pub struct RegisterSpeakerRequest {
    /// Label for the speaker (e.g., "Hue")
    pub label: String,
}

// =============================================================================
// Stub API Handlers (until liquid-rust is integrated)
// =============================================================================

69/// Transcribe uploaded audio
70///
71/// POST /api/voice/transcribe
72/// Content-Type: multipart/form-data
73///
74/// Returns: TranscriptionResult with text, salience, and optional speaker ID
75pub async fn transcribe(
76    mut _multipart: Multipart,
77) -> Result<Json<TranscriptionResult>, (StatusCode, String)> {
78    // TODO: Enable when liquid-rust is integrated
79    // For now, return a stub response
80    Err((
81        StatusCode::NOT_IMPLEMENTED,
82        "Voice transcription requires liquid-rust integration. \
83         See docs/plans/2025-11-11-realtime-collaborative-dashboard-design.md"
84            .to_string(),
85    ))
86}
87
88/// Register a speaker for Phoenix Protocol recognition
89///
90/// POST /api/voice/register
91/// Content-Type: multipart/form-data
92/// Fields: label (text), audio (file)
93pub async fn register_speaker(
94    mut _multipart: Multipart,
95) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
96    Err((
97        StatusCode::NOT_IMPLEMENTED,
98        "Speaker registration requires liquid-rust integration.".to_string(),
99    ))
100}
101
102/// Generate speech from text using TTS
103///
104/// POST /api/voice/speak
105/// Content-Type: application/json
106/// Body: { "text": "Hello", "voice": "aye" }
107///
108/// Available voices: aye, omnimom, claude, alert, sky, adam, bella, nicole, michael
109pub async fn speak(
110    Json(_req): Json<SpeakRequest>,
111) -> Result<impl axum::response::IntoResponse, (StatusCode, String)> {
112    Err::<([(axum::http::header::HeaderName, &str); 1], Vec<u8>), _>((
113        StatusCode::NOT_IMPLEMENTED,
114        "TTS requires liquid-rust integration.".to_string(),
115    ))
116}
117
// =============================================================================
// Future: Full implementation when liquid-rust is integrated
// =============================================================================
//
// When liquid-rust is ready, this module will provide:
//
// 1. VoiceEngine struct holding:
//    - LfmModel for transcription
//    - PhoenixSpeakerDB for speaker recognition
//    - TtsEngine for text-to-speech
//
// 2. Real implementations of:
//    - transcribe() -> Decode audio, run inference, analyze salience
//    - register_speaker() -> Add voice to Phoenix DB
//    - speak() -> Generate WAV audio from text
//
// 3. Integration with dashboard state:
//    - Voice hints sent via WebSocket
//    - Salience metrics displayed in UI
//    - Speaker identification in activity log
//
// See ../liquid-rust/examples/aye_ears.rs for reference implementation.