1use serde::{Deserialize, Serialize};
2
/// Language identifier carried as a plain `String`.
///
/// The tests in this file use `"pt-BR"` as an example value; no validation is
/// performed by this alias.
pub type LanguageCode = String;
5
6#[derive(Debug, Clone, Default, Deserialize, Serialize)]
8#[serde(rename_all = "camelCase")]
9pub struct ListenConfig {
10 #[serde(default)]
13 pub language: Option<LanguageCode>,
14
15 #[serde(default, rename = "interimResults")]
17 pub interim_results: bool,
18
19 #[serde(default)]
22 pub continuous: bool,
23
24 #[serde(default, rename = "maxDuration")]
26 pub max_duration: u32,
27
28 #[serde(default, rename = "maxAlternatives")]
30 pub max_alternatives: Option<u32>,
31
32 #[serde(default, rename = "onDevice")]
35 pub on_device: bool,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
40#[serde(rename_all = "lowercase")]
41#[derive(Default)]
42pub enum RecognitionState {
43 #[default]
45 Idle,
46 Listening,
48 Processing,
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
54#[serde(rename_all = "camelCase")]
55pub struct RecognitionResult {
56 pub transcript: String,
58
59 pub is_final: bool,
61
62 #[serde(default)]
64 pub confidence: Option<f32>,
65
66 #[serde(default, skip_serializing_if = "Option::is_none")]
69 pub audio_data: Option<String>,
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
74#[serde(rename_all = "camelCase")]
75pub struct RecognitionStatus {
76 pub state: RecognitionState,
78
79 pub is_available: bool,
81
82 #[serde(default)]
84 pub language: Option<LanguageCode>,
85}
86
87#[derive(Debug, Clone, Serialize, Deserialize)]
89#[serde(rename_all = "camelCase")]
90pub struct SupportedLanguage {
91 pub code: LanguageCode,
93
94 pub name: String,
96
97 #[serde(default)]
99 pub installed: Option<bool>,
100}
101
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
104#[serde(rename_all = "lowercase")]
105pub enum PermissionStatus {
106 Granted,
108 Denied,
110 Unknown,
112}
113
114#[derive(Debug, Clone, Serialize, Deserialize)]
116#[serde(rename_all = "camelCase")]
117pub struct PermissionResponse {
118 pub microphone: PermissionStatus,
120
121 pub speech_recognition: PermissionStatus,
123}
124
125#[derive(Debug, Clone, Serialize, Deserialize)]
127#[serde(rename_all = "camelCase")]
128pub struct AvailabilityResponse {
129 pub available: bool,
131
132 #[serde(default)]
134 pub reason: Option<String>,
135}
136
137#[derive(Debug, Clone, Serialize, Deserialize)]
139#[serde(rename_all = "camelCase")]
140pub struct SupportedLanguagesResponse {
141 pub languages: Vec<SupportedLanguage>,
143}
144
145#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
147#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
148#[derive(Default)]
149pub enum SttErrorCode {
150 #[default]
152 None,
153 NotAvailable,
155 PermissionDenied,
157 SpeechPermissionDenied,
159 NetworkError,
161 AudioError,
163 Timeout,
165 NoSpeech,
167 LanguageNotSupported,
169 Cancelled,
171 AlreadyListening,
173 NotListening,
175 Busy,
177 ModelNotInstalled,
179 Unknown,
181}
182
183impl SttErrorCode {
184 pub fn description(&self) -> &'static str {
186 match self {
187 Self::None => "No error",
188 Self::NotAvailable => "Speech recognition is not available on this device",
189 Self::PermissionDenied => "Microphone permission was denied",
190 Self::SpeechPermissionDenied => "Speech recognition permission was denied",
191 Self::NetworkError => "Network error during recognition",
192 Self::AudioError => "Error accessing audio input",
193 Self::Timeout => "Recognition timed out",
194 Self::NoSpeech => "No speech was detected",
195 Self::LanguageNotSupported => "The requested language is not supported",
196 Self::Cancelled => "Recognition was cancelled",
197 Self::AlreadyListening => "Already listening for speech",
198 Self::NotListening => "Not currently listening",
199 Self::Busy => "Speech recognition service is busy",
200 Self::ModelNotInstalled => "No speech recognition model has been downloaded",
201 Self::Unknown => "An unknown error occurred",
202 }
203 }
204
205 pub fn code(&self) -> i32 {
207 match self {
208 Self::None => 0,
209 Self::NotAvailable => -1,
210 Self::PermissionDenied => -2,
211 Self::SpeechPermissionDenied => -3,
212 Self::NetworkError => -4,
213 Self::AudioError => -5,
214 Self::Timeout => -6,
215 Self::NoSpeech => -7,
216 Self::LanguageNotSupported => -8,
217 Self::Cancelled => -9,
218 Self::AlreadyListening => -10,
219 Self::NotListening => -11,
220 Self::Busy => -12,
221 Self::ModelNotInstalled => -13,
222 Self::Unknown => -99,
223 }
224 }
225}
226
227#[derive(Debug, Clone, Serialize, Deserialize)]
229#[serde(rename_all = "camelCase")]
230pub struct SttError {
231 pub code: SttErrorCode,
233 pub message: String,
235 #[serde(default)]
237 pub details: Option<String>,
238}
239
240#[derive(Debug, Clone, Serialize, Deserialize)]
241#[serde(rename_all = "camelCase")]
242pub struct WhisperModelInfo {
243 pub id: String,
246 pub display_name: String,
248 pub size_mb: u32,
251 pub required_memory_mb: u32,
256 pub installed: bool,
258 pub active: bool,
260 pub recommended: bool,
263 pub tier: String,
267 #[serde(default)]
271 pub language: Option<String>,
272 pub fits_in_memory: bool,
276 pub advanced: bool,
280}
281
282#[derive(Debug, Clone, Serialize, Deserialize)]
283#[serde(rename_all = "camelCase")]
284pub struct WhisperModelsResponse {
285 pub models: Vec<WhisperModelInfo>,
287 #[serde(default)]
289 pub active: Option<String>,
290 pub total_disk_bytes: u64,
293 pub system_memory_mb: u32,
297}
298
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty JSON object must deserialize with every field at its default,
    /// including the two fields the wire format adds in camelCase
    /// (`maxAlternatives`, `onDevice`).
    #[test]
    fn test_listen_config_defaults() {
        let config: ListenConfig = serde_json::from_str("{}").unwrap();
        assert!(config.language.is_none());
        assert!(!config.interim_results);
        assert!(!config.continuous);
        assert_eq!(config.max_duration, 0);
        assert!(config.max_alternatives.is_none());
        assert!(!config.on_device);
    }

    /// Every camelCase key must be picked up by the matching snake_case field.
    #[test]
    fn test_listen_config_full() {
        let json = r#"{
            "language": "pt-BR",
            "interimResults": true,
            "continuous": true,
            "maxDuration": 30,
            "maxAlternatives": 3,
            "onDevice": true
        }"#;
        let config: ListenConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.language, Some("pt-BR".to_string()));
        assert!(config.interim_results);
        assert!(config.continuous);
        assert_eq!(config.max_duration, 30);
        assert_eq!(config.max_alternatives, Some(3));
        assert!(config.on_device);
    }

    /// `RecognitionState` variants serialize as lowercase strings.
    #[test]
    fn test_recognition_state_serialization() {
        assert_eq!(
            serde_json::to_string(&RecognitionState::Idle).unwrap(),
            "\"idle\""
        );
        assert_eq!(
            serde_json::to_string(&RecognitionState::Listening).unwrap(),
            "\"listening\""
        );
        assert_eq!(
            serde_json::to_string(&RecognitionState::Processing).unwrap(),
            "\"processing\""
        );
    }

    /// Result fields serialize with camelCase keys, and a `None` audio
    /// payload is left out of the output entirely.
    #[test]
    fn test_recognition_result() {
        let result = RecognitionResult {
            transcript: "Hello world".to_string(),
            is_final: true,
            confidence: Some(0.95),
            audio_data: None,
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"transcript\":\"Hello world\""));
        assert!(json.contains("\"isFinal\":true"));
        assert!(json.contains("\"confidence\":0.95"));
        // `skip_serializing_if = "Option::is_none"` must drop the key.
        assert!(!json.contains("audioData"));
    }

    /// `PermissionStatus` variants serialize as lowercase strings.
    #[test]
    fn test_permission_status_serialization() {
        assert_eq!(
            serde_json::to_string(&PermissionStatus::Granted).unwrap(),
            "\"granted\""
        );
        assert_eq!(
            serde_json::to_string(&PermissionStatus::Denied).unwrap(),
            "\"denied\""
        );
        assert_eq!(
            serde_json::to_string(&PermissionStatus::Unknown).unwrap(),
            "\"unknown\""
        );
    }

    /// `SttErrorCode` serializes in SCREAMING_SNAKE_CASE and exposes stable
    /// numeric codes; the default is the "no error" code.
    #[test]
    fn test_stt_error_code_wire_names_and_codes() {
        assert_eq!(
            serde_json::to_string(&SttErrorCode::NotAvailable).unwrap(),
            "\"NOT_AVAILABLE\""
        );
        assert_eq!(
            serde_json::to_string(&SttErrorCode::SpeechPermissionDenied).unwrap(),
            "\"SPEECH_PERMISSION_DENIED\""
        );
        assert_eq!(SttErrorCode::default(), SttErrorCode::None);
        assert_eq!(SttErrorCode::None.code(), 0);
        assert_eq!(SttErrorCode::ModelNotInstalled.code(), -13);
        assert_eq!(SttErrorCode::Unknown.code(), -99);
    }
}