Skip to main content

tauri_plugin_stt/
models.rs

1use serde::{Deserialize, Serialize};
2
/// Language tag that selects the recognition language, written like
/// `"en-US"`, `"pt-BR"` or `"ja-JP"`.
pub type LanguageCode = String;
5
6/// Configuration for starting speech recognition
7#[derive(Debug, Clone, Default, Deserialize, Serialize)]
8#[serde(rename_all = "camelCase")]
9pub struct ListenConfig {
10    /// Language code for recognition (e.g., "en-US", "pt-BR")
11    /// If not specified, uses device default language
12    #[serde(default)]
13    pub language: Option<LanguageCode>,
14
15    /// Whether to return interim (partial) results
16    #[serde(default, rename = "interimResults")]
17    pub interim_results: bool,
18
19    /// Whether to continue listening after getting a result
20    /// If false, stops after first final result
21    #[serde(default)]
22    pub continuous: bool,
23
24    /// Maximum duration to listen in milliseconds (0 = no limit)
25    #[serde(default, rename = "maxDuration")]
26    pub max_duration: u32,
27
28    /// Maximum number of alternative transcriptions
29    #[serde(default, rename = "maxAlternatives")]
30    pub max_alternatives: Option<u32>,
31
32    /// Use on-device recognition only (iOS 13+, no network required)
33    /// When true, recognition works offline but may be less accurate
34    #[serde(default, rename = "onDevice")]
35    pub on_device: bool,
36}
37
38/// Recognition state
39#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
40#[serde(rename_all = "lowercase")]
41pub enum RecognitionState {
42    /// Not currently listening
43    Idle,
44    /// Actively listening for speech
45    Listening,
46    /// Processing audio (may briefly occur between utterances)
47    Processing,
48}
49
50impl Default for RecognitionState {
51    fn default() -> Self {
52        Self::Idle
53    }
54}
55
56/// A speech recognition result
57#[derive(Debug, Clone, Serialize, Deserialize)]
58#[serde(rename_all = "camelCase")]
59pub struct RecognitionResult {
60    /// The recognized text
61    pub transcript: String,
62
63    /// Whether this is a final result (vs interim/partial)
64    pub is_final: bool,
65
66    /// Confidence score (0.0 to 1.0), if available
67    #[serde(default)]
68    pub confidence: Option<f32>,
69}
70
71/// Current status of speech recognition
72#[derive(Debug, Clone, Serialize, Deserialize)]
73#[serde(rename_all = "camelCase")]
74pub struct RecognitionStatus {
75    /// Current state
76    pub state: RecognitionState,
77
78    /// Whether STT is available on this device
79    pub is_available: bool,
80
81    /// Current language being used
82    #[serde(default)]
83    pub language: Option<LanguageCode>,
84}
85
86/// Supported language information
87#[derive(Debug, Clone, Serialize, Deserialize)]
88#[serde(rename_all = "camelCase")]
89pub struct SupportedLanguage {
90    /// Language code (e.g., "en-US")
91    pub code: LanguageCode,
92
93    /// Human-readable name (e.g., "English (United States)")
94    pub name: String,
95
96    /// Whether the model for this language is installed locally (desktop only)
97    #[serde(default)]
98    pub installed: Option<bool>,
99}
100
101/// Permission status
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
103#[serde(rename_all = "lowercase")]
104pub enum PermissionStatus {
105    /// Permission has been granted
106    Granted,
107    /// Permission has been denied
108    Denied,
109    /// Permission hasn't been requested yet
110    Unknown,
111}
112
113/// Response for permission check
114#[derive(Debug, Clone, Serialize, Deserialize)]
115#[serde(rename_all = "camelCase")]
116pub struct PermissionResponse {
117    /// Microphone permission status
118    pub microphone: PermissionStatus,
119
120    /// Speech recognition permission status (iOS/macOS specific)
121    pub speech_recognition: PermissionStatus,
122}
123
124/// Response for availability check
125#[derive(Debug, Clone, Serialize, Deserialize)]
126#[serde(rename_all = "camelCase")]
127pub struct AvailabilityResponse {
128    /// Whether STT is available
129    pub available: bool,
130
131    /// Reason if not available
132    #[serde(default)]
133    pub reason: Option<String>,
134}
135
136/// Response for supported languages
137#[derive(Debug, Clone, Serialize, Deserialize)]
138#[serde(rename_all = "camelCase")]
139pub struct SupportedLanguagesResponse {
140    /// List of supported languages
141    pub languages: Vec<SupportedLanguage>,
142}
143
144/// Unified error codes for cross-platform consistency
145#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
146#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
147pub enum SttErrorCode {
148    /// No error
149    None,
150    /// Speech recognition service not available
151    NotAvailable,
152    /// Microphone permission denied
153    PermissionDenied,
154    /// Speech recognition permission denied (iOS)
155    SpeechPermissionDenied,
156    /// Network error (server-based recognition)
157    NetworkError,
158    /// Audio recording error
159    AudioError,
160    /// Recognition timed out (maxDuration reached)
161    Timeout,
162    /// No speech detected
163    NoSpeech,
164    /// Language not supported
165    LanguageNotSupported,
166    /// Recognition was cancelled
167    Cancelled,
168    /// Already listening
169    AlreadyListening,
170    /// Not currently listening
171    NotListening,
172    /// Service busy
173    Busy,
174    /// Unknown error
175    Unknown,
176}
177
178impl Default for SttErrorCode {
179    fn default() -> Self {
180        Self::None
181    }
182}
183
184impl SttErrorCode {
185    /// Get a human-readable description of the error
186    pub fn description(&self) -> &'static str {
187        match self {
188            Self::None => "No error",
189            Self::NotAvailable => "Speech recognition is not available on this device",
190            Self::PermissionDenied => "Microphone permission was denied",
191            Self::SpeechPermissionDenied => "Speech recognition permission was denied",
192            Self::NetworkError => "Network error during recognition",
193            Self::AudioError => "Error accessing audio input",
194            Self::Timeout => "Recognition timed out",
195            Self::NoSpeech => "No speech was detected",
196            Self::LanguageNotSupported => "The requested language is not supported",
197            Self::Cancelled => "Recognition was cancelled",
198            Self::AlreadyListening => "Already listening for speech",
199            Self::NotListening => "Not currently listening",
200            Self::Busy => "Speech recognition service is busy",
201            Self::Unknown => "An unknown error occurred",
202        }
203    }
204
205    /// Get the numeric code for this error
206    pub fn code(&self) -> i32 {
207        match self {
208            Self::None => 0,
209            Self::NotAvailable => -1,
210            Self::PermissionDenied => -2,
211            Self::SpeechPermissionDenied => -3,
212            Self::NetworkError => -4,
213            Self::AudioError => -5,
214            Self::Timeout => -6,
215            Self::NoSpeech => -7,
216            Self::LanguageNotSupported => -8,
217            Self::Cancelled => -9,
218            Self::AlreadyListening => -10,
219            Self::NotListening => -11,
220            Self::Busy => -12,
221            Self::Unknown => -99,
222        }
223    }
224}
225
226/// Structured error event for frontend consumption
227#[derive(Debug, Clone, Serialize, Deserialize)]
228#[serde(rename_all = "camelCase")]
229pub struct SttError {
230    /// Error code for programmatic handling
231    pub code: SttErrorCode,
232    /// Human-readable error message
233    pub message: String,
234    /// Platform-specific error details (optional)
235    #[serde(default)]
236    pub details: Option<String>,
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty object must deserialize, with every field at its default.
    #[test]
    fn test_listen_config_defaults() {
        let config: ListenConfig = serde_json::from_str("{}").unwrap();
        assert!(config.language.is_none());
        assert!(!config.interim_results);
        assert!(!config.continuous);
        assert_eq!(config.max_duration, 0);
        assert!(config.max_alternatives.is_none());
        assert!(!config.on_device);
    }

    /// Every field is read from its camelCase key.
    #[test]
    fn test_listen_config_full() {
        let json = r#"{
            "language": "pt-BR",
            "interimResults": true,
            "continuous": true,
            "maxDuration": 30,
            "maxAlternatives": 3,
            "onDevice": true
        }"#;
        let config: ListenConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.language, Some("pt-BR".to_string()));
        assert!(config.interim_results);
        assert!(config.continuous);
        assert_eq!(config.max_duration, 30);
        assert_eq!(config.max_alternatives, Some(3));
        assert!(config.on_device);
    }

    #[test]
    fn test_recognition_state_serialization() {
        assert_eq!(
            serde_json::to_string(&RecognitionState::Idle).unwrap(),
            "\"idle\""
        );
        assert_eq!(
            serde_json::to_string(&RecognitionState::Listening).unwrap(),
            "\"listening\""
        );
        assert_eq!(
            serde_json::to_string(&RecognitionState::Processing).unwrap(),
            "\"processing\""
        );
    }

    #[test]
    fn test_recognition_state_default() {
        assert_eq!(RecognitionState::default(), RecognitionState::Idle);
    }

    #[test]
    fn test_recognition_result() {
        let result = RecognitionResult {
            transcript: "Hello world".to_string(),
            is_final: true,
            confidence: Some(0.95),
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"transcript\":\"Hello world\""));
        assert!(json.contains("\"isFinal\":true"));
        assert!(json.contains("\"confidence\":0.95"));
    }

    #[test]
    fn test_permission_status_serialization() {
        assert_eq!(
            serde_json::to_string(&PermissionStatus::Granted).unwrap(),
            "\"granted\""
        );
        assert_eq!(
            serde_json::to_string(&PermissionStatus::Denied).unwrap(),
            "\"denied\""
        );
        assert_eq!(
            serde_json::to_string(&PermissionStatus::Unknown).unwrap(),
            "\"unknown\""
        );
    }

    /// The two-word field must appear under its camelCase key.
    #[test]
    fn test_permission_response_keys() {
        let response = PermissionResponse {
            microphone: PermissionStatus::Granted,
            speech_recognition: PermissionStatus::Denied,
        };
        let json = serde_json::to_string(&response).unwrap();
        assert!(json.contains("\"microphone\":\"granted\""));
        assert!(json.contains("\"speechRecognition\":\"denied\""));
    }

    /// Error codes travel as SCREAMING_SNAKE_CASE strings in both directions.
    #[test]
    fn test_error_code_serialization() {
        assert_eq!(
            serde_json::to_string(&SttErrorCode::None).unwrap(),
            "\"NONE\""
        );
        assert_eq!(
            serde_json::to_string(&SttErrorCode::NotAvailable).unwrap(),
            "\"NOT_AVAILABLE\""
        );
        assert_eq!(
            serde_json::to_string(&SttErrorCode::SpeechPermissionDenied).unwrap(),
            "\"SPEECH_PERMISSION_DENIED\""
        );
        let parsed: SttErrorCode = serde_json::from_str("\"LANGUAGE_NOT_SUPPORTED\"").unwrap();
        assert_eq!(parsed, SttErrorCode::LanguageNotSupported);
    }

    #[test]
    fn test_error_code_default_and_numeric_codes() {
        assert_eq!(SttErrorCode::default(), SttErrorCode::None);
        assert_eq!(SttErrorCode::None.code(), 0);
        assert_eq!(SttErrorCode::NotAvailable.code(), -1);
        assert_eq!(SttErrorCode::Busy.code(), -12);
        assert_eq!(SttErrorCode::Unknown.code(), -99);
        assert_eq!(SttErrorCode::Timeout.description(), "Recognition timed out");
    }
}