Skip to main content

tauri_plugin_tts/
models.rs

1use serde::{Deserialize, Serialize};
2use std::borrow::Cow;
3use ts_rs::TS;
4
/// Upper bound on the UTF-8 byte length of text submitted for speech (10,000 bytes).
pub const MAX_TEXT_LENGTH: usize = 10_000;
/// Upper bound on the byte length of a voice identifier.
pub const MAX_VOICE_ID_LENGTH: usize = 256;
/// Upper bound on the byte length of a language/locale code.
pub const MAX_LANGUAGE_LENGTH: usize = 35;
11
12#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq, TS)]
13#[ts(export, export_to = "../guest-js/bindings/")]
14#[serde(rename_all = "lowercase")]
15pub enum QueueMode {
16    /// Flush any pending speech and start speaking immediately (default)
17    #[default]
18    Flush,
19    /// Add to queue and speak after current speech finishes
20    Add,
21}
22
23#[derive(Debug, Clone, Deserialize, Serialize, TS)]
24#[ts(export, export_to = "../guest-js/bindings/")]
25#[serde(rename_all = "camelCase")]
26pub struct SpeakOptions {
27    /// The text to speak (max 10,000 characters)
28    pub text: String,
29    /// The language/locale code (e.g., "en-US", "pt-BR", "ja-JP")
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub language: Option<String>,
32    /// Specific voice ID to use (from getVoices). Takes priority over language
33    #[serde(skip_serializing_if = "Option::is_none")]
34    pub voice_id: Option<String>,
35    /// Speech rate (0.1 to 4.0, where 1.0 = normal)
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub rate: Option<f32>,
38    /// Pitch (0.5 to 2.0, where 1.0 = normal)
39    #[serde(skip_serializing_if = "Option::is_none")]
40    pub pitch: Option<f32>,
41    /// Volume (0.0 to 1.0, where 1.0 = full volume)
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub volume: Option<f32>,
44    /// Queue mode: "flush" (default) or "add"
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub queue_mode: Option<QueueMode>,
47}
48
49#[derive(Debug, Clone, Deserialize, Serialize, TS)]
50#[ts(export, export_to = "../guest-js/bindings/")]
51#[serde(rename_all = "camelCase")]
52pub struct PreviewVoiceOptions {
53    /// Voice ID to preview
54    pub voice_id: String,
55    /// Optional custom sample text (uses default if not provided)
56    #[serde(skip_serializing_if = "Option::is_none")]
57    pub text: Option<String>,
58}
59
60#[derive(Debug, Deserialize, Serialize)]
61#[serde(rename_all = "camelCase")]
62pub struct SpeakRequest {
63    /// The text to speak
64    pub text: String,
65    /// The language/locale code (e.g., "en-US", "pt-BR", "ja-JP")
66    #[serde(default)]
67    pub language: Option<String>,
68    /// Voice ID to use (from getVoices)
69    #[serde(default)]
70    pub voice_id: Option<String>,
71    /// Speech rate (0.1 to 4.0, where 1.0 = normal, 2.0 = double, 0.5 = half)
72    #[serde(default = "default_rate")]
73    pub rate: f32,
74    /// Pitch (0.5 = low, 1.0 = normal, 2.0 = high)
75    #[serde(default = "default_pitch")]
76    pub pitch: f32,
77    /// Volume (0.0 = silent, 1.0 = full volume)
78    #[serde(default = "default_volume")]
79    pub volume: f32,
80    /// Queue mode: "flush" (default) or "add"
81    #[serde(default)]
82    pub queue_mode: QueueMode,
83}
84
/// Serde fallback for a missing `rate` field: normal speed.
fn default_rate() -> f32 {
    1.0_f32
}
/// Serde fallback for a missing `pitch` field: normal pitch.
fn default_pitch() -> f32 {
    1.0_f32
}
/// Serde fallback for a missing `volume` field: full volume.
fn default_volume() -> f32 {
    1.0_f32
}
94
95#[derive(Debug, Clone, thiserror::Error)]
96pub enum ValidationError {
97    #[error("Text cannot be empty")]
98    EmptyText,
99    #[error("Text too long: {len} bytes (max: {max})")]
100    TextTooLong { len: usize, max: usize },
101    #[error("Voice ID too long: {len} chars (max: {max})")]
102    VoiceIdTooLong { len: usize, max: usize },
103    #[error("Invalid voice ID format - only alphanumeric, dots, underscores, and hyphens allowed")]
104    InvalidVoiceId,
105    #[error("Language code too long: {len} chars (max: {max})")]
106    LanguageTooLong { len: usize, max: usize },
107}
108
109#[derive(Debug, Clone)]
110pub struct ValidatedSpeakRequest {
111    pub text: String,
112    pub language: Option<String>,
113    pub voice_id: Option<String>,
114    pub rate: f32,
115    pub pitch: f32,
116    pub volume: f32,
117    pub queue_mode: QueueMode,
118}
119
120impl SpeakRequest {
121    pub fn validate(&self) -> Result<ValidatedSpeakRequest, ValidationError> {
122        // Text validation
123        if self.text.is_empty() {
124            return Err(ValidationError::EmptyText);
125        }
126        if self.text.len() > MAX_TEXT_LENGTH {
127            return Err(ValidationError::TextTooLong {
128                len: self.text.len(),
129                max: MAX_TEXT_LENGTH,
130            });
131        }
132
133        // Language validation (if provided)
134        let sanitized_language = self
135            .language
136            .as_ref()
137            .map(|lang| Self::validate_language(lang))
138            .transpose()?;
139
140        // Voice ID validation (if provided)
141        if let Some(ref voice_id) = self.voice_id {
142            Self::validate_voice_id(voice_id)?;
143        }
144
145        Ok(ValidatedSpeakRequest {
146            text: self.text.clone(),
147            language: sanitized_language,
148            voice_id: self.voice_id.clone(),
149            rate: self.rate.clamp(0.1, 4.0),
150            pitch: self.pitch.clamp(0.5, 2.0),
151            volume: self.volume.clamp(0.0, 1.0),
152            queue_mode: self.queue_mode,
153        })
154    }
155
156    fn validate_language(lang: &str) -> Result<String, ValidationError> {
157        if lang.len() > MAX_LANGUAGE_LENGTH {
158            return Err(ValidationError::LanguageTooLong {
159                len: lang.len(),
160                max: MAX_LANGUAGE_LENGTH,
161            });
162        }
163        Ok(lang.to_string())
164    }
165
166    fn validate_voice_id(voice_id: &str) -> Result<(), ValidationError> {
167        if voice_id.len() > MAX_VOICE_ID_LENGTH {
168            return Err(ValidationError::VoiceIdTooLong {
169                len: voice_id.len(),
170                max: MAX_VOICE_ID_LENGTH,
171            });
172        }
173        // Only allow alphanumeric, dots, underscores, and hyphens (matches iOS validation)
174        if !voice_id
175            .chars()
176            .all(|c| c.is_alphanumeric() || c == '.' || c == '_' || c == '-')
177        {
178            return Err(ValidationError::InvalidVoiceId);
179        }
180        Ok(())
181    }
182}
183
184#[derive(Debug, Clone, Default, Deserialize, Serialize)]
185#[serde(rename_all = "camelCase")]
186pub struct SpeakResponse {
187    /// Whether speech was successfully initiated
188    pub success: bool,
189    /// Optional warning message (e.g., voice not found, using fallback)
190    #[serde(skip_serializing_if = "Option::is_none")]
191    pub warning: Option<String>,
192}
193
194#[derive(Debug, Clone, Default, Deserialize, Serialize)]
195#[serde(rename_all = "camelCase")]
196pub struct StopResponse {
197    pub success: bool,
198}
199
200#[derive(Debug, Deserialize, Serialize)]
201#[serde(rename_all = "camelCase")]
202pub struct SetBackgroundBehaviorRequest {
203    /// Whether TTS should continue speaking when the app goes to background / screen locks.
204    /// Defaults to true. When false, speech is paused on background and a `speech:backgroundPause`
205    /// event is emitted (matching the previous behavior). Desktop: ignored (no-op).
206    pub continue_in_background: bool,
207}
208
209#[derive(Debug, Clone, Default, Deserialize, Serialize)]
210#[serde(rename_all = "camelCase")]
211pub struct SetBackgroundBehaviorResponse {
212    pub success: bool,
213}
214
215#[derive(Debug, Clone, Deserialize, Serialize, TS)]
216#[ts(export, export_to = "../guest-js/bindings/")]
217#[serde(rename_all = "camelCase")]
218pub struct Voice {
219    /// Unique identifier for the voice
220    pub id: String,
221    /// Display name of the voice
222    pub name: String,
223    /// Language code (e.g., "en-US")
224    pub language: String,
225}
226
227#[derive(Debug, Deserialize, Serialize)]
228#[serde(rename_all = "camelCase")]
229pub struct GetVoicesRequest {
230    /// Optional language filter
231    #[serde(default)]
232    pub language: Option<String>,
233}
234
235#[derive(Debug, Clone, Default, Deserialize, Serialize)]
236#[serde(rename_all = "camelCase")]
237pub struct GetVoicesResponse {
238    pub voices: Vec<Voice>,
239}
240
241#[derive(Debug, Clone, Default, Deserialize, Serialize)]
242#[serde(rename_all = "camelCase")]
243pub struct IsSpeakingResponse {
244    pub speaking: bool,
245}
246
247#[derive(Debug, Clone, Default, Deserialize, Serialize)]
248#[serde(rename_all = "camelCase")]
249pub struct IsInitializedResponse {
250    /// Whether the TTS engine is initialized and ready
251    pub initialized: bool,
252    /// Number of available voices (0 if not initialized)
253    pub voice_count: u32,
254}
255
256#[derive(Debug, Clone, Default, Deserialize, Serialize, TS)]
257#[ts(export, export_to = "../guest-js/bindings/")]
258#[serde(rename_all = "camelCase")]
259pub struct PauseResumeResponse {
260    pub success: bool,
261    /// Reason for failure (if success is false)
262    #[serde(skip_serializing_if = "Option::is_none")]
263    pub reason: Option<String>,
264}
265
266#[derive(Debug, Deserialize, Serialize)]
267#[serde(rename_all = "camelCase")]
268pub struct PreviewVoiceRequest {
269    /// Voice ID to preview
270    pub voice_id: String,
271    /// Optional custom sample text (uses default if not provided)
272    #[serde(default)]
273    pub text: Option<String>,
274}
275
276impl PreviewVoiceRequest {
277    pub const DEFAULT_SAMPLE_TEXT: &'static str =
278        "Hello! This is a sample of how this voice sounds.";
279
280    pub fn sample_text(&self) -> Cow<'_, str> {
281        match &self.text {
282            Some(text) => Cow::Borrowed(text.as_str()),
283            None => Cow::Borrowed(Self::DEFAULT_SAMPLE_TEXT),
284        }
285    }
286
287    pub fn validate(&self) -> Result<(), ValidationError> {
288        // Validate custom text if provided
289        if let Some(ref text) = self.text {
290            if text.is_empty() {
291                return Err(ValidationError::EmptyText);
292            }
293            if text.len() > MAX_TEXT_LENGTH {
294                return Err(ValidationError::TextTooLong {
295                    len: text.len(),
296                    max: MAX_TEXT_LENGTH,
297                });
298            }
299        }
300
301        Ok(())
302    }
303}
304
305/// On desktop, emitted directly via `app.emit("tts://<event_type>", payload)`.
306/// On mobile, native plugins send this through a Tauri `Channel`; the Rust relay
307/// deserializes it and re-emits via `app.emit()` so JS `listen("tts://...")` works
308/// uniformly on every platform.
309///
310/// The shape matches the JS `SpeechEvent` interface.
311#[derive(Debug, Clone, Default, Deserialize, Serialize)]
312#[serde(rename_all = "camelCase")]
313pub struct TtsEventPayload {
314    /// The event name, e.g. "speech:finish". Used to build the emit key "tts://<event_type>".
315    pub event_type: String,
316    /// Unique identifier for the utterance (if available)
317    #[serde(skip_serializing_if = "Option::is_none")]
318    pub id: Option<String>,
319    /// Error message (for error events)
320    #[serde(skip_serializing_if = "Option::is_none")]
321    pub error: Option<String>,
322    /// Whether speech was interrupted
323    #[serde(skip_serializing_if = "Option::is_none")]
324    pub interrupted: Option<bool>,
325    /// Reason for the event (e.g. "audio_focus_lost", "app_paused")
326    #[serde(skip_serializing_if = "Option::is_none")]
327    pub reason: Option<String>,
328}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a request for `text` with no language or voice, neutral
    /// rate/pitch/volume (1.0) and flush queueing. Tests override single
    /// fields with struct-update syntax.
    fn request_with_text(text: &str) -> SpeakRequest {
        SpeakRequest {
            text: text.to_string(),
            language: None,
            voice_id: None,
            rate: 1.0,
            pitch: 1.0,
            volume: 1.0,
            queue_mode: QueueMode::Flush,
        }
    }

    // A payload with only `text` picks up the serde defaults.
    #[test]
    fn test_speak_request_defaults() {
        let parsed: SpeakRequest = serde_json::from_str(r#"{"text": "Hello world"}"#).unwrap();

        assert_eq!(parsed.text, "Hello world");
        assert!(parsed.language.is_none());
        assert!(parsed.voice_id.is_none());
        assert_eq!(parsed.rate, 1.0);
        assert_eq!(parsed.pitch, 1.0);
        assert_eq!(parsed.volume, 1.0);
    }

    // Every field supplied explicitly, using camelCase keys.
    #[test]
    fn test_speak_request_full() {
        let payload = r#"{
            "text": "Olá",
            "language": "pt-BR",
            "voiceId": "com.apple.voice.enhanced.pt-BR",
            "rate": 0.8,
            "pitch": 1.2,
            "volume": 0.9
        }"#;

        let parsed: SpeakRequest = serde_json::from_str(payload).unwrap();
        assert_eq!(parsed.text, "Olá");
        assert_eq!(parsed.language.as_deref(), Some("pt-BR"));
        assert_eq!(
            parsed.voice_id.as_deref(),
            Some("com.apple.voice.enhanced.pt-BR")
        );
        assert_eq!(parsed.rate, 0.8);
        assert_eq!(parsed.pitch, 1.2);
        assert_eq!(parsed.volume, 0.9);
    }

    #[test]
    fn test_voice_serialization() {
        let serialized = serde_json::to_string(&Voice {
            id: "test-voice".to_string(),
            name: "Test Voice".to_string(),
            language: "en-US".to_string(),
        })
        .unwrap();

        assert!(serialized.contains("\"id\":\"test-voice\""));
        assert!(serialized.contains("\"name\":\"Test Voice\""));
        assert!(serialized.contains("\"language\":\"en-US\""));
    }

    #[test]
    fn test_get_voices_request_optional_language() {
        // Missing key -> None.
        let unfiltered: GetVoicesRequest = serde_json::from_str(r#"{}"#).unwrap();
        assert!(unfiltered.language.is_none());

        // Present key -> Some(value).
        let filtered: GetVoicesRequest = serde_json::from_str(r#"{"language": "en"}"#).unwrap();
        assert_eq!(filtered.language.as_deref(), Some("en"));
    }

    #[test]
    fn test_validation_empty_text() {
        let outcome = request_with_text("").validate();

        assert!(outcome.is_err());
        assert!(matches!(outcome, Err(ValidationError::EmptyText)));
    }

    #[test]
    fn test_validation_text_too_long() {
        // One byte over the limit.
        let oversized = "x".repeat(MAX_TEXT_LENGTH + 1);
        let outcome = request_with_text(&oversized).validate();

        assert!(outcome.is_err());
        assert!(matches!(
            outcome,
            Err(ValidationError::TextTooLong { .. })
        ));
    }

    #[test]
    fn test_validation_valid_voice_id() {
        let voice = "com.apple.voice.enhanced.en-US";
        let request = SpeakRequest {
            voice_id: Some(voice.to_string()),
            ..request_with_text("Hello")
        };

        let outcome = request.validate();
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().voice_id, Some(voice.to_string()));
    }

    #[test]
    fn test_validation_voice_id_too_long() {
        // One byte over the limit.
        let request = SpeakRequest {
            voice_id: Some("x".repeat(MAX_VOICE_ID_LENGTH + 1)),
            ..request_with_text("Hello")
        };

        let outcome = request.validate();
        assert!(outcome.is_err());
        assert!(matches!(
            outcome,
            Err(ValidationError::VoiceIdTooLong { .. })
        ));
    }

    #[test]
    fn test_validation_rate_clamping() {
        let request = SpeakRequest {
            rate: 999.0,
            ..request_with_text("Hello")
        };

        let outcome = request.validate();
        assert!(outcome.is_ok());
        // Pulled down to the maximum.
        assert_eq!(outcome.unwrap().rate, 4.0);
    }

    #[test]
    fn test_validation_pitch_clamping() {
        let request = SpeakRequest {
            pitch: 0.1,
            ..request_with_text("Hello")
        };

        let outcome = request.validate();
        assert!(outcome.is_ok());
        // Pulled up to the minimum.
        assert_eq!(outcome.unwrap().pitch, 0.5);
    }

    #[test]
    fn test_validation_volume_clamping() {
        let request = SpeakRequest {
            volume: 5.0,
            ..request_with_text("Hello")
        };

        let outcome = request.validate();
        assert!(outcome.is_ok());
        // Pulled down to the maximum.
        assert_eq!(outcome.unwrap().volume, 1.0);
    }

    #[test]
    fn test_preview_voice_validation() {
        // A voice ID made only of allowed characters passes.
        let well_formed = PreviewVoiceRequest {
            voice_id: "valid-voice_123".to_string(),
            text: None,
        };
        assert!(well_formed.validate().is_ok());

        // A voice ID containing `<` and `>` must be rejected.
        let malformed = PreviewVoiceRequest {
            voice_id: "invalid<script>".to_string(),
            text: None,
        };
        assert!(malformed.validate().is_err());
    }

    #[test]
    fn test_preview_voice_sample_text() {
        // No custom text -> built-in sample.
        let default_sample = PreviewVoiceRequest {
            voice_id: "voice".to_string(),
            text: None,
        };
        assert_eq!(
            default_sample.sample_text(),
            PreviewVoiceRequest::DEFAULT_SAMPLE_TEXT
        );

        // Custom text -> returned as-is.
        let custom_sample = PreviewVoiceRequest {
            voice_id: "voice".to_string(),
            text: Some("Custom sample".to_string()),
        };
        assert_eq!(custom_sample.sample_text(), "Custom sample");
    }
}