wavekat_tts/types.rs
/// A request to synthesize speech from text.
///
/// The parameters are backend-agnostic: each backend interprets `voice`,
/// `instruction`, and `language` according to its own capabilities, and
/// silently ignores the fields it does not support.
///
/// Every string field borrows from the caller for the lifetime `'a`.
///
/// # Examples
///
/// ```ignore
/// let request = SynthesizeRequest::new("Hello, world")
///     .with_voice("af_heart")
///     .with_language("en")
///     .with_speed(1.2);
/// ```
#[derive(Debug, Clone)]
pub struct SynthesizeRequest<'a> {
    /// The text to be spoken.
    pub text: &'a str,

    /// Voice identifier (backend-specific).
    ///
    /// Used by backends that expose a fixed speaker catalog:
    /// - Edge-TTS: `"zh-CN-XiaoxiaoNeural"`, `"zh-CN-YunxiNeural"`, …
    /// - Kokoro: `"af_heart"`, `"zf_xiaobei"`, …
    ///
    /// `None` selects the backend's default voice.
    pub voice: Option<&'a str>,

    /// Free-form voice instruction / style prompt.
    ///
    /// Used by instruction-following backends (e.g. Qwen3-TTS VoiceDesign).
    /// The text describes how the model should speak:
    ///
    /// ```text
    /// "Speak in a calm, professional tone."
    /// "Narrate with warmth and a gentle pace."
    /// "Respond with high energy and enthusiasm!"
    /// ```
    ///
    /// `None` lets the backend use its default voice character.
    pub instruction: Option<&'a str>,

    /// Language / locale code, e.g. `"zh"`, `"en"`, `"ja"`.
    ///
    /// `None` uses the backend's default or lets it auto-detect.
    pub language: Option<&'a str>,

    /// Speed multiplier, where `1.0` is normal speed.
    ///
    /// Values below `1.0` slow speech down; values above `1.0` speed it up.
    /// Not all backends support this; unsupported values are ignored.
    /// The value is stored as given — no range validation happens here.
    pub speed: Option<f32>,
}

impl<'a> SynthesizeRequest<'a> {
    /// Create a minimal request containing only `text`.
    ///
    /// Every optional field starts as `None`.
    #[must_use]
    pub fn new(text: &'a str) -> Self {
        Self {
            text,
            voice: None,
            instruction: None,
            language: None,
            speed: None,
        }
    }

    /// Set the voice identifier, replacing any previous value.
    ///
    /// Consumes the request and returns the updated one, so the result must
    /// be used (typically by chaining further builder calls).
    #[must_use = "builder methods return the updated request; the original is consumed"]
    pub fn with_voice(mut self, voice: &'a str) -> Self {
        self.voice = Some(voice);
        self
    }

    /// Set the voice instruction / style prompt, replacing any previous value.
    ///
    /// Consumes the request and returns the updated one.
    #[must_use = "builder methods return the updated request; the original is consumed"]
    pub fn with_instruction(mut self, instruction: &'a str) -> Self {
        self.instruction = Some(instruction);
        self
    }

    /// Set the language / locale code, replacing any previous value.
    ///
    /// Consumes the request and returns the updated one.
    #[must_use = "builder methods return the updated request; the original is consumed"]
    pub fn with_language(mut self, language: &'a str) -> Self {
        self.language = Some(language);
        self
    }

    /// Set the speed multiplier, replacing any previous value.
    ///
    /// Consumes the request and returns the updated one. The value is stored
    /// unchanged; it is not clamped or validated.
    #[must_use = "builder methods return the updated request; the original is consumed"]
    pub fn with_speed(mut self, speed: f32) -> Self {
        self.speed = Some(speed);
        self
    }
}
83
84/// Metadata about a voice available in a backend.
85#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
86pub struct VoiceInfo {
87 /// Backend-specific voice identifier.
88 pub id: String,
89
90 /// Human-readable display name.
91 pub name: String,
92
93 /// Supported language / locale codes.
94 pub languages: Vec<String>,
95
96 /// Gender hint, if available.
97 pub gender: Option<Gender>,
98}
99
100/// Voice gender hint.
101#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
102pub enum Gender {
103 /// Male voice.
104 Male,
105 /// Female voice.
106 Female,
107 /// Gender-neutral or unspecified voice.
108 Neutral,
109}