// Feature-gated submodules: each one is compiled only when the Cargo feature
// named in its `cfg` attribute is enabled.

#[cfg(feature = "kokoro")]
pub mod kokoro;

#[cfg(feature = "omnivoice")]
pub mod omnivoice;

#[cfg(feature = "qwen3-tts")]
pub mod qwen3_tts;

#[cfg(feature = "vibevoice")]
pub mod vibevoice;

// NOTE(review): this module is gated by the `vibevoice` feature rather than a
// dedicated realtime feature — confirm that sharing the gate is intentional.
#[cfg(feature = "vibevoice")]
pub mod vibevoice_realtime;

#[cfg(feature = "voxtral")]
pub mod voxtral;
/// One entry in a model's asset table: a file pattern, whether it is
/// mandatory, and a short human-readable explanation of what it is for.
///
/// All fields are `'static`/`Copy`, so entries can live in `const` tables and
/// be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ModelAssetRequirement {
    /// File name or pattern for the asset.
    ///
    /// The tables in this file use `*` as a wildcard and ` | ` between
    /// acceptable alternatives (e.g. `"model.safetensors | *.pth"`); the code
    /// that interprets the pattern is not part of this block.
    pub pattern: &'static str,
    /// `true` when the asset is marked as mandatory, `false` when optional.
    pub required: bool,
    /// Short description of what the asset is used for.
    pub purpose: &'static str,
}
29const KOKORO_ASSETS: &[ModelAssetRequirement] = &[
30 ModelAssetRequirement {
31 pattern: "config.json",
32 required: true,
33 purpose: "Model architecture and phoneme vocabulary.",
34 },
35 ModelAssetRequirement {
36 pattern: "model.safetensors | *.pth",
37 required: true,
38 purpose: "Main Kokoro weights.",
39 },
40 ModelAssetRequirement {
41 pattern: "voices/*.pt",
42 required: false,
43 purpose: "Preset voice packs for named-voice synthesis.",
44 },
45];
47const OMNIVOICE_ASSETS: &[ModelAssetRequirement] = &[
48 ModelAssetRequirement {
49 pattern: "config.json",
50 required: true,
51 purpose: "Main OmniVoice config.",
52 },
53 ModelAssetRequirement {
54 pattern: "tokenizer.json",
55 required: true,
56 purpose: "Text tokenizer.",
57 },
58 ModelAssetRequirement {
59 pattern: "model.safetensors | model-*-of-*.safetensors",
60 required: true,
61 purpose: "Main OmniVoice weights.",
62 },
63 ModelAssetRequirement {
64 pattern: "audio_tokenizer/config.json",
65 required: true,
66 purpose: "Codec decoder config.",
67 },
68 ModelAssetRequirement {
69 pattern: "audio_tokenizer/model.safetensors | audio_tokenizer/model-*-of-*.safetensors",
70 required: true,
71 purpose: "Codec decoder weights.",
72 },
73];
75const QWEN3_TTS_ASSETS: &[ModelAssetRequirement] = &[
76 ModelAssetRequirement {
77 pattern: "config.json",
78 required: true,
79 purpose: "Main talker/code-predictor config.",
80 },
81 ModelAssetRequirement {
82 pattern: "tokenizer.json",
83 required: true,
84 purpose: "Text tokenizer.",
85 },
86 ModelAssetRequirement {
87 pattern: "model.safetensors | model-*-of-*.safetensors",
88 required: true,
89 purpose: "Main Qwen3-TTS weights.",
90 },
91 ModelAssetRequirement {
92 pattern: "speech_tokenizer/model.safetensors | speech_tokenizer/model-*-of-*.safetensors",
93 required: true,
94 purpose: "Speech-tokenizer decoder weights.",
95 },
96 ModelAssetRequirement {
97 pattern: "speech_tokenizer/config.json",
98 required: false,
99 purpose: "Optional speech-tokenizer config when it is stored beside the main assets.",
100 },
101];
103const VIBEVOICE_ASSETS: &[ModelAssetRequirement] = &[
104 ModelAssetRequirement {
105 pattern: "config.json",
106 required: true,
107 purpose: "Main VibeVoice config.",
108 },
109 ModelAssetRequirement {
110 pattern: "tokenizer.json",
111 required: true,
112 purpose: "Text tokenizer.",
113 },
114 ModelAssetRequirement {
115 pattern: "model.safetensors | model-*-of-*.safetensors",
116 required: true,
117 purpose: "Unified VibeVoice weights.",
118 },
119 ModelAssetRequirement {
120 pattern: "preprocessor_config.json",
121 required: false,
122 purpose: "Published preprocessing defaults.",
123 },
124];
126const VIBEVOICE_REALTIME_ASSETS: &[ModelAssetRequirement] = &[
127 ModelAssetRequirement {
128 pattern: "config.json",
129 required: true,
130 purpose: "Main VibeVoice Realtime config.",
131 },
132 ModelAssetRequirement {
133 pattern: "tokenizer.json",
134 required: true,
135 purpose: "Text tokenizer.",
136 },
137 ModelAssetRequirement {
138 pattern: "model.safetensors",
139 required: true,
140 purpose: "Realtime VibeVoice weights.",
141 },
142 ModelAssetRequirement {
143 pattern: "preprocessor_config.json",
144 required: false,
145 purpose: "Published preprocessing defaults.",
146 },
147 ModelAssetRequirement {
148 pattern: "voices/*.pt",
149 required: false,
150 purpose: "Optional cached-prompt voice presets from the upstream demo bundle.",
151 },
152];
154const VOXTRAL_ASSETS: &[ModelAssetRequirement] = &[
155 ModelAssetRequirement {
156 pattern: "params.json",
157 required: true,
158 purpose: "Main Voxtral config.",
159 },
160 ModelAssetRequirement {
161 pattern: "tekken.json",
162 required: true,
163 purpose: "Tekken tokenizer.",
164 },
165 ModelAssetRequirement {
166 pattern: "consolidated.safetensors",
167 required: true,
168 purpose: "Main Voxtral weights.",
169 },
170 ModelAssetRequirement {
171 pattern: "voice_embedding/*.pt",
172 required: true,
173 purpose: "Preset voice embeddings.",
174 },
175];
/// Identifies one of the model families known to this module.
///
/// The enum is a plain `Copy` tag; per-variant data such as the expected
/// asset files is looked up through methods on the type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelType {
    /// Kokoro.
    Kokoro,
    /// OmniVoice.
    OmniVoice,
    /// Qwen3-TTS.
    Qwen3Tts,
    /// VibeVoice.
    VibeVoice,
    /// VibeVoice Realtime.
    VibeVoiceRealtime,
    /// Voxtral.
    Voxtral,
}
194impl ModelType {
195 pub fn asset_requirements(self) -> &'static [ModelAssetRequirement] {
197 match self {
198 Self::Kokoro => KOKORO_ASSETS,
199 Self::OmniVoice => OMNIVOICE_ASSETS,
200 Self::Qwen3Tts => QWEN3_TTS_ASSETS,
201 Self::VibeVoice => VIBEVOICE_ASSETS,
202 Self::VibeVoiceRealtime => VIBEVOICE_REALTIME_ASSETS,
203 Self::Voxtral => VOXTRAL_ASSETS,
204 }
205 }
206}