1use crate::shared::response_wrapper::OpenAIError;
2use crate::shared::types::FileMeta;
3use derive_builder::Builder;
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Serialize, Default, Clone, strum::Display)]
7pub enum SttResponseFormat {
8 #[default]
9 #[strum(serialize = "json")]
10 Json,
11 #[strum(serialize = "text")]
12 Text,
13 #[strum(serialize = "srt")]
14 Srt,
15 #[strum(serialize = "verbose_json")]
16 VerboseJson,
17 #[strum(serialize = "vtt")]
18 Vtt,
19}
20
21#[derive(Debug, Serialize, Default, Clone, strum::Display)]
22pub enum TtsResponseFormat {
23 #[default]
24 #[strum(serialize = "mp3")]
25 Mp3,
26 #[strum(serialize = "opus")]
27 Opus,
28 #[strum(serialize = "aac")]
29 Aac,
30 #[strum(serialize = "flac")]
31 Flac,
32 #[strum(serialize = "wav")]
33 Wav,
34 #[strum(serialize = "pcm")]
35 Pcm,
36}
37
38#[derive(Debug, Serialize, Default, Clone, strum::Display)]
39pub enum Language {
40 #[default]
41 #[strum(serialize = "en")]
42 English,
43 #[strum(serialize = "zh")]
44 Chinese,
45 #[strum(serialize = "de")]
46 German,
47 #[strum(serialize = "es")]
48 Spanish,
49 #[strum(serialize = "ru")]
50 Russian,
51 #[strum(serialize = "ko")]
52 Korean,
53 #[strum(serialize = "fr")]
54 French,
55 #[strum(serialize = "ja")]
56 Japanese,
57 #[strum(serialize = "pt")]
58 Portuguese,
59 #[strum(serialize = "tr")]
60 Turkish,
61 #[strum(serialize = "pl")]
62 Polish,
63 #[strum(serialize = "ca")]
64 Catalan,
65 #[strum(serialize = "nl")]
66 Dutch,
67 #[strum(serialize = "ar")]
68 Arabic,
69 #[strum(serialize = "sv")]
70 Swedish,
71 #[strum(serialize = "it")]
72 Italian,
73 #[strum(serialize = "id")]
74 Indonesian,
75 #[strum(serialize = "hi")]
76 Hindi,
77 #[strum(serialize = "fi")]
78 Finnish,
79 #[strum(serialize = "vi")]
80 Vietnamese,
81 #[strum(serialize = "he")]
82 Hebrew,
83 #[strum(serialize = "uk")]
84 Ukrainian,
85 #[strum(serialize = "el")]
86 Greek,
87 #[strum(serialize = "ms")]
88 Malay,
89 #[strum(serialize = "cs")]
90 Czech,
91 #[strum(serialize = "ro")]
92 Romanian,
93 #[strum(serialize = "da")]
94 Danish,
95 #[strum(serialize = "hu")]
96 Hungarian,
97 #[strum(serialize = "ta")]
98 Tamil,
99 #[strum(serialize = "no")]
100 Norwegian,
101 #[strum(serialize = "th")]
102 Thai,
103 #[strum(serialize = "ur")]
104 Urdu,
105 #[strum(serialize = "hr")]
106 Croatian,
107 #[strum(serialize = "bg")]
108 Bulgarian,
109 #[strum(serialize = "lt")]
110 Lithuanian,
111 #[strum(serialize = "la")]
112 Latin,
113 #[strum(serialize = "mi")]
114 Maori,
115 #[strum(serialize = "ml")]
116 Malayalam,
117 #[strum(serialize = "cy")]
118 Welsh,
119 #[strum(serialize = "sk")]
120 Slovak,
121 #[strum(serialize = "te")]
122 Telugu,
123 #[strum(serialize = "fa")]
124 Persian,
125 #[strum(serialize = "lv")]
126 Latvian,
127 #[strum(serialize = "bn")]
128 Bengali,
129 #[strum(serialize = "sr")]
130 Serbian,
131 #[strum(serialize = "az")]
132 Azerbaijani,
133 #[strum(serialize = "sl")]
134 Slovenian,
135 #[strum(serialize = "kn")]
136 Kannada,
137 #[strum(serialize = "et")]
138 Estonian,
139 #[strum(serialize = "mk")]
140 Macedonian,
141 #[strum(serialize = "br")]
142 Breton,
143 #[strum(serialize = "eu")]
144 Basque,
145 #[strum(serialize = "is")]
146 Icelandic,
147 #[strum(serialize = "hy")]
148 Armenian,
149 #[strum(serialize = "ne")]
150 Nepali,
151 #[strum(serialize = "mn")]
152 Mongolian,
153 #[strum(serialize = "bs")]
154 Bosnian,
155 #[strum(serialize = "kk")]
156 Kazakh,
157 #[strum(serialize = "sq")]
158 Albanian,
159 #[strum(serialize = "sw")]
160 Swahili,
161 #[strum(serialize = "gl")]
162 Galician,
163 #[strum(serialize = "mr")]
164 Marathi,
165 #[strum(serialize = "pa")]
166 Punjabi,
167 #[strum(serialize = "si")]
168 Sinhala,
169 #[strum(serialize = "km")]
170 Khmer,
171 #[strum(serialize = "sn")]
172 Shona,
173 #[strum(serialize = "yo")]
174 Yoruba,
175 #[strum(serialize = "so")]
176 Somali,
177 #[strum(serialize = "af")]
178 Afrikaans,
179 #[strum(serialize = "oc")]
180 Occitan,
181 #[strum(serialize = "ka")]
182 Georgian,
183 #[strum(serialize = "be")]
184 Belarusian,
185 #[strum(serialize = "tg")]
186 Tajik,
187 #[strum(serialize = "sd")]
188 Sindhi,
189 #[strum(serialize = "gu")]
190 Gujarati,
191 #[strum(serialize = "am")]
192 Amharic,
193 #[strum(serialize = "yi")]
194 Yiddish,
195 #[strum(serialize = "lo")]
196 Lao,
197 #[strum(serialize = "uz")]
198 Uzbek,
199 #[strum(serialize = "fo")]
200 Faroese,
201 #[strum(serialize = "ht")]
202 HaitianCreole,
203 #[strum(serialize = "ps")]
204 Pashto,
205 #[strum(serialize = "tk")]
206 Turkmen,
207 #[strum(serialize = "nn")]
208 Nynorsk,
209 #[strum(serialize = "mt")]
210 Maltese,
211 #[strum(serialize = "sa")]
212 Sanskrit,
213 #[strum(serialize = "lb")]
214 Luxembourgish,
215 #[strum(serialize = "my")]
216 Myanmar,
217 #[strum(serialize = "bo")]
218 Tibetan,
219 #[strum(serialize = "tl")]
220 Tagalog,
221 #[strum(serialize = "mg")]
222 Malagasy,
223 #[strum(serialize = "as")]
224 Assamese,
225 #[strum(serialize = "tt")]
226 Tatar,
227 #[strum(serialize = "haw")]
228 Hawaiian,
229 #[strum(serialize = "ln")]
230 Lingala,
231 #[strum(serialize = "ha")]
232 Hausa,
233 #[strum(serialize = "ba")]
234 Bashkir,
235 #[strum(serialize = "jw")]
236 Javanese,
237 #[strum(serialize = "su")]
238 Sundanese,
239}
240
241#[derive(Debug, Serialize, Default, Clone, strum::Display)]
242pub enum Voice {
243 #[default]
244 #[strum(serialize = "alloy")]
245 Alloy,
246 #[strum(serialize = "echo")]
247 Echo,
248 #[strum(serialize = "fable")]
249 Fable,
250 #[strum(serialize = "onyx")]
251 Onyx,
252 #[strum(serialize = "nova")]
253 Nova,
254 #[strum(serialize = "shimmer")]
255 Shimmer,
256}
257
258#[derive(Debug, Serialize, Default, Clone, strum::Display)]
259pub enum SttModel {
260 #[default]
261 #[strum(serialize = "whisper-1")]
262 Whisper1,
263}
264
265#[derive(Debug, Serialize, Default, Clone, strum::Display)]
266pub enum AudioSpeechModel {
267 #[default]
268 #[strum(serialize = "tts-1")]
269 Whisper1,
270 #[strum(serialize = "tts-1-hd")]
271 Whisper1Hd,
272}
273
274#[derive(Builder, Clone, Debug, Default, Serialize)]
275#[builder(name = "CreateSpeechRequestBuilder")]
276#[builder(pattern = "mutable")]
277#[builder(setter(into, strip_option), default)]
278#[builder(derive(Debug))]
279#[builder(build_fn(error = "OpenAIError"))]
280pub struct CreateSpeechRequest {
281 pub model: AudioSpeechModel,
283
284 pub input: String,
286
287 pub voice: Voice,
290
291 pub response_format: Option<SttResponseFormat>, #[serde(skip_serializing_if = "Option::is_none")]
297 pub speed: Option<f32>, }
299
300#[derive(Builder, Clone, Debug, Default, Serialize)]
301#[builder(name = "CreateTranscriptionRequestBuilder")]
302#[builder(pattern = "mutable")]
303#[builder(setter(into, strip_option), default)]
304#[builder(derive(Debug))]
305#[builder(build_fn(error = "OpenAIError"))]
306pub struct CreateTranscriptionRequest {
307 pub file: FileMeta,
309
310 pub model: SttModel,
312
313 #[serde(skip_serializing_if = "Option::is_none")]
316 pub prompt: Option<String>,
317
318 #[serde(skip_serializing_if = "Option::is_none")]
320 pub response_format: Option<SttResponseFormat>, #[serde(skip_serializing_if = "Option::is_none")]
326 pub temperature: Option<f32>, #[serde(skip_serializing_if = "Option::is_none")]
330 pub language: Option<Language>,
331}
332
333#[derive(Builder, Clone, Debug, Default, Serialize)]
334#[builder(name = "CreateTranslationRequestBuilder")]
335#[builder(pattern = "mutable")]
336#[builder(setter(into, strip_option), default)]
337#[builder(derive(Debug))]
338#[builder(build_fn(error = "OpenAIError"))]
339pub struct CreateTranslationRequest {
340 pub file: FileMeta,
342
343 pub model: SttModel,
345
346 #[serde(skip_serializing_if = "Option::is_none")]
349 pub prompt: Option<String>,
350
351 #[serde(skip_serializing_if = "Option::is_none")]
353 pub response_format: Option<SttResponseFormat>, #[serde(skip_serializing_if = "Option::is_none")]
359 pub temperature: Option<f32>, }
361
362#[derive(Debug, Deserialize, Clone, Serialize)]
363pub struct VerboseJsonForAudioResponse {
364 pub task: Option<String>,
365 pub language: Option<String>,
366 pub duration: Option<f32>,
367 pub segments: Option<Vec<Segment>>,
368 pub text: String,
369}
370
371#[derive(Debug, Deserialize, Clone, Serialize)]
372pub struct Segment {
373 pub id: u32,
374 pub seek: u32,
375 pub start: f32,
376 pub end: f32,
377 pub text: String,
378 pub tokens: Vec<u32>,
379 pub temperature: f32,
380 pub avg_logprob: f32,
381 pub compression_ratio: f32,
382 pub no_speech_prob: f32,
383}