//! quantum_sdk — audio endpoints (`audio.rs`): TTS, STT, music, sound
//! effects, and advanced ElevenLabs audio operations.

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::client::Client;
use crate::error::Result;
/// Request body for text-to-speech.
#[derive(Debug, Clone, Serialize, Default)]
pub struct TtsRequest {
    /// TTS model (e.g. "tts-1", "eleven_multilingual_v2", "grok-3-tts").
    pub model: String,

    /// Text to synthesise into speech.
    pub text: String,

    /// Voice to use (e.g. "alloy", "echo", "nova", "Rachel").
    /// Omitted from the JSON body when `None` (provider default applies).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<String>,

    /// Audio format (e.g. "mp3", "wav", "opus"). Default: "mp3".
    /// Serialized on the wire as `format`.
    #[serde(rename = "format", skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Speech rate (provider-dependent).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speed: Option<f64>,
}

/// Response from text-to-speech.
#[derive(Debug, Clone, Deserialize)]
pub struct TtsResponse {
    /// Base64-encoded audio data.
    pub audio_base64: String,

    /// Audio format (e.g. "mp3").
    pub format: String,

    /// Audio file size in bytes.
    pub size_bytes: i64,

    /// Model that generated the audio.
    pub model: String,

    /// Total cost in ticks. Defaults to 0 when absent; `Client::speak`
    /// backfills it from response metadata.
    #[serde(default)]
    pub cost_ticks: i64,

    /// Unique request identifier. Defaults to empty when absent;
    /// `Client::speak` backfills it from response metadata.
    #[serde(default)]
    pub request_id: String,
}

/// Request body for speech-to-text.
#[derive(Debug, Clone, Serialize, Default)]
pub struct SttRequest {
    /// STT model (e.g. "whisper-1", "scribe_v2").
    pub model: String,

    /// Base64-encoded audio data.
    pub audio_base64: String,

    /// Original filename (helps with format detection). Default: "audio.mp3".
    #[serde(skip_serializing_if = "Option::is_none")]
    pub filename: Option<String>,

    /// BCP-47 language code hint (e.g. "en", "de").
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,
}

/// Response from speech-to-text.
#[derive(Debug, Clone, Deserialize)]
pub struct SttResponse {
    /// Transcribed text.
    pub text: String,

    /// Model that performed transcription.
    pub model: String,

    /// Total cost in ticks (0 when absent; backfilled by `Client::transcribe`).
    #[serde(default)]
    pub cost_ticks: i64,

    /// Unique request identifier (empty when absent; backfilled by the client).
    #[serde(default)]
    pub request_id: String,
}

/// Request body for music generation.
#[derive(Debug, Clone, Serialize, Default)]
pub struct MusicRequest {
    /// Music generation model (e.g. "lyria").
    pub model: String,

    /// Describes the music to generate.
    pub prompt: String,

    /// Target duration in seconds (default 30).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub duration_seconds: Option<i32>,
}

/// Response from music generation.
#[derive(Debug, Clone, Deserialize)]
pub struct MusicResponse {
    /// Generated music clips (empty when the backend returns none).
    #[serde(default)]
    pub audio_clips: Vec<MusicClip>,

    /// Model that generated the music.
    #[serde(default)]
    pub model: String,

    /// Total cost in ticks (0 when absent; backfilled by the client).
    #[serde(default)]
    pub cost_ticks: i64,

    /// Unique request identifier (empty when absent; backfilled by the client).
    #[serde(default)]
    pub request_id: String,
}

/// A single generated music clip.
#[derive(Debug, Clone, Deserialize)]
pub struct MusicClip {
    /// Base64-encoded audio data.
    pub base64: String,

    /// Audio format (e.g. "mp3", "wav").
    #[serde(default)]
    pub format: String,

    /// Audio file size in bytes.
    #[serde(default)]
    pub size_bytes: i64,

    /// Clip index within the batch.
    #[serde(default)]
    pub index: i32,
}

/// Request body for sound effects generation.
#[derive(Debug, Clone, Serialize, Default)]
pub struct SoundEffectRequest {
    /// Text prompt describing the sound effect.
    pub prompt: String,

    /// Optional duration in seconds.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub duration_seconds: Option<f64>,
}

/// Response from sound effects generation.
#[derive(Debug, Clone, Deserialize)]
pub struct SoundEffectResponse {
    /// Base64-encoded audio data.
    pub audio_base64: String,

    /// Audio format (e.g. "mp3").
    pub format: String,

    /// File size in bytes.
    #[serde(default)]
    pub size_bytes: i64,

    /// Model used.
    #[serde(default)]
    pub model: String,

    /// Total cost in ticks (0 when absent; backfilled by the client).
    #[serde(default)]
    pub cost_ticks: i64,

    /// Unique request identifier (empty when absent; backfilled by the client).
    #[serde(default)]
    pub request_id: String,
}

// ---------------------------------------------------------------------------
// Advanced Audio Types
// ---------------------------------------------------------------------------

/// Generic audio response used by multiple advanced audio endpoints.
#[derive(Debug, Clone, Deserialize)]
pub struct AudioResponse {
    /// Base64-encoded audio data, when the endpoint returns audio inline.
    #[serde(default)]
    pub audio_base64: Option<String>,

    /// Audio format (e.g. "mp3", "wav").
    #[serde(default)]
    pub format: Option<String>,

    /// File size in bytes.
    #[serde(default)]
    pub size_bytes: Option<i64>,

    /// Model used.
    #[serde(default)]
    pub model: Option<String>,

    /// Total cost in ticks (0 when absent; backfilled by the client).
    #[serde(default)]
    pub cost_ticks: i64,

    /// Unique request identifier (empty when absent; backfilled by the client).
    #[serde(default)]
    pub request_id: String,

    /// Additional response fields not covered above, captured verbatim
    /// via serde's `flatten`.
    #[serde(flatten)]
    pub extra: HashMap<String, serde_json::Value>,
}

/// A single dialogue turn (used for building the request — converted to text + voices).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DialogueTurn {
    /// Speaker name or identifier.
    pub speaker: String,

    /// Text for this speaker to say.
    pub text: String,

    /// Voice ID to use for this speaker. Turns without a voice contribute
    /// no entry to `DialogueRequest::voices` (see `DialogueRequest::from_turns`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<String>,
}

/// Voice mapping for ElevenLabs dialogue.
#[derive(Debug, Clone, Serialize)]
pub struct DialogueVoice {
    /// Voice identifier to use for the speaker (e.g. an ElevenLabs voice_id).
    pub voice_id: String,
    /// Speaker name this voice is assigned to.
    pub name: String,
}

/// Request body sent to the QAI proxy for dialogue generation.
/// The proxy expects `text` (full script) + `voices` (speaker-to-voice mapping).
#[derive(Debug, Clone, Serialize, Default)]
pub struct DialogueRequest {
    /// Full dialogue script (e.g. "Speaker1: Hello!\nSpeaker2: Hi there!").
    pub text: String,

    /// Voice mappings — each speaker name mapped to a voice_id.
    pub voices: Vec<DialogueVoice>,

    /// Dialogue model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// Output audio format.
    #[serde(rename = "output_format", skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Seed for reproducible generation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i32>,
}

260impl DialogueRequest {
261    /// Build a DialogueRequest from individual turns.
262    /// Converts turns into the text + voices format the API expects.
263    pub fn from_turns(turns: Vec<DialogueTurn>, model: Option<String>) -> Self {
264        // Build the script text: "Speaker: text\n..."
265        let text = turns.iter()
266            .map(|t| format!("{}: {}", t.speaker, t.text))
267            .collect::<Vec<_>>()
268            .join("\n");
269
270        // Deduplicate voices — one entry per unique speaker
271        let mut seen = std::collections::HashSet::new();
272        let voices: Vec<DialogueVoice> = turns.iter()
273            .filter(|t| t.voice.is_some() && seen.insert(t.speaker.clone()))
274            .map(|t| DialogueVoice {
275                voice_id: t.voice.clone().unwrap_or_default(),
276                name: t.speaker.clone(),
277            })
278            .collect();
279
280        Self {
281            text,
282            voices,
283            model,
284            ..Default::default()
285        }
286    }
287}
288
/// Request body for speech-to-speech conversion.
#[derive(Debug, Clone, Serialize, Default)]
pub struct SpeechToSpeechRequest {
    /// Model for conversion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// Base64-encoded source audio.
    pub audio_base64: String,

    /// Target voice.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<String>,

    /// Output audio format. Serialized on the wire as `format`.
    #[serde(rename = "format", skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,
}

/// Request body for voice isolation.
#[derive(Debug, Clone, Serialize, Default)]
pub struct IsolateRequest {
    /// Base64-encoded audio to isolate voice from.
    pub audio_base64: String,

    /// Output audio format. Serialized on the wire as `format`.
    #[serde(rename = "format", skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,
}

/// Request body for voice remixing.
#[derive(Debug, Clone, Serialize, Default)]
pub struct RemixRequest {
    /// Base64-encoded source audio.
    pub audio_base64: String,

    /// Target voice for the remix.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<String>,

    /// Model for remixing.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// Output audio format. Serialized on the wire as `format`.
    #[serde(rename = "format", skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,
}

/// Request body for audio dubbing.
#[derive(Debug, Clone, Serialize, Default)]
pub struct DubRequest {
    /// Base64-encoded source audio or video.
    pub audio_base64: String,

    /// Original filename (helps detect format).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub filename: Option<String>,

    /// Target language (BCP-47 code, e.g. "es", "de").
    pub target_language: String,

    /// Source language (auto-detected if omitted).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_language: Option<String>,
}

/// Request body for audio alignment / forced alignment.
#[derive(Debug, Clone, Serialize, Default)]
pub struct AlignRequest {
    /// Base64-encoded audio data.
    pub audio_base64: String,

    /// Transcript text to align against the audio.
    pub text: String,

    /// Language code.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,
}

/// A single alignment segment.
#[derive(Debug, Clone, Deserialize)]
pub struct AlignmentSegment {
    /// Aligned text.
    pub text: String,

    /// Start time in seconds.
    pub start: f64,

    /// End time in seconds.
    pub end: f64,
}

/// Response from audio alignment.
#[derive(Debug, Clone, Deserialize)]
pub struct AlignResponse {
    /// Aligned segments.
    pub segments: Vec<AlignmentSegment>,

    /// Total cost in ticks (0 when absent; backfilled by `Client::align`).
    #[serde(default)]
    pub cost_ticks: i64,

    /// Unique request identifier (empty when absent; backfilled by the client).
    #[serde(default)]
    pub request_id: String,
}

/// Request body for voice design (generating a voice from a description).
#[derive(Debug, Clone, Serialize, Default)]
pub struct VoiceDesignRequest {
    /// Text description of the desired voice.
    /// Serialized on the wire as `voice_description`.
    #[serde(rename = "voice_description")]
    pub description: String,

    /// Sample text to speak with the designed voice.
    /// Serialized on the wire as `sample_text`.
    #[serde(rename = "sample_text")]
    pub text: String,

    /// Output audio format. Serialized on the wire as `format`.
    #[serde(rename = "format", skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,
}

/// Request body for Starfish TTS.
#[derive(Debug, Clone, Serialize, Default)]
pub struct StarfishTTSRequest {
    /// Text to synthesise.
    pub text: String,

    /// Voice identifier.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice: Option<String>,

    /// Output audio format. Serialized on the wire as `format`.
    #[serde(rename = "format", skip_serializing_if = "Option::is_none")]
    pub output_format: Option<String>,

    /// Speech speed multiplier.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub speed: Option<f64>,
}

// ---------------------------------------------------------------------------
// Eleven Music (advanced music generation with sections, finetunes, etc.)
// ---------------------------------------------------------------------------

/// A section within an Eleven Music generation request.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct MusicSection {
    /// Section kind label — presumably values like "verse"/"chorus";
    /// exact accepted values are provider-defined (verify against backend).
    pub section_type: String,
    /// Lyrics for this section.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lyrics: Option<String>,
    /// Style direction to apply to this section.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub style: Option<String>,
    /// Style direction to avoid in this section.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub style_exclude: Option<String>,
}

/// Request body for advanced music generation (ElevenLabs Eleven Music).
#[derive(Debug, Clone, Serialize, Default)]
pub struct ElevenMusicRequest {
    /// Music generation model.
    pub model: String,
    /// Describes the music to generate.
    pub prompt: String,
    /// Optional per-section structure (lyrics/style per section).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sections: Option<Vec<MusicSection>>,
    /// Target duration in seconds.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub duration_seconds: Option<i32>,
    /// Language hint.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,
    /// Whether the track should contain vocals.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vocals: Option<bool>,
    /// Global style direction to apply.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub style: Option<String>,
    /// Global style direction to avoid.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub style_exclude: Option<String>,
    /// Finetune to generate with (see `Client::list_finetunes`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finetune_id: Option<String>,
    /// Reference to a previous generation to edit — presumably a
    /// request/clip id; verify against the backend contract.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub edit_reference_id: Option<String>,
    /// Instruction describing the edit to apply to the referenced generation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub edit_instruction: Option<String>,
}

/// A single music clip from advanced generation.
#[derive(Debug, Clone, Deserialize)]
pub struct ElevenMusicClip {
    /// Base64-encoded audio data.
    #[serde(default)]
    pub base64: String,
    /// Audio format (e.g. "mp3").
    #[serde(default)]
    pub format: String,
    /// File size in bytes. NOTE(review): field is `size` here but
    /// `size_bytes` on the other response types — wire name, do not rename.
    #[serde(default)]
    pub size: i64,
}

/// Response from advanced music generation.
/// Backend returns: { clips: [...], model, cost_ticks, request_id }
#[derive(Debug, Clone, Deserialize)]
pub struct ElevenMusicResponse {
    /// Generated music clips.
    #[serde(default)]
    pub clips: Vec<ElevenMusicClip>,
    /// Model used.
    #[serde(default)]
    pub model: String,
    /// Total cost in ticks (0 when absent; backfilled by the client).
    #[serde(default)]
    pub cost_ticks: i64,
    /// Unique request identifier (empty when absent; backfilled by the client).
    #[serde(default)]
    pub request_id: String,
}

/// Info about a music finetune.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FinetuneInfo {
    /// Unique finetune identifier.
    pub finetune_id: String,
    /// Human-readable finetune name.
    pub name: String,
    /// Processing status (empty when the backend omits it).
    #[serde(default)]
    pub status: String,
    /// Creation timestamp, if reported.
    #[serde(default)]
    pub created_at: Option<String>,
}

/// Response from listing finetunes.
#[derive(Debug, Clone, Deserialize)]
pub struct ListFinetunesResponse {
    /// All finetunes owned by the authenticated user.
    pub finetunes: Vec<FinetuneInfo>,
}

523// ---------------------------------------------------------------------------
524// Client impl
525// ---------------------------------------------------------------------------
526
527impl Client {
528    /// Generates speech from text.
529    pub async fn speak(&self, req: &TtsRequest) -> Result<TtsResponse> {
530        let (mut resp, meta) = self
531            .post_json::<TtsRequest, TtsResponse>("/qai/v1/audio/tts", req)
532            .await?;
533        if resp.cost_ticks == 0 {
534            resp.cost_ticks = meta.cost_ticks;
535        }
536        if resp.request_id.is_empty() {
537            resp.request_id = meta.request_id;
538        }
539        Ok(resp)
540    }
541
542    /// Converts speech to text.
543    pub async fn transcribe(&self, req: &SttRequest) -> Result<SttResponse> {
544        let (mut resp, meta) = self
545            .post_json::<SttRequest, SttResponse>("/qai/v1/audio/stt", req)
546            .await?;
547        if resp.cost_ticks == 0 {
548            resp.cost_ticks = meta.cost_ticks;
549        }
550        if resp.request_id.is_empty() {
551            resp.request_id = meta.request_id;
552        }
553        Ok(resp)
554    }
555
556    /// Generates sound effects from a text prompt (ElevenLabs).
557    pub async fn sound_effects(&self, req: &SoundEffectRequest) -> Result<SoundEffectResponse> {
558        let (mut resp, meta) = self
559            .post_json::<SoundEffectRequest, SoundEffectResponse>(
560                "/qai/v1/audio/sound-effects",
561                req,
562            )
563            .await?;
564        if resp.cost_ticks == 0 {
565            resp.cost_ticks = meta.cost_ticks;
566        }
567        if resp.request_id.is_empty() {
568            resp.request_id = meta.request_id;
569        }
570        Ok(resp)
571    }
572
573    /// Generates music from a text prompt.
574    pub async fn generate_music(&self, req: &MusicRequest) -> Result<MusicResponse> {
575        let (mut resp, meta) = self
576            .post_json::<MusicRequest, MusicResponse>("/qai/v1/audio/music", req)
577            .await?;
578        if resp.cost_ticks == 0 {
579            resp.cost_ticks = meta.cost_ticks;
580        }
581        if resp.request_id.is_empty() {
582            resp.request_id = meta.request_id;
583        }
584        Ok(resp)
585    }
586
587    /// Generates multi-speaker dialogue audio.
588    pub async fn dialogue(&self, req: &DialogueRequest) -> Result<AudioResponse> {
589        let (mut resp, meta) = self
590            .post_json::<DialogueRequest, AudioResponse>("/qai/v1/audio/dialogue", req)
591            .await?;
592        if resp.cost_ticks == 0 {
593            resp.cost_ticks = meta.cost_ticks;
594        }
595        if resp.request_id.is_empty() {
596            resp.request_id = meta.request_id;
597        }
598        Ok(resp)
599    }
600
601    /// Converts speech to a different voice.
602    pub async fn speech_to_speech(
603        &self,
604        req: &SpeechToSpeechRequest,
605    ) -> Result<AudioResponse> {
606        let (mut resp, meta) = self
607            .post_json::<SpeechToSpeechRequest, AudioResponse>(
608                "/qai/v1/audio/speech-to-speech",
609                req,
610            )
611            .await?;
612        if resp.cost_ticks == 0 {
613            resp.cost_ticks = meta.cost_ticks;
614        }
615        if resp.request_id.is_empty() {
616            resp.request_id = meta.request_id;
617        }
618        Ok(resp)
619    }
620
621    /// Isolates voice from background noise and music.
622    pub async fn isolate_voice(&self, req: &IsolateRequest) -> Result<AudioResponse> {
623        let (mut resp, meta) = self
624            .post_json::<IsolateRequest, AudioResponse>("/qai/v1/audio/isolate", req)
625            .await?;
626        if resp.cost_ticks == 0 {
627            resp.cost_ticks = meta.cost_ticks;
628        }
629        if resp.request_id.is_empty() {
630            resp.request_id = meta.request_id;
631        }
632        Ok(resp)
633    }
634
635    /// Remixes audio with a different voice.
636    pub async fn remix_voice(&self, req: &RemixRequest) -> Result<AudioResponse> {
637        let (mut resp, meta) = self
638            .post_json::<RemixRequest, AudioResponse>("/qai/v1/audio/remix", req)
639            .await?;
640        if resp.cost_ticks == 0 {
641            resp.cost_ticks = meta.cost_ticks;
642        }
643        if resp.request_id.is_empty() {
644            resp.request_id = meta.request_id;
645        }
646        Ok(resp)
647    }
648
649    /// Dubs audio or video into a target language.
650    pub async fn dub(&self, req: &DubRequest) -> Result<AudioResponse> {
651        let (mut resp, meta) = self
652            .post_json::<DubRequest, AudioResponse>("/qai/v1/audio/dub", req)
653            .await?;
654        if resp.cost_ticks == 0 {
655            resp.cost_ticks = meta.cost_ticks;
656        }
657        if resp.request_id.is_empty() {
658            resp.request_id = meta.request_id;
659        }
660        Ok(resp)
661    }
662
663    /// Performs forced alignment of text against audio.
664    pub async fn align(&self, req: &AlignRequest) -> Result<AlignResponse> {
665        let (mut resp, meta) = self
666            .post_json::<AlignRequest, AlignResponse>("/qai/v1/audio/align", req)
667            .await?;
668        if resp.cost_ticks == 0 {
669            resp.cost_ticks = meta.cost_ticks;
670        }
671        if resp.request_id.is_empty() {
672            resp.request_id = meta.request_id;
673        }
674        Ok(resp)
675    }
676
677    /// Designs a new voice from a text description and generates sample audio.
678    pub async fn voice_design(&self, req: &VoiceDesignRequest) -> Result<AudioResponse> {
679        let (mut resp, meta) = self
680            .post_json::<VoiceDesignRequest, AudioResponse>("/qai/v1/audio/voice-design", req)
681            .await?;
682        if resp.cost_ticks == 0 {
683            resp.cost_ticks = meta.cost_ticks;
684        }
685        if resp.request_id.is_empty() {
686            resp.request_id = meta.request_id;
687        }
688        Ok(resp)
689    }
690
691    /// Generates speech using Starfish TTS (HeyGen).
692    pub async fn starfish_tts(&self, req: &StarfishTTSRequest) -> Result<AudioResponse> {
693        let (mut resp, meta) = self
694            .post_json::<StarfishTTSRequest, AudioResponse>("/qai/v1/audio/starfish-tts", req)
695            .await?;
696        if resp.cost_ticks == 0 {
697            resp.cost_ticks = meta.cost_ticks;
698        }
699        if resp.request_id.is_empty() {
700            resp.request_id = meta.request_id;
701        }
702        Ok(resp)
703    }
704
705    /// Generates music via ElevenLabs Eleven Music (advanced: sections, finetunes, edits).
706    pub async fn generate_music_advanced(
707        &self,
708        req: &ElevenMusicRequest,
709    ) -> Result<ElevenMusicResponse> {
710        let (mut resp, meta) = self
711            .post_json::<ElevenMusicRequest, ElevenMusicResponse>(
712                "/qai/v1/audio/music/advanced",
713                req,
714            )
715            .await?;
716        if resp.cost_ticks == 0 {
717            resp.cost_ticks = meta.cost_ticks;
718        }
719        if resp.request_id.is_empty() {
720            resp.request_id = meta.request_id;
721        }
722        Ok(resp)
723    }
724
725    /// Lists all music finetunes for the authenticated user.
726    pub async fn list_finetunes(&self) -> Result<ListFinetunesResponse> {
727        let (resp, _) = self
728            .get_json::<ListFinetunesResponse>("/qai/v1/audio/finetunes")
729            .await?;
730        Ok(resp)
731    }
732
733    /// Creates a new music finetune from audio sample files.
734    pub async fn create_finetune(
735        &self,
736        name: &str,
737        files: Vec<crate::voices::CloneVoiceFile>,
738    ) -> Result<FinetuneInfo> {
739        let mut form = reqwest::multipart::Form::new().text("name", name.to_string());
740
741        for file in files {
742            let part = reqwest::multipart::Part::bytes(file.data)
743                .file_name(file.filename)
744                .mime_str(&file.mime_type)
745                .map_err(|e| crate::error::Error::Http(e.into()))?;
746            form = form.part("files", part);
747        }
748
749        let (resp, _) = self
750            .post_multipart::<FinetuneInfo>("/qai/v1/audio/finetunes", form)
751            .await?;
752        Ok(resp)
753    }
754
755    /// Deletes a music finetune by ID.
756    pub async fn delete_finetune(&self, id: &str) -> Result<serde_json::Value> {
757        let path = format!("/qai/v1/audio/finetunes/{id}");
758        let (resp, _) = self.delete_json::<serde_json::Value>(&path).await?;
759        Ok(resp)
760    }
761}