dynamo_async_openai/
audio.rs

// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
// Original Copyright (c) 2022 Himanshu Neema
// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
//
// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
// Licensed under Apache 2.0
10
11use bytes::Bytes;
12
13use crate::{
14    Client,
15    config::Config,
16    error::OpenAIError,
17    types::{
18        CreateSpeechRequest, CreateSpeechResponse, CreateTranscriptionRequest,
19        CreateTranscriptionResponseJson, CreateTranscriptionResponseVerboseJson,
20        CreateTranslationRequest, CreateTranslationResponseJson,
21        CreateTranslationResponseVerboseJson,
22    },
23};
24
25/// Turn audio into text or text into audio.
26/// Related guide: [Speech to text](https://platform.openai.com/docs/guides/speech-to-text)
27pub struct Audio<'c, C: Config> {
28    client: &'c Client<C>,
29}
30
31impl<'c, C: Config> Audio<'c, C> {
32    pub fn new(client: &'c Client<C>) -> Self {
33        Self { client }
34    }
35
36    /// Transcribes audio into the input language.
37    #[crate::byot(
38        T0 = Clone,
39        R = serde::de::DeserializeOwned,
40        where_clause =  "reqwest::multipart::Form: crate::traits::AsyncTryFrom<T0, Error = OpenAIError>",
41    )]
42    pub async fn transcribe(
43        &self,
44        request: CreateTranscriptionRequest,
45    ) -> Result<CreateTranscriptionResponseJson, OpenAIError> {
46        self.client
47            .post_form("/audio/transcriptions", request)
48            .await
49    }
50
51    /// Transcribes audio into the input language.
52    #[crate::byot(
53        T0 = Clone,
54        R = serde::de::DeserializeOwned,
55        where_clause =  "reqwest::multipart::Form: crate::traits::AsyncTryFrom<T0, Error = OpenAIError>",
56    )]
57    pub async fn transcribe_verbose_json(
58        &self,
59        request: CreateTranscriptionRequest,
60    ) -> Result<CreateTranscriptionResponseVerboseJson, OpenAIError> {
61        self.client
62            .post_form("/audio/transcriptions", request)
63            .await
64    }
65
66    /// Transcribes audio into the input language.
67    pub async fn transcribe_raw(
68        &self,
69        request: CreateTranscriptionRequest,
70    ) -> Result<Bytes, OpenAIError> {
71        self.client
72            .post_form_raw("/audio/transcriptions", request)
73            .await
74    }
75
76    /// Translates audio into English.
77    #[crate::byot(
78        T0 = Clone,
79        R = serde::de::DeserializeOwned,
80        where_clause =  "reqwest::multipart::Form: crate::traits::AsyncTryFrom<T0, Error = OpenAIError>",
81    )]
82    pub async fn translate(
83        &self,
84        request: CreateTranslationRequest,
85    ) -> Result<CreateTranslationResponseJson, OpenAIError> {
86        self.client.post_form("/audio/translations", request).await
87    }
88
89    /// Translates audio into English.
90    #[crate::byot(
91        T0 = Clone,
92        R = serde::de::DeserializeOwned,
93        where_clause =  "reqwest::multipart::Form: crate::traits::AsyncTryFrom<T0, Error = OpenAIError>",
94    )]
95    pub async fn translate_verbose_json(
96        &self,
97        request: CreateTranslationRequest,
98    ) -> Result<CreateTranslationResponseVerboseJson, OpenAIError> {
99        self.client.post_form("/audio/translations", request).await
100    }
101
102    /// Transcribes audio into the input language.
103    pub async fn translate_raw(
104        &self,
105        request: CreateTranslationRequest,
106    ) -> Result<Bytes, OpenAIError> {
107        self.client
108            .post_form_raw("/audio/translations", request)
109            .await
110    }
111
112    /// Generates audio from the input text.
113    pub async fn speech(
114        &self,
115        request: CreateSpeechRequest,
116    ) -> Result<CreateSpeechResponse, OpenAIError> {
117        let bytes = self.client.post_raw("/audio/speech", request).await?;
118
119        Ok(CreateSpeechResponse { bytes })
120    }
121}