whis_core/provider/
deepgram.rs

//! Deepgram Nova transcription provider.
//!
//! Deepgram uses a different API format than OpenAI-style providers:
//! - The raw audio bytes are sent as the request body (not as a multipart form).
//! - Options are passed as URL query parameters.
//! - The response JSON has a different structure.
7
8use anyhow::{Context, Result};
9use async_trait::async_trait;
10use serde::Deserialize;
11
12use super::base::retry::{RetryConfig, is_rate_limited, is_retryable_error, is_retryable_status};
13use super::{
14    DEFAULT_TIMEOUT_SECS, TranscriptionBackend, TranscriptionRequest, TranscriptionResult,
15    TranscriptionStage,
16};
17
/// Endpoint that receives the audio upload; options are appended as query parameters.
const API_URL: &str = "https://api.deepgram.com/v1/listen";

/// Value sent as the `model` query parameter on every request.
const MODEL: &str = "nova-2";
20
21#[derive(Deserialize)]
22struct Response {
23    results: Results,
24}
25
26#[derive(Deserialize)]
27struct Results {
28    channels: Vec<Channel>,
29}
30
31#[derive(Deserialize)]
32struct Channel {
33    alternatives: Vec<Alternative>,
34}
35
36#[derive(Deserialize)]
37struct Alternative {
38    transcript: String,
39}
40
/// Transcription provider backed by Deepgram's REST API (Nova-2 model).
///
/// The type carries no state: credentials and audio are supplied per call.
///
/// The original author describes the service as fast, reasonably accurate,
/// and priced at $0.26/hour — NOTE(review): pricing is not verifiable from
/// this file; confirm against Deepgram's current price list.
#[derive(Clone, Debug, Default)]
pub struct DeepgramProvider;
47
48#[async_trait]
49impl TranscriptionBackend for DeepgramProvider {
50    fn name(&self) -> &'static str {
51        "deepgram"
52    }
53
54    fn display_name(&self) -> &'static str {
55        "Deepgram Nova"
56    }
57
58    fn transcribe_sync(
59        &self,
60        api_key: &str,
61        request: TranscriptionRequest,
62    ) -> Result<TranscriptionResult> {
63        // Report uploading stage
64        request.report(TranscriptionStage::Uploading);
65
66        let client = reqwest::blocking::Client::builder()
67            .timeout(std::time::Duration::from_secs(DEFAULT_TIMEOUT_SECS))
68            .build()
69            .context("Failed to create HTTP client")?;
70
71        let mut url = reqwest::Url::parse(API_URL).context("Failed to parse Deepgram URL")?;
72        url.query_pairs_mut()
73            .append_pair("model", MODEL)
74            .append_pair("smart_format", "true");
75
76        if let Some(lang) = &request.language {
77            url.query_pairs_mut().append_pair("language", lang);
78        }
79
80        let config = RetryConfig::default();
81        let mut attempt = 0;
82
83        loop {
84            // Report transcribing stage
85            request.report(TranscriptionStage::Transcribing);
86
87            let result = client
88                .post(url.clone())
89                .header("Authorization", format!("Token {api_key}"))
90                .header("Content-Type", &request.mime_type)
91                .body(request.audio_data.clone())
92                .send();
93
94            match result {
95                Ok(response) => {
96                    let status = response.status();
97
98                    if status.is_success() {
99                        let text = response.text().context("Failed to get response text")?;
100                        let resp: Response = serde_json::from_str(&text)
101                            .context("Failed to parse Deepgram API response")?;
102
103                        let transcript = resp
104                            .results
105                            .channels
106                            .first()
107                            .and_then(|c| c.alternatives.first())
108                            .map(|a| a.transcript.clone())
109                            .ok_or_else(|| {
110                                anyhow::anyhow!(
111                                    "Deepgram API returned unexpected response format: no transcript found"
112                                )
113                            })?;
114
115                        return Ok(TranscriptionResult { text: transcript });
116                    }
117
118                    // Check if error is retryable
119                    if is_retryable_status(status) && attempt < config.max_retries {
120                        let delay = config.delay_for_attempt(attempt, is_rate_limited(status));
121                        crate::verbose!(
122                            "Deepgram request failed with {} (attempt {}/{}), retrying in {:?}",
123                            status,
124                            attempt + 1,
125                            config.max_retries,
126                            delay
127                        );
128                        std::thread::sleep(delay);
129                        attempt += 1;
130                        continue;
131                    }
132
133                    // Non-retryable error or max retries exceeded
134                    let error_text = response
135                        .text()
136                        .unwrap_or_else(|_| "Unknown error".to_string());
137                    anyhow::bail!("Deepgram API error ({status}): {error_text}");
138                }
139                Err(err) => {
140                    // Check if network error is retryable
141                    if is_retryable_error(&err) && attempt < config.max_retries {
142                        let delay = config.delay_for_attempt(attempt, false);
143                        crate::verbose!(
144                            "Deepgram request failed with network error (attempt {}/{}), retrying in {:?}: {}",
145                            attempt + 1,
146                            config.max_retries,
147                            delay,
148                            err
149                        );
150                        std::thread::sleep(delay);
151                        attempt += 1;
152                        continue;
153                    }
154
155                    return Err(err).context("Failed to send request to Deepgram API");
156                }
157            }
158        }
159    }
160
161    async fn transcribe_async(
162        &self,
163        client: &reqwest::Client,
164        api_key: &str,
165        request: TranscriptionRequest,
166    ) -> Result<TranscriptionResult> {
167        // Report uploading stage
168        request.report(TranscriptionStage::Uploading);
169
170        let mut url = reqwest::Url::parse(API_URL).context("Failed to parse Deepgram URL")?;
171        url.query_pairs_mut()
172            .append_pair("model", MODEL)
173            .append_pair("smart_format", "true");
174
175        if let Some(lang) = &request.language {
176            url.query_pairs_mut().append_pair("language", lang);
177        }
178
179        let config = RetryConfig::default();
180        let mut attempt = 0;
181
182        loop {
183            // Report transcribing stage
184            request.report(TranscriptionStage::Transcribing);
185
186            let result = client
187                .post(url.clone())
188                .header("Authorization", format!("Token {api_key}"))
189                .header("Content-Type", &request.mime_type)
190                .body(request.audio_data.clone())
191                .send()
192                .await;
193
194            match result {
195                Ok(response) => {
196                    let status = response.status();
197
198                    if status.is_success() {
199                        let text = response
200                            .text()
201                            .await
202                            .context("Failed to get response text")?;
203                        let resp: Response = serde_json::from_str(&text)
204                            .context("Failed to parse Deepgram API response")?;
205
206                        let transcript = resp
207                            .results
208                            .channels
209                            .first()
210                            .and_then(|c| c.alternatives.first())
211                            .map(|a| a.transcript.clone())
212                            .ok_or_else(|| {
213                                anyhow::anyhow!(
214                                    "Deepgram API returned unexpected response format: no transcript found"
215                                )
216                            })?;
217
218                        return Ok(TranscriptionResult { text: transcript });
219                    }
220
221                    // Check if error is retryable
222                    if is_retryable_status(status) && attempt < config.max_retries {
223                        let delay = config.delay_for_attempt(attempt, is_rate_limited(status));
224                        crate::verbose!(
225                            "Deepgram request failed with {} (attempt {}/{}), retrying in {:?}",
226                            status,
227                            attempt + 1,
228                            config.max_retries,
229                            delay
230                        );
231                        tokio::time::sleep(delay).await;
232                        attempt += 1;
233                        continue;
234                    }
235
236                    // Non-retryable error or max retries exceeded
237                    let error_text = response
238                        .text()
239                        .await
240                        .unwrap_or_else(|_| "Unknown error".to_string());
241                    anyhow::bail!("Deepgram API error ({status}): {error_text}");
242                }
243                Err(err) => {
244                    // Check if network error is retryable
245                    if is_retryable_error(&err) && attempt < config.max_retries {
246                        let delay = config.delay_for_attempt(attempt, false);
247                        crate::verbose!(
248                            "Deepgram request failed with network error (attempt {}/{}), retrying in {:?}: {}",
249                            attempt + 1,
250                            config.max_retries,
251                            delay,
252                            err
253                        );
254                        tokio::time::sleep(delay).await;
255                        attempt += 1;
256                        continue;
257                    }
258
259                    return Err(err).context("Failed to send request to Deepgram API");
260                }
261            }
262        }
263    }
264}