use async_trait::async_trait;
use serde::Deserialize;
use tracing::debug;
use super::{Bytes, Result, TranscribeError, Transcriber};
/// URL that transcription requests are POSTed to.
const TRANSCRIPTION_ENDPOINT: &str = "https://api.openai.com/v1/audio/transcriptions";
/// Model name used when the configuration does not specify one.
const DEFAULT_MODEL: &str = "gpt-4o-mini-transcribe";
/// Settings for talking to the OpenAI transcription API.
///
/// `Debug` is implemented by hand instead of derived so that the API key is
/// never written out when this value (or a type embedding it) is formatted
/// with `{:?}`, e.g. in log output.
#[derive(Clone)]
pub struct OpenAIConfig {
    /// Secret API key used to authenticate requests.
    pub api_key: String,
    /// Optional model override; `None` means "use the default model".
    pub model: Option<String>,
}

impl std::fmt::Debug for OpenAIConfig {
    /// Formats the configuration with the API key replaced by a placeholder.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("OpenAIConfig")
            // Never print the secret itself; only signal that a key is present.
            .field("api_key", &"[REDACTED]")
            .field("model", &self.model)
            .finish()
    }
}
impl OpenAIConfig {
    /// Builds a configuration from an API key, leaving the model unset so the
    /// default model is used.
    pub fn new(api_key: impl Into<String>) -> Self {
        let api_key = api_key.into();
        Self {
            api_key,
            model: None,
        }
    }

    /// Returns this configuration with the model replaced by `model`.
    pub fn with_model(self, model: impl Into<String>) -> Self {
        Self {
            model: Some(model.into()),
            ..self
        }
    }

    /// Name of the model to request: the explicit override if one was set,
    /// otherwise `DEFAULT_MODEL`.
    pub fn model(&self) -> &str {
        match &self.model {
            Some(chosen) => chosen.as_str(),
            None => DEFAULT_MODEL,
        }
    }
}
/// Client that sends transcription requests to the OpenAI API.
#[derive(Debug, Clone)]
pub struct OpenAIClient {
/// HTTP client used to send requests.
client: reqwest::Client,
/// API key and model selection applied to each request.
config: OpenAIConfig,
}
/// Shape of the JSON body deserialized from a successful transcription
/// response; only the `text` field is read.
#[derive(Debug, Deserialize)]
struct WhisperResponse {
/// Transcribed text returned to the caller.
text: String,
}
impl OpenAIClient {
    /// Creates a client for the given configuration, backed by a freshly
    /// constructed `reqwest::Client`.
    pub fn new(config: OpenAIConfig) -> Self {
        let client = reqwest::Client::new();
        Self { client, config }
    }

    /// Convenience constructor: wraps `api_key` in an `OpenAIConfig` with no
    /// model override and builds a client from it.
    pub fn from_api_key(api_key: impl Into<String>) -> Self {
        let config = OpenAIConfig::new(api_key);
        Self::new(config)
    }
}
#[async_trait]
impl Transcriber for OpenAIClient {
    /// Uploads `audio` to the OpenAI transcription endpoint and returns the
    /// transcribed text.
    ///
    /// The request is a multipart form with a `file` part (named
    /// `recording.wav`, MIME type `audio/wav`), a `model` part taken from the
    /// configuration, and a `language` part when `language` is `Some`.
    ///
    /// # Errors
    ///
    /// - `TranscribeError::ApiError` if the file part cannot be built, or if
    ///   the API answers with a non-success status (the status and response
    ///   body are included in the message).
    /// - `TranscribeError::TranscriptionFailed` if a successful response body
    ///   cannot be decoded as JSON containing a `text` field.
    /// - Failures while sending the request are propagated through `?`.
    async fn transcribe(&self, audio: Bytes, language: Option<&str>) -> Result<String> {
        let model = self.config.model();

        debug!(
            model,
            audio_bytes = audio.len(),
            language = ?language,
            "Sending transcription request to OpenAI"
        );

        // NOTE(review): the payload is always labelled as WAV regardless of
        // its actual contents — presumably callers only pass WAV; confirm.
        let file_part = reqwest::multipart::Part::stream(reqwest::Body::from(audio))
            .file_name("recording.wav")
            .mime_str("audio/wav")
            .map_err(|e| TranscribeError::ApiError(e.to_string()))?;

        let mut form = reqwest::multipart::Form::new()
            .part("file", file_part)
            .part("model", reqwest::multipart::Part::text(model.to_string()));
        if let Some(lang) = language {
            form = form.part("language", reqwest::multipart::Part::text(lang.to_string()));
        }

        let response = self
            .client
            .post(TRANSCRIPTION_ENDPOINT)
            // `bearer_auth` produces the same `Authorization: Bearer <key>`
            // header as formatting it by hand, without hand-assembling the
            // secret into a header string here.
            .bearer_auth(&self.config.api_key)
            .multipart(form)
            .send()
            .await?;

        let status = response.status();
        if !status.is_success() {
            // Best effort: an unreadable error body should not mask the
            // status code, so fall back to an empty string.
            let body = response.text().await.unwrap_or_default();
            return Err(TranscribeError::ApiError(format!(
                "API returned {}: {}",
                status, body
            )));
        }

        let parsed: WhisperResponse = response
            .json()
            .await
            .map_err(|e| TranscribeError::TranscriptionFailed(e.to_string()))?;
        Ok(parsed.text)
    }

    /// Short identifier for this transcriber backend.
    fn name(&self) -> &str {
        "openai"
    }
}