use crate::providers::TranscriptionProvider;
use anyhow::Result;
use async_trait::async_trait;
use bytes::Bytes;
use tracing::{debug, error, info, instrument};
/// Transcription provider that talks to the OpenAI API through `async_openai`.
///
/// Construct it with `OpenAIProvider::new`; the `TranscriptionProvider`
/// implementation in this file performs the actual requests.
pub struct OpenAIProvider {
/// Client used for every request issued by this provider.
client: async_openai::Client<async_openai::config::OpenAIConfig>,
}
impl OpenAIProvider {
    /// Builds a provider around a default-configured `async_openai` client.
    ///
    /// Nothing is awaited in the body; the function is `async` only because
    /// that is its established signature, which callers may already `.await`.
    ///
    /// NOTE(review): the default `async_openai` configuration presumably picks
    /// up credentials from the environment (e.g. `OPENAI_API_KEY`) — confirm
    /// against the crate version in use.
    pub async fn new() -> Self {
        let client = async_openai::Client::new();
        Self { client }
    }
}
#[async_trait]
impl TranscriptionProvider for OpenAIProvider {
fn name(&self) -> &'static str {
"OpenAI"
}
fn min_chunk_duration(&self) -> std::time::Duration {
std::time::Duration::from_secs(5)
}
#[instrument(skip(self, audio_data))]
async fn transcribe(&self, audio_data: &[u8]) -> Result<String> {
use async_openai::types::{AudioInput, AudioResponseFormat, CreateTranscriptionRequest};
info!("Starting OpenAI transcription request");
debug!("Audio data size: {} bytes", audio_data.len());
let min_required_bytes = 16000 * 2 * 5; if audio_data.len() < min_required_bytes {
return Err(anyhow::anyhow!(
"Audio chunk too short ({} bytes < {} bytes). Minimum 5 seconds required",
audio_data.len(),
min_required_bytes
));
}
let audio_input =
AudioInput::from_bytes("audio.wav".to_string(), Bytes::copy_from_slice(audio_data));
let request = CreateTranscriptionRequest {
file: audio_input,
model: "whisper-1".to_string(),
response_format: Some(AudioResponseFormat::Json),
..Default::default()
};
debug!("Sending request to OpenAI Whisper API");
match self.client.audio().transcribe(request).await {
Ok(response) => {
info!("OpenAI transcription successful");
info!("Received transcription: {}", response.text);
Ok(response.text)
}
Err(e) => {
error!("OpenAI transcription failed: {}", e);
Err(anyhow::anyhow!("OpenAI API error: {}", e))
}
}
}
}