use anyhow::{Context, Result};
use async_trait::async_trait;
use super::{TranscriptionBackend, TranscriptionRequest, TranscriptionResult, TranscriptionStage};
/// Stateless handle for the local Parakeet transcription backend.
///
/// The type carries no data; all behaviour lives in its
/// `TranscriptionBackend` implementation.
#[derive(Clone, Debug, Default)]
pub struct LocalParakeetProvider;
#[async_trait]
impl TranscriptionBackend for LocalParakeetProvider {
    /// Returns the short identifier string of this backend.
    fn name(&self) -> &'static str {
        "local-parakeet"
    }

    /// Returns the label shown for this backend.
    fn display_name(&self) -> &'static str {
        "Local Parakeet"
    }

    /// Transcribes `request` on the calling thread using the model at `model_path`.
    fn transcribe_sync(
        &self,
        model_path: &str,
        request: TranscriptionRequest,
    ) -> Result<TranscriptionResult> {
        transcribe_local(model_path, request)
    }

    /// Transcribes `request` without blocking the async executor.
    ///
    /// The work is moved onto Tokio's blocking thread pool. The HTTP client is
    /// unused because this backend does everything locally.
    ///
    /// # Errors
    ///
    /// Returns an error if the blocking task cannot be joined, or whatever
    /// error the transcription itself produced.
    async fn transcribe_async(
        &self,
        _client: &reqwest::Client,
        model_path: &str,
        request: TranscriptionRequest,
    ) -> Result<TranscriptionResult> {
        // The closure must be `'static`, so it needs its own copy of the path.
        let owned_path = model_path.to_owned();
        let join_handle =
            tokio::task::spawn_blocking(move || transcribe_local(&owned_path, request));

        // `?` unwraps the join result; the inner transcription result is returned as-is.
        join_handle.await.context("Task join failed")?
    }
}
/// Decodes the request's MP3 payload and transcribes it with the model at `model_path`.
///
/// The `Transcribing` stage is reported on the request before any decoding starts.
///
/// # Errors
///
/// Propagates any failure from MP3 decoding or from the transcription step.
fn transcribe_local(
    model_path: &str,
    request: TranscriptionRequest,
) -> Result<TranscriptionResult> {
    request.report(TranscriptionStage::Transcribing);

    decode_mp3_to_samples(&request.audio_data)
        .and_then(|decoded| transcribe_samples(model_path, decoded))
}
/// Transcribes already-decoded PCM samples with the model at `model_path`.
///
/// This is the public entry point that skips MP3 decoding and resampling and
/// forwards `samples` straight to `transcribe_samples`.
///
/// NOTE(review): `transcribe_samples` passes the fixed values `16000` and `1`
/// to the model alongside the samples, so callers presumably must supply
/// 16 kHz mono audio — confirm against the `parakeet_rs` API.
///
/// # Errors
///
/// Propagates any error returned by `transcribe_samples`.
pub fn transcribe_raw(model_path: &str, samples: Vec<f32>) -> Result<TranscriptionResult> {
transcribe_samples(model_path, samples)
}
/// Loads the Parakeet model from `model_path` and transcribes `samples` with it.
///
/// The model is loaded afresh on every call. The samples are handed to the
/// model together with the fixed values `16000` and `1`, and the resulting
/// text is trimmed of surrounding whitespace.
///
/// # Errors
///
/// Returns an error if the model cannot be loaded or if transcription fails.
fn transcribe_samples(model_path: &str, samples: Vec<f32>) -> Result<TranscriptionResult> {
    use parakeet_rs::{ParakeetTDT, Transcriber};

    let mut model = ParakeetTDT::from_pretrained(model_path, None)
        .context("Failed to load Parakeet model")?;

    let outcome = model.transcribe_samples(samples, 16000, 1, None);
    let transcript = outcome.context("Parakeet transcription failed")?;

    let text = transcript.text.trim().to_string();
    Ok(TranscriptionResult { text })
}
/// Decodes an in-memory MP3 stream and returns it resampled via
/// `crate::resample::resample_to_16k`.
///
/// Every decoded frame's 16-bit samples are converted to `f32` in the range
/// `[-1.0, 1.0)` and appended, in decoder order, to a single buffer. That
/// buffer is then passed to the resampler together with the stream's sample
/// rate and channel count.
///
/// # Errors
///
/// Returns an error if:
/// - the decoder reports anything other than end-of-stream,
/// - a frame reports a sample rate or channel count that does not fit the
///   types expected by the resampler,
/// - the sample rate or channel count changes between frames (the samples are
///   resampled as one buffer, so a mixed-format stream cannot be represented),
/// - no samples were decoded at all,
/// - the resampler itself fails.
fn decode_mp3_to_samples(mp3_data: &[u8]) -> Result<Vec<f32>> {
    use minimp3::{Decoder, Frame};
    use std::convert::TryFrom;

    // Dividing by 32768 (not `i16::MAX`) maps `i16::MIN` to exactly -1.0
    // instead of slightly below it.
    const I16_SCALE: f32 = 32768.0;

    let mut decoder = Decoder::new(mp3_data);
    let mut samples = Vec::new();
    // (sample rate, channel count) established by the first decoded frame.
    let mut stream_format: Option<(u32, u16)> = None;

    loop {
        match decoder.next_frame() {
            Ok(Frame {
                data,
                sample_rate: raw_rate,
                channels: raw_channels,
                ..
            }) => {
                // Checked conversions: a bare `as` cast would silently wrap a
                // nonsensical value into a plausible-looking one.
                let frame_format = (
                    u32::try_from(raw_rate)
                        .context("MP3 frame reported an invalid sample rate")?,
                    u16::try_from(raw_channels)
                        .context("MP3 frame reported an invalid channel count")?,
                );

                match stream_format {
                    None => stream_format = Some(frame_format),
                    Some(expected) if expected != frame_format => anyhow::bail!(
                        "MP3 stream changes format mid-stream ({} Hz/{} ch -> {} Hz/{} ch)",
                        expected.0,
                        expected.1,
                        frame_format.0,
                        frame_format.1
                    ),
                    Some(_) => {}
                }

                samples.extend(data.iter().map(|&s| f32::from(s) / I16_SCALE));
            }
            Err(minimp3::Error::Eof) => break,
            Err(e) => anyhow::bail!("MP3 decode error: {:?}", e),
        }
    }

    let (sample_rate, channels) = match stream_format {
        Some(format) if !samples.is_empty() => format,
        _ => anyhow::bail!("No audio data decoded from MP3"),
    };

    crate::resample::resample_to_16k(&samples, sample_rate, channels)
}