use crate::{Result, VoiceError};
use async_trait::async_trait;
/// Encoding tag carried alongside a synthesized audio payload.
///
/// The tag is consulted at playback time to pick a decoder for the bytes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AudioFormat {
    /// The payload is MP3 data.
    Mp3,
    /// The payload is WAV data.
    Wav,
}
/// An encoded audio clip together with the format needed to decode it.
#[derive(Debug)]
pub struct SynthesizedAudio {
    /// Raw encoded audio data.
    pub bytes: Vec<u8>,
    /// Encoding of [`SynthesizedAudio::bytes`].
    pub format: AudioFormat,
}
/// A text-to-speech backend.
///
/// Implementors only have to provide [`Speaker::synth`]; [`Speaker::speak`]
/// has a default implementation built on top of it.
#[async_trait]
pub trait Speaker: Send + Sync {
    /// Turns `text` into encoded audio.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementation reports for a failed
    /// synthesis.
    async fn synth(&self, text: &str) -> Result<SynthesizedAudio>;

    /// Synthesizes `text` and hands the resulting clip to [`play_audio`].
    ///
    /// # Errors
    ///
    /// Propagates a synthesis error from [`Speaker::synth`] without attempting
    /// playback, or the error returned by [`play_audio`].
    async fn speak(&self, text: &str) -> Result<()> {
        play_audio(self.synth(text).await?).await
    }
}
/// Encodes mono `f32` PCM samples as an in-memory 16-bit integer WAV file.
///
/// Each sample is clamped to `[-1.0, 1.0]`, scaled by `i16::MAX` and cast to
/// `i16` (the cast truncates toward zero; a NaN sample becomes `0`).
///
/// * `samples` - single-channel samples, nominally in `[-1.0, 1.0]`.
/// * `sample_rate` - sample rate written into the WAV header, in Hz.
///
/// Returns the complete WAV file contents.
///
/// # Errors
///
/// Returns an [`std::io::Error`] if `hound` fails to create the writer, write
/// a sample, or finalize the file.
pub(crate) fn encode_pcm_f32_to_wav_pcm16(
    samples: &[f32],
    sample_rate: u32,
) -> std::io::Result<Vec<u8>> {
    // Size of a canonical PCM WAV header. Used purely as a capacity hint, so
    // being off only costs one extra reallocation.
    const WAV_HEADER_HINT: usize = 44;

    let spec = hound::WavSpec {
        channels: 1,
        sample_rate,
        bits_per_sample: 16,
        sample_format: hound::SampleFormat::Int,
    };

    // The output size is known up front (header + 2 bytes per sample), so
    // reserve it once instead of letting the Vec grow repeatedly.
    let capacity = WAV_HEADER_HINT + samples.len() * std::mem::size_of::<i16>();
    let mut buf = std::io::Cursor::new(Vec::<u8>::with_capacity(capacity));
    {
        // Scoped so the writer's mutable borrow of `buf` ends before the
        // buffer is taken back out below.
        let mut writer = hound::WavWriter::new(&mut buf, spec).map_err(io_err)?;
        for &sample in samples {
            let clamped = sample.clamp(-1.0, 1.0);
            let pcm16 = (clamped * i16::MAX as f32) as i16;
            writer.write_sample(pcm16).map_err(io_err)?;
        }
        // `finalize` reports errors that would be lost if the writer were
        // merely dropped.
        writer.finalize().map_err(io_err)?;
    }
    Ok(buf.into_inner())
}
fn io_err(e: hound::Error) -> std::io::Error {
std::io::Error::new(std::io::ErrorKind::Other, e.to_string())
}
pub async fn play_audio(audio: SynthesizedAudio) -> Result<()> {
tokio::task::spawn_blocking(move || -> Result<()> {
let (_stream, handle) = rodio::OutputStream::try_default()
.map_err(|e| VoiceError::Playback(format!("output stream: {e}")))?;
let sink = rodio::Sink::try_new(&handle)
.map_err(|e| VoiceError::Playback(format!("sink: {e}")))?;
let cursor = std::io::Cursor::new(audio.bytes);
let decoder = match audio.format {
AudioFormat::Mp3 | AudioFormat::Wav => rodio::Decoder::new(cursor)
.map_err(|e| VoiceError::Playback(format!("decode: {e}")))?,
};
sink.append(decoder);
sink.sleep_until_end();
Ok(())
})
.await
.map_err(|e| VoiceError::Playback(format!("blocking task join: {e}")))?
}