// car-voice 0.14.0
//
// Voice I/O capability for CAR — mic capture, VAD, listener/speaker traits
// Documentation
//! Text-to-speech provider trait and audio playback helpers.
//!
//! Channels obtain a `&dyn Speaker` (or a concrete impl like
//! [`crate::ElevenLabsSpeaker`]) and call [`Speaker::speak`] for
//! fire-and-forget narration or [`Speaker::synth`] when they want the audio
//! bytes for processing / saving / inspection.

use crate::{Result, VoiceError};
use async_trait::async_trait;

/// Audio format produced by a [`Speaker`] implementation.
///
/// Carried on [`SynthesizedAudio`] as the tag describing how its bytes are
/// encoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioFormat {
    /// Compressed MP3 (decodable with rodio's `mp3` feature).
    Mp3,
    /// Uncompressed WAV.
    Wav,
}

/// A finalized synthesis result — raw audio bytes plus a format tag so the
/// player knows how to decode them.
#[derive(Debug)]
pub struct SynthesizedAudio {
    /// The encoded audio data, in the encoding named by `format`.
    pub bytes: Vec<u8>,
    /// How `bytes` is encoded.
    pub format: AudioFormat,
}

/// A text-to-speech provider.
///
/// From the caller's point of view an implementation is stateless: every
/// [`Speaker::synth`] call stands on its own. The API key, voice ID, and any
/// provider-specific tuning are supplied once, when the concrete impl is
/// constructed.
#[async_trait]
pub trait Speaker: Send + Sync {
    /// Turn `text` into encoded audio.
    ///
    /// The format of the returned [`SynthesizedAudio`] is chosen by the impl
    /// (ElevenLabs streams MP3; a future Piper impl would emit WAV/PCM).
    async fn synth(&self, text: &str) -> Result<SynthesizedAudio>;

    /// Synthesize `text` and play it through the OS default output device.
    ///
    /// The default impl passes the output of [`Speaker::synth`] straight to
    /// [`play_audio`].
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`Speaker::synth`] or
    /// [`play_audio`].
    async fn speak(&self, text: &str) -> Result<()> {
        play_audio(self.synth(text).await?).await
    }
}

/// Encode mono Float32 PCM samples in `[-1.0, 1.0]` as a 16-bit PCM
/// WAV blob. Shared between providers that capture PCM and need to
/// hand back a [`SynthesizedAudio`].
///
/// Samples outside `[-1.0, 1.0]` are clamped to that range before being
/// scaled to `i16`. The float-to-int cast truncates toward zero and maps
/// NaN to `0`.
///
/// # Errors
///
/// Returns a [`std::io::Error`] of kind `Other` carrying the message of any
/// `hound` error raised while writing or finalizing the WAV stream.
pub(crate) fn encode_pcm_f32_to_wav_pcm16(
    samples: &[f32],
    sample_rate: u32,
) -> std::io::Result<Vec<u8>> {
    let spec = hound::WavSpec {
        channels: 1,
        sample_rate,
        bits_per_sample: 16,
        sample_format: hound::SampleFormat::Int,
    };
    // Two bytes per sample plus room for a canonical PCM WAV header. This is
    // only a capacity hint, so the exact header size does not matter.
    let capacity = samples.len() * 2 + 44;
    let mut buf = std::io::Cursor::new(Vec::<u8>::with_capacity(capacity));
    {
        // Scope the writer so its mutable borrow of `buf` ends before
        // `into_inner` consumes the cursor.
        let mut writer = hound::WavWriter::new(&mut buf, spec).map_err(io_err)?;
        for &s in samples {
            let clamped = s.clamp(-1.0, 1.0);
            let pcm16 = (clamped * i16::MAX as f32) as i16;
            writer.write_sample(pcm16).map_err(io_err)?;
        }
        // Finalize explicitly so header-update failures surface as errors
        // rather than being swallowed when the writer is dropped.
        writer.finalize().map_err(io_err)?;
    }
    Ok(buf.into_inner())
}

/// Convert a `hound` error into a [`std::io::Error`] of kind `Other`,
/// keeping the original error's display message.
fn io_err(e: hound::Error) -> std::io::Error {
    std::io::Error::new(std::io::ErrorKind::Other, e.to_string())
}

/// Decode and play [`SynthesizedAudio`] through the OS default output
/// device. Blocks the current task until playback finishes.
///
/// Uses `rodio` (synchronous), which is run on a dedicated blocking thread
/// so the async runtime is not stalled.
///
/// # Errors
///
/// Returns [`VoiceError::Playback`] if the default output stream or the
/// sink cannot be opened, if the bytes cannot be decoded, or if the
/// blocking task cannot be joined.
///
/// # Cancellation
///
/// If the returned future is dropped mid-playback, the underlying
/// blocking thread keeps running until rodio's `sleep_until_end` returns —
/// `spawn_blocking` cannot abort an OS thread. For a "stop speaking now"
/// affordance the caller needs a `Sink` reference and to call `.stop()`
/// on it; that's a future hardening item alongside chunk-streamed playback.
pub async fn play_audio(audio: SynthesizedAudio) -> Result<()> {
    tokio::task::spawn_blocking(move || -> Result<()> {
        // `_stream` must stay bound for the whole closure: dropping the
        // output stream ends playback.
        let (_stream, handle) = rodio::OutputStream::try_default()
            .map_err(|e| VoiceError::Playback(format!("output stream: {e}")))?;
        let sink = rodio::Sink::try_new(&handle)
            .map_err(|e| VoiceError::Playback(format!("sink: {e}")))?;
        let cursor = std::io::Cursor::new(audio.bytes);
        // Both formats go through the same decoder constructor today. The
        // match is kept exhaustive (no `_` arm) so adding a new
        // `AudioFormat` variant forces this site to be revisited.
        let decoder = match audio.format {
            AudioFormat::Mp3 | AudioFormat::Wav => rodio::Decoder::new(cursor)
                .map_err(|e| VoiceError::Playback(format!("decode: {e}")))?,
        };
        sink.append(decoder);
        sink.sleep_until_end();
        Ok(())
    })
    .await
    // The outer `?` unwraps the join result; the closure's own `Result<()>`
    // is then returned as-is.
    .map_err(|e| VoiceError::Playback(format!("blocking task join: {e}")))?
}