// car-voice 0.14.0
//
// Voice I/O capability for CAR — mic capture, VAD, listener/speaker traits
// Documentation
//! Voice I/O capability for CAR.
//!
//! Channel-neutral microphone capture, voice activity detection,
//! speech-to-text, text-to-speech, and audio playback. Any CAR-based
//! agent or channel (CLI, GUI, IDE plug-in) can consume this crate
//! without pulling in a UI shell.
//!
//! ## Module map
//!
//! - [`config`] — `VoiceConfig` (provider selection, VAD tuning, mode).
//! - [`error`] — `VoiceError`.
//! - [`events`] — `VoiceEvent` enum (SpeechStart/End/Transcript/BargeIn).
//! - [`stt`] — `SttProvider` trait for speech-to-text backends.
//! - [`tts`] — `Speaker` trait + raw playback helper.
//! - [`provider`] — Factory functions that build the right STT/TTS from config.
//! - [`elevenlabs_stt`] / [`elevenlabs_tts`] — ElevenLabs cloud providers.
//! - [`whisper_cpp_stt`] — In-process Whisper STT via whisper.cpp (Metal on Apple Silicon).
//! - [`local_tts`] — Local OpenAI-compatible TTS provider
//!   (e.g. MLX-Whisper, mlx-audio Kokoro/Qwen3-TTS).
//! - [`listener`] — `Listener` trait + cross-platform `CpalListener`.
//! - [`voice_processing_listener`] — macOS `VoiceProcessingIO` listener with
//!   hardware AEC, AGC, and barge-in support.
//! - [`voice_audio_mixer`] — software mixer that feeds VPIO's bus 0
//!   reference signal so AEC has something to subtract.
//! - [`vad`] — energy-based voice activity detection with adaptive noise
//!   floor and runtime threshold boost (used by the listener for barge-in).
//! - [`enrollment`] — speaker voiceprint enrollment + per-segment role
//!   classification (`TranscriptRole`, `Enrollment`, `SpeakerPipeline`).
//! - [`narration`] — TARS-style commentary helpers, pure functions.

pub mod config;
pub mod cpal_listener;
// `runtime` stays private; only `voice_runtime_handle` is surfaced, and
// only to the rest of this crate.
mod runtime;
pub(crate) use runtime::voice_runtime_handle;
pub mod elevenlabs_streaming_listener;
pub mod elevenlabs_stt;
pub mod elevenlabs_tts;
pub mod enrollment;
pub mod enrollment_store;
pub mod error;
pub mod events;
pub mod listener;
pub mod local_tts;
// Pure-function classification + bridge-phrase helpers used by both
// the macOS-only orchestrator and the cross-platform FFI dispatch
// helper (`car-ffi-common::voice_turn`). No platform deps.
pub mod utterance;
/// SFSpeechRecognizer-backed STT — macOS 10.15+. On-device, free,
/// multilingual, no model download. See
/// `docs/proposals/macos-apple-frameworks.md`.
#[cfg(target_os = "macos")]
pub mod apple_speech_stt;
/// AVSpeechSynthesizer-backed TTS — macOS only, no model download,
/// no MLX dependency. See `docs/proposals/macos-apple-frameworks.md`.
#[cfg(target_os = "macos")]
pub mod apple_speech_tts;
// Compiled only when the `diarization` Cargo feature is enabled.
#[cfg(feature = "diarization")]
pub mod diarization;
/// In-process Kokoro-82M TTS via MLX (Apple Silicon macOS only —
/// internally uses `car_inference::backend::mlx_kokoro` which is
/// itself gated on aarch64, so x86_64 Mac must not compile this).
#[cfg(all(target_os = "macos", target_arch = "aarch64", not(car_skip_mlx)))]
pub mod kokoro_tts;
pub mod narration;
// Orchestrator owns a `VoiceMixerHandle` for cancellable TTS, which
// is itself gated to `target_os = "macos"` (the mixer feeds VPIO's
// bus 0). Gating the orchestrator to match keeps the workspace
// buildable on Linux/Windows. The two-track voice loop runs on
// macOS today; iOS reaches the orchestrator via the macOS-shared
// dispatch surface in `car-engine::voice_turn`, which is itself
// platform-agnostic.
// NOTE(review): the `utterance` comment above names
// `car-ffi-common::voice_turn` while this one names
// `car-engine::voice_turn` — confirm which crate hosts the dispatch helper.
#[cfg(target_os = "macos")]
pub mod orchestrator;
// Compiled only when the `parakeet` Cargo feature is enabled.
#[cfg(feature = "parakeet")]
pub mod parakeet_stt;
pub mod provider;
pub mod providers_info;
pub mod push_listener;
pub mod session;
pub mod streaming_whisper;
pub mod stt;
// System-audio capture: the macOS modules need both the target OS and the
// `system-audio-macos` feature; the Linux and Windows listeners are gated
// on target OS alone.
#[cfg(all(target_os = "macos", feature = "system-audio-macos"))]
pub mod system_audio_capture;
#[cfg(all(target_os = "macos", feature = "system-audio-macos"))]
pub mod system_audio_listener;
#[cfg(target_os = "linux")]
pub mod system_audio_listener_linux;
#[cfg(target_os = "windows")]
pub mod system_audio_listener_windows;
pub mod transcript;
pub mod tts;
pub mod vad;
pub mod vad_backend;
// macOS-only: the software mixer and the `VoiceProcessingIO` listener it
// feeds (see the module map at the top of this file).
#[cfg(target_os = "macos")]
pub mod voice_audio_mixer;
#[cfg(target_os = "macos")]
pub mod voice_processing_listener;
pub mod whisper_cpp_stt;

// The config-side `SttProvider` / `TtsProvider` types are renamed to
// `*Kind` on re-export: `SttProvider` would otherwise collide with the
// `stt::SttProvider` trait re-exported further down. `TtsProvider` is
// presumably renamed for symmetry — confirm against `config`.
pub use config::{
    compose_voice_context, voice_config_watch, ListenerMode, SttProvider as SttProviderKind,
    TtsProvider as TtsProviderKind, VoiceConfig, VoiceConfigHandle, VoiceConfigSender,
    DEFAULT_VOICE_PROMPT_OVERLAY,
};
// macOS-only, mirroring the gate on `pub mod orchestrator`.
#[cfg(target_os = "macos")]
pub use orchestrator::VoiceOrchestrator;
// Cross-platform re-exports of the utterance helpers — every consumer
// of car-voice (incl. car-ffi-common on Linux/Windows) gets these
// without needing to reach into the macOS-only orchestrator.
pub use utterance::{
    bridge_phrase, classify_utterance, format_for_voice, ToolKind, UtteranceClass,
};
// Apple speech providers — macOS-only, mirroring the gates on their modules.
#[cfg(target_os = "macos")]
pub use apple_speech_stt::AppleSpeechSttProvider;
#[cfg(target_os = "macos")]
pub use apple_speech_tts::AppleSpeechSpeaker;
// Re-export VoiceTelemetry + DirectDataFetcher so callers don't need a
// direct car-engine dep.
pub use car_engine::{DirectDataFetcher, VoiceTelemetry};
pub use cpal_listener::CpalListener;
#[cfg(feature = "diarization")]
pub use diarization::{
    DiarizationConfig, DiarizationError, Diarizer, SharedDiarizer, SpeakerDiarizer,
};
pub use elevenlabs_streaming_listener::ElevenLabsStreamingListener;
pub use elevenlabs_stt::ElevenLabsSttProvider;
pub use elevenlabs_tts::{ElevenLabsSpeaker, VoiceSettings};
pub use enrollment::{
    Enrollment, EnrollmentOutcome, FilterbankEmbedder, PendingEnrollment, SpeakerEmbedder,
    SpeakerEmbedding, SpeakerPipeline, TranscriptRole,
};
pub use enrollment_store::{
    enroll_from_pcm, enroll_from_wav, enrollment_dir, enrollment_path, list_enrollments,
    load_enrollment, remove_enrollment, save_enrollment, EnrollmentInfo,
};
pub use error::VoiceError;
pub use events::VoiceEvent;
#[cfg(all(target_os = "macos", target_arch = "aarch64", not(car_skip_mlx)))]
pub use kokoro_tts::KokoroSpeaker;
pub use listener::Listener;
pub use local_tts::LocalTtsSpeaker;
#[cfg(feature = "parakeet")]
pub use parakeet_stt::{ParakeetPartial, ParakeetSttProvider};
pub use provider::{build_stt_provider, build_tts_speaker};
pub use providers_info::{
    list_voice_providers, list_voice_providers_json, VoiceProviderInfo, VoiceProviderKind,
};
pub use push_listener::{PushHandle, PushListener};
pub use session::{VoiceEventSink, VoiceSession, VoiceSessionRegistry};
pub use streaming_whisper::{ChunkOverlapStreamer, StreamingConfig, StreamingPartial};
pub use stt::SttProvider;
// Platform-specific system-audio re-exports; each carries the same cfg
// gate as the module it comes from.
#[cfg(all(target_os = "macos", feature = "system-audio-macos"))]
pub use system_audio_listener::SystemAudioListener;
#[cfg(target_os = "linux")]
pub use system_audio_listener_linux::{default_monitor_source, list_monitor_sources};
#[cfg(target_os = "windows")]
pub use system_audio_listener_windows::WindowsLoopbackListener;
pub use transcript::{parse_transcript, ParsedTranscript};
pub use tts::{play_audio, AudioFormat, Speaker, SynthesizedAudio};
pub use vad_backend::{EnergyVadBackend, SileroError, SileroVadBackend, VadBackend};
// `StemRole` is exposed under the name `MixerStemRole` at the crate root.
#[cfg(target_os = "macos")]
pub use voice_audio_mixer::{StemRole as MixerStemRole, VoiceAudioMixer, VoiceMixerHandle};
#[cfg(target_os = "macos")]
pub use voice_processing_listener::VoiceProcessingListener;
pub use whisper_cpp_stt::WhisperCppSttProvider;

/// Crate-wide shorthand for the standard library `Result` with the error
/// type fixed to [`VoiceError`], so fallible APIs can be written as
/// `Result<T>`.
pub type Result<T> = ::std::result::Result<T, VoiceError>;