// car-voice 0.15.1
//
// Voice I/O capability for CAR — mic capture, VAD, listener/speaker traits
// Documentation
//! Voice provider introspection.
//!
//! [`list_voice_providers`] returns a stable, JSON-serializable description
//! of every STT and TTS provider built into this binary, with per-provider
//! availability (cfg-target, build-feature) so external clients can render
//! a picker, gate config UI, or surface diagnostics.
//!
//! Availability here reflects *build-time* presence — whether the impl
//! compiled in for this target. Runtime readiness (API key set, model
//! downloaded, permission granted) is a separate concern surfaced by each
//! provider's `from_config` / `transcribe` error paths.

use serde::Serialize;

/// Whether a provider does STT or TTS.
///
/// Serialized in snake_case (`"stt"` / `"tts"`) via
/// `#[serde(rename_all = "snake_case")]`.
///
/// Derives `PartialEq`, `Eq`, and `Hash` so callers can filter or group a
/// provider list by kind (`info.kind == VoiceProviderKind::Stt`) without
/// resorting to `matches!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum VoiceProviderKind {
    /// Speech-to-text provider.
    Stt,
    /// Text-to-speech provider.
    Tts,
}

/// Describes a single voice provider.
///
/// Field names are stable wire-format — clients in JS, Python, or
/// behind WebSocket parse this and surface it directly. Every field is
/// either `&'static str`, `bool`, or a `Copy` enum, so values are cheap
/// to clone.
#[derive(Debug, Clone, Serialize)]
pub struct VoiceProviderInfo {
    /// Provider id, matching the values accepted by `TOKHN_STT_PROVIDER`
    /// / `TOKHN_TTS_PROVIDER` and the snake_case form of the config enum.
    ///
    /// Ids are only unique *within* a kind: the same id (for example
    /// `"elevenlabs"` or `"apple_speech"`) appears once as STT and once
    /// as TTS, so clients should key on `(kind, id)`.
    pub id: &'static str,
    /// Whether this entry describes a speech-to-text or a text-to-speech
    /// provider.
    pub kind: VoiceProviderKind,
    /// True when the provider was compiled into this build for this
    /// target. Linux/Windows builds will see `available: false` for
    /// macOS-only providers (Apple Speech, Kokoro), etc.
    pub available: bool,
    /// Free-form description for UI / diagnostics.
    pub description: &'static str,
}

/// Enumerate all voice providers known to this build.
///
/// Order is stable and deterministic — STT providers first, then TTS,
/// each group in declaration order matching the `SttProvider` /
/// `TtsProvider` enum variants in `config.rs`. Clients should not rely
/// on the order across versions, but within a version it's stable.
pub fn list_voice_providers() -> Vec<VoiceProviderInfo> {
    vec![
        // ─── STT ───
        VoiceProviderInfo {
            id: "elevenlabs",
            kind: VoiceProviderKind::Stt,
            available: true,
            description: "ElevenLabs Scribe cloud STT (requires API key)",
        },
        VoiceProviderInfo {
            id: "whisper_cpp",
            kind: VoiceProviderKind::Stt,
            available: true,
            description: "In-process Whisper via whisper.cpp \
                          (Metal on Apple Silicon, CPU elsewhere); \
                          model file downloaded on first use",
        },
        VoiceProviderInfo {
            id: "parakeet",
            kind: VoiceProviderKind::Stt,
            available: cfg!(feature = "parakeet"),
            description: "NVIDIA Parakeet TDT via ONNX Runtime — \
                          requires `parakeet` cargo feature",
        },
        VoiceProviderInfo {
            id: "apple_speech",
            kind: VoiceProviderKind::Stt,
            available: cfg!(target_os = "macos"),
            description: "macOS SFSpeechRecognizer — on-device, free, \
                          multilingual, no model download (macOS 10.15+); \
                          host must call requestAuthorization at startup",
        },
        // ─── TTS ───
        VoiceProviderInfo {
            id: "elevenlabs",
            kind: VoiceProviderKind::Tts,
            available: true,
            description: "ElevenLabs cloud TTS (requires API key)",
        },
        VoiceProviderInfo {
            id: "local",
            kind: VoiceProviderKind::Tts,
            available: true,
            description: "OpenAI-compatible /v1/audio/speech HTTP client \
                          (e.g. mlx-audio, Piper) — requires a separate server",
        },
        VoiceProviderInfo {
            id: "kokoro",
            kind: VoiceProviderKind::Tts,
            available: cfg!(all(
                target_os = "macos",
                target_arch = "aarch64",
                not(car_skip_mlx)
            )),
            description: "In-process Kokoro-82M TTS via MLX/Metal — \
                          Apple Silicon macOS only; model downloaded on first use",
        },
        VoiceProviderInfo {
            id: "apple_speech",
            kind: VoiceProviderKind::Tts,
            available: cfg!(target_os = "macos"),
            description: "macOS AVSpeechSynthesizer — built-in, no model \
                          download, no MLX dependency (macOS 10.14+)",
        },
    ]
}

/// Render the provider catalogue as a compact JSON string.
///
/// This is the stable wire-format shared by every FFI binding, so all of
/// them emit identical strings for the same build.
///
/// # Panics
///
/// Panics if `serde_json` fails to serialize the list. The `expect`
/// message records the assumption that this cannot happen for
/// `VoiceProviderInfo`.
pub fn list_voice_providers_json() -> String {
    let providers = list_voice_providers();
    let encoded = serde_json::to_string(&providers);
    encoded.expect("VoiceProviderInfo serialization is infallible")
}