// car-voice 0.14.0
//
// Voice I/O capability for CAR — mic capture, VAD, listener/speaker traits
// Documentation
//! Utterance classification and voice-formatting helpers.
//!
//! Pure-function helpers used by both the macOS-only `orchestrator`
//! module and the cross-platform FFI dispatch helper in
//! `car-ffi-common`. Lives outside `orchestrator` so callers that
//! don't need the mixer-routed `VoiceOrchestrator` (which depends on
//! the macOS-only `voice_audio_mixer`) can still classify utterances
//! and pick bridge phrases on Linux / Windows / iOS.
//!
//! No platform deps; safe on every target.

/// Coarse classification of a finalized utterance. Drives the
/// fast-track / bridge-phrase decision.
///
/// Produced by [`classify_utterance`], which picks a variant from
/// case-insensitive keyword matches on the utterance text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UtteranceClass {
    /// Tool-likely (email / calendar / search). Skip the fast LLM —
    /// it would hallucinate without real data — and play a bridge
    /// phrase while the sidecar runs. The payload names the matched
    /// category and can be passed to [`bridge_phrase`].
    ToolLikely(ToolKind),
    /// Conversational: no tool keyword matched. Fast LLM is safe to
    /// speak from the utterance alone; sidecar still runs in parallel.
    Conversational,
}

/// Tool category used by [`UtteranceClass::ToolLikely`] and
/// [`bridge_phrase`].
///
/// The keyword lists mentioned below are the ones matched by
/// [`classify_utterance`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolKind {
    /// Utterance mentions "email", "inbox", or "mail".
    Email,
    /// Utterance mentions "calendar", "schedule", "meeting", or
    /// "appointment".
    Calendar,
    /// Utterance mentions "search", "find", or "look up".
    Search,
    /// Tool-like keyword present but no specific category matched.
    /// Not currently produced by [`classify_utterance`];
    /// [`bridge_phrase`] still supplies a generic phrase for it.
    Unknown,
}

/// Keyword-based classifier. ~30 LoC, zero deps. Phase C (the
/// `DirectDataFetcher` path) extends this with category-specific
/// handlers.
pub fn classify_utterance(utterance: &str) -> UtteranceClass {
    let lower = utterance.to_lowercase();
    let kind = if lower.contains("email") || lower.contains("inbox") || lower.contains("mail") {
        Some(ToolKind::Email)
    } else if lower.contains("calendar")
        || lower.contains("schedule")
        || lower.contains("meeting")
        || lower.contains("appointment")
    {
        Some(ToolKind::Calendar)
    } else if lower.contains("search") || lower.contains("find") || lower.contains("look up") {
        Some(ToolKind::Search)
    } else {
        None
    };
    match kind {
        Some(k) => UtteranceClass::ToolLikely(k),
        None => UtteranceClass::Conversational,
    }
}

/// Returns the fixed bridge phrase spoken for a tool-likely utterance
/// of the given category. Phase B uses one phrase per category; pool
/// variety is a follow-up (Open Q #2).
///
/// Every [`ToolKind`] variant has a phrase, including
/// [`ToolKind::Unknown`], which gets a generic one.
pub fn bridge_phrase(kind: ToolKind) -> &'static str {
    const EMAIL_PHRASE: &str = "One moment, checking your inbox.";
    const CALENDAR_PHRASE: &str = "Let me look at your calendar.";
    const SEARCH_PHRASE: &str = "One moment, looking that up.";
    const GENERIC_PHRASE: &str = "One moment, let me check on that.";

    // Exhaustive on purpose: adding a `ToolKind` variant must fail to
    // compile here until it is given a phrase.
    match kind {
        ToolKind::Unknown => GENERIC_PHRASE,
        ToolKind::Search => SEARCH_PHRASE,
        ToolKind::Calendar => CALENDAR_PHRASE,
        ToolKind::Email => EMAIL_PHRASE,
    }
}

/// Strip markdown / formatting that doesn't survive TTS narration and
/// truncate to `max_chars` with an ellipsis. Use this on the output
/// of a `DirectDataFetcher` before handing it to the sidecar — TTS
/// will literally read asterisks and backticks otherwise.
///
/// Processing steps, in order:
///
/// 1. Remove every `*`, `_`, `#` and `` ` `` character.
/// 2. Collapse each run of whitespace (including newlines and tabs) to
///    a single space and trim leading / trailing whitespace.
/// 3. If the result is longer than `max_chars` codepoints (not bytes),
///    keep the first `max_chars` codepoints and append `…`.
///
/// A truncated result is therefore `max_chars + 1` codepoints long;
/// text that already fits is returned without an ellipsis.
pub fn format_for_voice(text: &str, max_chars: usize) -> String {
    let stripped: String = text
        .chars()
        .filter(|c| !matches!(c, '*' | '_' | '#' | '`'))
        .collect();
    let mut spoken = stripped.split_whitespace().collect::<Vec<_>>().join(" ");

    // Byte offset of the first codepoint past the limit, if there is
    // one. Cutting at a `char_indices` offset always lands on a char
    // boundary, so multi-byte text cannot make `truncate` panic.
    let cut = spoken
        .char_indices()
        .nth(max_chars)
        .map(|(offset, _)| offset);

    if let Some(offset) = cut {
        spoken.truncate(offset);
        // Mark that text was dropped, as the contract above promises.
        spoken.push('…');
    }
    spoken
}

/// Unit tests for the classification and voice-formatting helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Email keywords match case-insensitively and as substrings
    /// ("voicemail" contains "mail").
    #[test]
    fn classify_utterance_email_keywords() {
        assert_eq!(
            classify_utterance("any new email today"),
            UtteranceClass::ToolLikely(ToolKind::Email)
        );
        assert_eq!(
            classify_utterance("Check my INBOX"),
            UtteranceClass::ToolLikely(ToolKind::Email)
        );
        assert_eq!(
            classify_utterance("forward this voicemail"),
            UtteranceClass::ToolLikely(ToolKind::Email)
        );
    }

    /// Each calendar keyword classifies as `ToolKind::Calendar`.
    #[test]
    fn classify_utterance_calendar_keywords() {
        for utterance in [
            "what's on my calendar today",
            "schedule a meeting with bob",
            "any APPOINTMENTS this afternoon",
        ] {
            match classify_utterance(utterance) {
                UtteranceClass::ToolLikely(ToolKind::Calendar) => {}
                other => panic!("{utterance:?} → {other:?}"),
            }
        }
    }

    /// Each search keyword classifies as `ToolKind::Search`.
    #[test]
    fn classify_utterance_search_keywords() {
        for utterance in [
            "search for nginx tuning",
            "find the docs",
            "look up RFC 9110",
        ] {
            match classify_utterance(utterance) {
                UtteranceClass::ToolLikely(ToolKind::Search) => {}
                other => panic!("{utterance:?} → {other:?}"),
            }
        }
    }

    /// Utterances without any tool keyword are conversational.
    #[test]
    fn classify_utterance_conversational() {
        for utterance in ["how are you", "tell me a joke", "I love sourdough bread"] {
            assert_eq!(
                classify_utterance(utterance),
                UtteranceClass::Conversational,
                "{utterance:?}"
            );
        }
    }

    /// Bridge phrases are category-specific and short enough to speak
    /// quickly.
    #[test]
    fn bridge_phrase_per_kind() {
        let email = bridge_phrase(ToolKind::Email);
        let calendar = bridge_phrase(ToolKind::Calendar);
        let search = bridge_phrase(ToolKind::Search);
        let unknown = bridge_phrase(ToolKind::Unknown);
        assert!(email.to_lowercase().contains("inbox"));
        assert!(calendar.to_lowercase().contains("calendar"));
        assert_ne!(email, calendar);
        assert_ne!(search, unknown);
        for p in [email, calendar, search, unknown] {
            assert!(p.len() < 80, "bridge phrase too long: {p:?}");
        }
    }

    /// Markdown punctuation is dropped and whitespace runs collapse to
    /// one space.
    #[test]
    fn format_for_voice_strips_markdown_and_collapses_whitespace() {
        assert_eq!(
            format_for_voice("**bold** and `code`", 200),
            "bold and code"
        );
        assert_eq!(
            format_for_voice("line1\n\n  line2\t\tline3", 200),
            "line1 line2 line3"
        );
        assert_eq!(format_for_voice("# Heading\n\ntext", 200), "Heading text");
    }

    /// Over-long text is cut to `max_chars` codepoints plus one
    /// trailing `…`.
    #[test]
    fn format_for_voice_truncates_with_ellipsis() {
        let s = "a".repeat(300);
        let out = format_for_voice(&s, 100);
        assert!(out.ends_with('…'));
        assert_eq!(out.chars().count(), 101);
    }

    /// Text that already fits is returned unchanged, with no ellipsis.
    #[test]
    fn format_for_voice_no_truncate_when_under_limit() {
        assert_eq!(format_for_voice("hello world", 100), "hello world");
    }
}