car-inference 0.13.0

Local model inference for CAR — Candle backend with Qwen3 models
Documentation
//! Caller-facing routing intent — express requirements, not model IDs.
//!
//! Tracks Parslee-ai/car-releases#18. The motivation is that callers
//! today choose between two extremes:
//!
//! - `model = None` → the adaptive router picks. Quality on average is
//!   good but per-request variability surfaces as UX inconsistency.
//! - `model = Some("claude-sonnet-4-7")` → the caller pins. Provider
//!   awareness leaks up the stack — exactly what CAR is supposed to
//!   prevent.
//!
//! `IntentHint` is the middle ground. The caller expresses *what* they
//! need; the router resolves intent → model. Existing `model = None`
//! and `model = Some(...)` paths are unchanged when no intent is
//! supplied.
//!
//! ## MVP scope
//!
//! Just `task`, `prefer_local`, `require`. Cost/latency ceilings wait
//! for clean registry numbers; `prefer_family` was cut as a soft
//! routing knob that accumulates tweaks without clear semantics
//! (Linus design review, 2026-05-04). [`IntentHint`] also carries a
//! `prefer_fast` flag, documented on the field itself.
//!
//! ## Routing semantics
//!
//! `prefer_local: true` maps to a dedicated
//! [`crate::RoutingWorkload::LocalPreferred`] variant. Distinct from
//! `Background` (which is "this is a background job, latency barely
//! matters") — `LocalPreferred` keeps a quality-aware weight profile
//! and a strong local_bonus so the hint wins ties decisively.

use serde::{Deserialize, Serialize};

use crate::schema::ModelCapability;

/// What the caller is doing — coarse-grained categories the adaptive
/// router maps to `InferenceTask`. A closed enum so adding a new task
/// type is a deliberate FFI-visible change rather than a silent
/// fallback when the router doesn't recognize a string.
///
/// The MVP intentionally ships only the variants that map to a
/// distinct `InferenceTask` today. `Summarize` / `Extract` were cut
/// because both would have collapsed to `Generate` with no observable
/// behavior change — shipping enum variants that are accepted, parsed,
/// and silently discarded is exactly the routing variability the
/// intent surface is designed to remove. Add them back when the
/// registry actually distinguishes summarize-tuned or extract-tuned
/// models.
///
/// # Wire format
///
/// Variants are (de)serialized in `snake_case`, i.e. as the strings
/// `"chat"`, `"classify"`, `"reasoning"` and `"code"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaskHint {
    /// Conversational chat — maps to `InferenceTask::Generate`.
    Chat,
    /// Label assignment / categorization. Maps to
    /// `InferenceTask::Classify`.
    Classify,
    /// Chain-of-thought, planning, multi-step analysis. Maps to
    /// `InferenceTask::Reasoning` and tends to favor frontier
    /// reasoning models.
    Reasoning,
    /// Code generation, repair, refactoring. Maps to
    /// `InferenceTask::Code`.
    Code,
}

/// Caller-supplied routing intent. All fields are optional / additive.
/// An `IntentHint` with default values matches the no-intent path
/// exactly, so threading `Option<IntentHint>` through is safe.
///
/// # Wire format
///
/// Serialization is sparse: every field is `#[serde(default)]` and is
/// skipped on output while it holds its default (`None`, an empty
/// `Vec`, or `false`). A default hint therefore serializes to `{}`,
/// and `{}` deserializes back to a default hint.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntentHint {
    /// What the caller is doing. None = let the router infer from the
    /// prompt as today.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub task: Option<TaskHint>,

    /// Hard filter — every required capability must be present on the
    /// candidate. Empty = no extra filter.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub require: Vec<ModelCapability>,

    /// Bias the score profile toward local models.
    ///
    /// NOTE(review): the module-level docs say this maps to the
    /// dedicated `RoutingWorkload::LocalPreferred` variant with a
    /// quality-aware weight profile, whereas this comment previously
    /// described a "cost over quality" bias via
    /// `RoutingWorkload::Background` pending the split tracked in
    /// parslee-ai/car#106. The mapping lives outside this file —
    /// confirm which description is current and drop the other.
    #[serde(default, skip_serializing_if = "is_false")]
    pub prefer_local: bool,

    /// Bias the score profile aggressively toward latency. Maps to
    /// [`crate::tasks::RoutingWorkload::Fastest`] — a weight profile
    /// that downweights quality and cost in favour of time-to-first-token.
    /// Designed for voice turns where a sub-500ms first-audio target
    /// beats a richer-but-slower answer. Takes precedence over
    /// `prefer_local`; if both are set, the request is routed by
    /// `Fastest` rules.
    #[serde(default, skip_serializing_if = "is_false")]
    pub prefer_fast: bool,
}

/// Serde `skip_serializing_if` predicate: returns `true` when the flag
/// is unset, so `false` booleans are omitted from the serialized form.
///
/// Takes `&bool` rather than `bool` because serde invokes the predicate
/// with a reference to the field.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A default hint has nothing to say, so its JSON form must be the
    /// empty object: the FFI layer ships hints as JSON and clients
    /// should never see noise such as
    /// `{"task":null,"require":[],"prefer_local":false,"prefer_fast":false}`.
    #[test]
    fn empty_intent_serializes_compactly() {
        let encoded = serde_json::to_string(&IntentHint::default()).unwrap();
        assert_eq!(encoded, "{}");
    }

    /// A fully-populated hint survives a serialize → deserialize cycle
    /// with every field intact.
    #[test]
    fn round_trip_with_capability_require() {
        let wanted = vec![ModelCapability::Code, ModelCapability::ToolUse];
        let original = IntentHint {
            task: Some(TaskHint::Code),
            require: wanted.clone(),
            prefer_local: true,
            prefer_fast: false,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded = serde_json::from_str::<IntentHint>(&encoded).unwrap();

        assert_eq!(decoded.task, Some(TaskHint::Code));
        assert_eq!(decoded.require, wanted);
        assert!(decoded.prefer_local);
        assert!(!decoded.prefer_fast);
    }

    /// Clients unaware of some (or all) fields may send partial JSON;
    /// whatever is absent must fall back to the no-intent defaults.
    #[test]
    fn missing_fields_default_cleanly() {
        let parsed = serde_json::from_str::<IntentHint>("{}").unwrap();

        assert_eq!(parsed.task, None);
        assert!(parsed.require.is_empty());
        assert!(!parsed.prefer_local);
        assert!(!parsed.prefer_fast);
    }

    /// `prefer_fast` is omitted while `false` and round-trips once set.
    #[test]
    fn prefer_fast_round_trips_and_skips_when_false() {
        // Unset flag: nothing is emitted.
        let disabled = serde_json::to_string(&IntentHint::default()).unwrap();
        assert_eq!(disabled, "{}");

        // Set flag: the key appears and decodes back to `true`.
        let enabled = IntentHint {
            prefer_fast: true,
            ..IntentHint::default()
        };
        let encoded = serde_json::to_string(&enabled).unwrap();
        assert!(encoded.contains("prefer_fast"));

        let decoded = serde_json::from_str::<IntentHint>(&encoded).unwrap();
        assert!(decoded.prefer_fast);
    }
}