Skip to main content

car_inference/
intent.rs

//! Caller-facing routing intent — express requirements, not model IDs.
//!
//! Tracks Parslee-ai/car-releases#18. The motivation is that callers
//! today choose between two extremes:
//!
//! - `model = None` → the adaptive router picks. Quality on average is
//!   good but per-request variability surfaces as UX inconsistency.
//! - `model = Some("claude-sonnet-4-7")` → the caller pins. Provider
//!   awareness leaks up the stack — exactly what CAR is supposed to
//!   prevent.
//!
//! `IntentHint` is the middle ground. The caller expresses *what* they
//! need; the router resolves intent → model. Existing `model = None`
//! and `model = Some(...)` paths are unchanged when no intent is
//! supplied.
//!
//! ## MVP scope
//!
//! Just `task`, `prefer_local`, `require`. Cost/latency ceilings wait
//! for clean registry numbers; `prefer_family` was cut as a soft
//! routing knob that accumulates tweaks without clear semantics
//! (Linus design review, 2026-05-04).
//!
//! ## Routing semantics
//!
//! `prefer_local: true` maps to a dedicated
//! [`crate::RoutingWorkload::LocalPreferred`] variant. Distinct from
//! `Background` (which is "this is a background job, latency barely
//! matters") — `LocalPreferred` keeps a quality-aware weight profile
//! and a strong local_bonus so the hint wins ties decisively.
31
32use serde::{Deserialize, Serialize};
33
34use crate::schema::ModelCapability;
35
36/// What the caller is doing — coarse-grained categories the adaptive
37/// router maps to `InferenceTask`. A closed enum so adding a new task
38/// type is a deliberate FFI-visible change rather than a silent
39/// fallback when the router doesn't recognize a string.
40///
41/// The MVP intentionally ships only the variants that map to a
42/// distinct `InferenceTask` today. `Summarize` / `Extract` were cut
43/// because both would have collapsed to `Generate` with no observable
44/// behavior change — shipping enum variants that are accepted, parsed,
45/// and silently discarded is exactly the routing variability the
46/// intent surface is designed to remove. Add them back when the
47/// registry actually distinguishes summarize-tuned or extract-tuned
48/// models.
49#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
50#[serde(rename_all = "snake_case")]
51pub enum TaskHint {
52    /// Conversational chat — maps to `InferenceTask::Generate`.
53    Chat,
54    /// Label assignment / categorization. Maps to
55    /// `InferenceTask::Classify`.
56    Classify,
57    /// Chain-of-thought, planning, multi-step analysis. Maps to
58    /// `InferenceTask::Reasoning` and tends to favor frontier
59    /// reasoning models.
60    Reasoning,
61    /// Code generation, repair, refactoring. Maps to
62    /// `InferenceTask::Code`.
63    Code,
64}
65
66/// Caller-supplied routing intent. All fields are optional / additive.
67/// An `IntentHint` with default values matches the no-intent path
68/// exactly, so threading `Option<IntentHint>` through is safe.
69#[derive(Debug, Clone, Default, Serialize, Deserialize)]
70pub struct IntentHint {
71    /// What the caller is doing. None = let the router infer from the
72    /// prompt as today.
73    #[serde(default, skip_serializing_if = "Option::is_none")]
74    pub task: Option<TaskHint>,
75
76    /// Hard filter — every required capability must be present on the
77    /// candidate. Empty = no extra filter.
78    #[serde(default, skip_serializing_if = "Vec::is_empty")]
79    pub require: Vec<ModelCapability>,
80
81    /// Bias the score profile toward local models (cost over quality).
82    /// Internally this maps to `RoutingWorkload::Background` until the
83    /// follow-up split lands (parslee-ai/car#106).
84    #[serde(default, skip_serializing_if = "is_false")]
85    pub prefer_local: bool,
86
87    /// Bias the score profile aggressively toward latency. Maps to
88    /// [`crate::tasks::RoutingWorkload::Fastest`] — a weight profile
89    /// that downweights quality and cost in favour of time-to-first-token.
90    /// Designed for voice turns where a sub-500ms first-audio target
91    /// beats a richer-but-slower answer. Takes precedence over
92    /// `prefer_local`; if both are set, the request is routed by
93    /// `Fastest` rules.
94    #[serde(default, skip_serializing_if = "is_false")]
95    pub prefer_fast: bool,
96}
97
/// Predicate for serde's `skip_serializing_if`: returns `true` when the
/// flag is unset, so `false` booleans are left out of the output.
///
/// Takes `&bool` rather than `bool` because it is named in
/// `skip_serializing_if`, which hands the field over by reference.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes a hint as the JSON string the FFI layer would transmit.
    fn to_wire(hint: &IntentHint) -> String {
        serde_json::to_string(hint).unwrap()
    }

    /// Decodes a hint from its JSON wire form.
    fn from_wire(json: &str) -> IntentHint {
        serde_json::from_str(json).unwrap()
    }

    #[test]
    fn empty_intent_serializes_compactly() {
        // A no-intent hint must not leak default-valued fields onto the
        // wire: the FFI layer ships JSON, and clients should never see
        // {"task":null,"require":[],"prefer_local":false,"prefer_fast":false}.
        assert_eq!(to_wire(&IntentHint::default()), "{}");
    }

    #[test]
    fn round_trip_with_capability_require() {
        // Every field is spelled out (no `..default()`) so adding a new
        // field forces this test to be revisited.
        let original = IntentHint {
            task: Some(TaskHint::Code),
            require: vec![ModelCapability::Code, ModelCapability::ToolUse],
            prefer_local: true,
            prefer_fast: false,
        };

        let IntentHint {
            task,
            require,
            prefer_local,
            prefer_fast,
        } = from_wire(&to_wire(&original));

        assert_eq!(task, Some(TaskHint::Code));
        assert_eq!(
            require,
            vec![ModelCapability::Code, ModelCapability::ToolUse]
        );
        assert!(prefer_local);
        assert!(!prefer_fast);
    }

    #[test]
    fn missing_fields_default_cleanly() {
        // Clients that predate IntentHint may send partial JSON; every
        // absent field has to land on the no-intent default.
        let parsed = from_wire("{}");
        assert_eq!(parsed.task, None);
        assert!(parsed.require.is_empty());
        assert!(!parsed.prefer_local);
        assert!(!parsed.prefer_fast);
    }

    #[test]
    fn prefer_fast_round_trips_and_skips_when_false() {
        // Unset: the flag is omitted entirely.
        let disabled = IntentHint::default();
        assert_eq!(to_wire(&disabled), "{}");

        // Set: the flag appears on the wire and survives decoding.
        let enabled = IntentHint {
            prefer_fast: true,
            ..IntentHint::default()
        };
        let wire = to_wire(&enabled);
        assert!(wire.contains("prefer_fast"));
        assert!(from_wire(&wire).prefer_fast);
    }
}