car_inference/intent.rs
1//! Caller-facing routing intent — express requirements, not model IDs.
2//!
3//! Tracks Parslee-ai/car-releases#18. The motivation is that callers
4//! today choose between two extremes:
5//!
6//! - `model = None` → the adaptive router picks. Quality on average is
7//! good but per-request variability surfaces as UX inconsistency.
8//! - `model = Some("claude-sonnet-4-7")` → the caller pins. Provider
9//! awareness leaks up the stack — exactly what CAR is supposed to
10//! prevent.
11//!
12//! `IntentHint` is the middle ground. The caller expresses *what* they
13//! need; the router resolves intent → model. Existing `model = None`
14//! and `model = Some(...)` paths are unchanged when no intent is
15//! supplied.
16//!
17//! ## MVP scope
18//!
//! Just `task`, `require`, `prefer_local`, and `prefer_fast`.
//! Cost/latency ceilings wait for clean registry numbers;
//! `prefer_family` was cut as a soft routing knob that accumulates
//! tweaks without clear semantics (Linus design review, 2026-05-04).
23//!
24//! ## Routing semantics
25//!
//! `prefer_local: true` maps to a dedicated
//! [`crate::RoutingWorkload::LocalPreferred`] variant. Distinct from
//! `Background` (which is "this is a background job, latency barely
//! matters") — `LocalPreferred` keeps a quality-aware weight profile
//! and a strong `local_bonus` so the hint wins ties decisively.
//!
//! `prefer_fast: true` maps to `RoutingWorkload::Fastest` and takes
//! precedence over `prefer_local` when both are set.
31
32use serde::{Deserialize, Serialize};
33
34use crate::schema::ModelCapability;
35
/// What the caller is doing — coarse-grained categories the adaptive
/// router maps to `InferenceTask`. A closed enum so adding a new task
/// type is a deliberate FFI-visible change rather than a silent
/// fallback when the router doesn't recognize a string.
///
/// The MVP intentionally ships only the variants that map to a
/// distinct `InferenceTask` today. `Summarize` / `Extract` were cut
/// because both would have collapsed to `Generate` with no observable
/// behavior change — shipping enum variants that are accepted, parsed,
/// and silently discarded is exactly the routing variability the
/// intent surface is designed to remove. Add them back when the
/// registry actually distinguishes summarize-tuned or extract-tuned
/// models.
///
/// # Wire format
///
/// `rename_all = "snake_case"` makes each variant serialize as its
/// lowercase name (`"chat"`, `"classify"`, `"reasoning"`, `"code"`),
/// so renaming a variant changes the serialized form.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaskHint {
    /// Conversational chat — maps to `InferenceTask::Generate`.
    Chat,
    /// Label assignment / categorization. Maps to
    /// `InferenceTask::Classify`.
    Classify,
    /// Chain-of-thought, planning, multi-step analysis. Maps to
    /// `InferenceTask::Reasoning` and tends to favor frontier
    /// reasoning models.
    Reasoning,
    /// Code generation, repair, refactoring. Maps to
    /// `InferenceTask::Code`.
    Code,
}
65
/// Caller-supplied routing intent. All fields are optional / additive.
/// An `IntentHint` with default values matches the no-intent path
/// exactly, so threading `Option<IntentHint>` through is safe.
///
/// Every field is `#[serde(default)]` and is skipped on output while it
/// holds its default, so the default value serializes to `{}` and `{}`
/// deserializes back to the default value.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IntentHint {
    /// What the caller is doing. None = let the router infer from the
    /// prompt as today.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub task: Option<TaskHint>,

    /// Hard filter — every required capability must be present on the
    /// candidate. Empty = no extra filter.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub require: Vec<ModelCapability>,

    /// Bias the score profile toward local models.
    ///
    /// Per the module-level docs this maps to
    /// `RoutingWorkload::LocalPreferred`: a quality-aware weight
    /// profile with a strong local bonus, distinct from `Background`.
    /// If `prefer_fast` is also set, `prefer_fast` takes precedence.
    // NOTE(review): this comment previously read "cost over quality" and
    // said the flag maps to `RoutingWorkload::Background` until the
    // follow-up split lands (parslee-ai/car#106). That contradicts the
    // module docs; confirm against the router which mapping is current.
    #[serde(default, skip_serializing_if = "is_false")]
    pub prefer_local: bool,

    /// Bias the score profile aggressively toward latency. Maps to
    /// [`crate::tasks::RoutingWorkload::Fastest`] — a weight profile
    /// that downweights quality and cost in favour of time-to-first-token.
    /// Designed for voice turns where a sub-500ms first-audio target
    /// beats a richer-but-slower answer. Takes precedence over
    /// `prefer_local`; if both are set, the request is routed by
    /// `Fastest` rules.
    #[serde(default, skip_serializing_if = "is_false")]
    pub prefer_fast: bool,
}
97
/// `skip_serializing_if` predicate for the boolean flags: returns `true`
/// when the flag is unset, so the field is left out of the output.
///
/// Takes `&bool` rather than `bool` because serde calls the predicate
/// with a reference to the field.
fn is_false(&flag: &bool) -> bool {
    !flag
}
101
#[cfg(test)]
mod tests {
    use super::*;

    /// The default (no-intent) value must serialize to an empty object.
    #[test]
    fn empty_intent_serializes_compactly() {
        // No-intent must round-trip through serde without verbose
        // null/default fields — the FFI layer transmits as JSON and
        // clients shouldn't see
        // {"task":null,"require":[],"prefer_local":false,"prefer_fast":false}.
        let hint = IntentHint::default();
        let json = serde_json::to_string(&hint).unwrap();
        assert_eq!(json, "{}");
    }

    /// A fully populated hint survives a JSON round trip field by field.
    #[test]
    fn round_trip_with_capability_require() {
        let hint = IntentHint {
            task: Some(TaskHint::Code),
            require: vec![ModelCapability::Code, ModelCapability::ToolUse],
            prefer_local: true,
            prefer_fast: false,
        };
        let json = serde_json::to_string(&hint).unwrap();
        let back: IntentHint = serde_json::from_str(&json).unwrap();
        assert_eq!(back.task, Some(TaskHint::Code));
        assert_eq!(
            back.require,
            vec![ModelCapability::Code, ModelCapability::ToolUse]
        );
        assert!(back.prefer_local);
        assert!(!back.prefer_fast);
    }

    /// Absent fields fall back to the same values as `IntentHint::default()`.
    #[test]
    fn missing_fields_default_cleanly() {
        // Pre-MVP clients that don't know about IntentHint may send
        // partial JSON. Defaults must match the no-intent path.
        let hint: IntentHint = serde_json::from_str("{}").unwrap();
        assert_eq!(hint.task, None);
        assert!(hint.require.is_empty());
        assert!(!hint.prefer_local);
        assert!(!hint.prefer_fast);
    }

    /// `prefer_fast` is omitted while false and is the only key emitted
    /// when it is the only non-default field.
    #[test]
    fn prefer_fast_round_trips_and_skips_when_false() {
        let off = IntentHint::default();
        assert_eq!(serde_json::to_string(&off).unwrap(), "{}");

        let on = IntentHint {
            prefer_fast: true,
            ..IntentHint::default()
        };
        let json = serde_json::to_string(&on).unwrap();
        // Pin the exact output: a substring check would still pass if
        // other default-valued fields leaked into the JSON.
        assert_eq!(json, r#"{"prefer_fast":true}"#);
        let back: IntentHint = serde_json::from_str(&json).unwrap();
        assert!(back.prefer_fast);
        assert!(!back.prefer_local);
    }

    /// Pins the snake_case wire names of every `TaskHint` variant so a
    /// rename shows up as a test failure rather than a silent protocol
    /// change.
    #[test]
    fn task_hint_uses_snake_case_wire_names() {
        let cases = [
            (TaskHint::Chat, "\"chat\""),
            (TaskHint::Classify, "\"classify\""),
            (TaskHint::Reasoning, "\"reasoning\""),
            (TaskHint::Code, "\"code\""),
        ];
        for (task, wire) in cases {
            assert_eq!(serde_json::to_string(&task).unwrap(), wire);
            let back: TaskHint = serde_json::from_str(wire).unwrap();
            assert_eq!(back, task);
        }
    }

    /// `TaskHint` is a closed enum: an unrecognized task string must be
    /// a deserialization error, not a silent fallback.
    #[test]
    fn unknown_task_is_rejected() {
        assert!(serde_json::from_str::<TaskHint>("\"summarize\"").is_err());
        assert!(serde_json::from_str::<IntentHint>(r#"{"task":"summarize"}"#).is_err());
    }
}
160}