harn_vm/llm/capability_audit.rs
1//! Compile-time footgun gate for the capability matrix.
2//!
3//! Harn is *opinionated* about provider/model/config combinations: a few
4//! combos are known footguns that silently break tool calling at runtime, and
5//! the only durable place to forbid them is the declarative matrix itself —
6//! before a harness author can ship a misconfigured route.
7//!
8//! This audit walks the parsed [`CapabilitiesFile`] and flags
9//! provider+model+config combinations that the matrix declares as invariants,
10//! NOT hard-coded model-name patterns. It generalizes the
11//! `reasoning_required_for_tools` precedent (a tool-using model that calls
12//! tools inside its reasoning channel) into a small set of data-driven rules:
13//!
14//! * **reasoning-off-for-tools contradiction** — a row that declares
15//! `reasoning_required_for_tools = true` must not also pin a tool task
16//! (`agent` / `code` / `verify`) to reasoning `"off"` via
17//! `auto_reasoning_overrides`. That is the self-inflicted
18//! billed-noncommittal failure #3305 fixed at its root; declaring both is a
19//! direct contradiction.
20//!
21//! * **lottery-route without a clean pin** — an OpenRouter row that declares
22//! `reasoning_required_for_tools = true` is a Harmony-style tool route on a
23//! sub-provider-lottery provider. Some OpenRouter upstreams mis-serialize
24//! the Harmony tool call even with reasoning ON, so such a row MUST pin a
25//! closed allowlist of known-clean upstreams via `openrouter_provider_order`
26//! (materialized to `provider.order` + `allow_fallbacks:false`). Without a
27//! pin the route can silently land on a sketchy upstream.
28//!
29//! * **native-tool declaration contradictions** — a row that prefers the
30//! native tool-call wire format, or declares native tool-choice modes, must
31//! also explicitly enable `native_tools`. Otherwise downstream request
32//! builders see mutually incompatible capability facts and harness authors
33//! get provider-specific surprises instead of one normalized toolchain.
34//!
35//! * **native-unreliable family consistency** — for a model family whose
36//! provider-native tool channel is unreliable as a *weight-intrinsic*
37//! property (it leaks tool markup into content / bills empty native
38//! completions on every host that serves those weights), EVERY route must
39//! steer to a text channel. A single outlier host pinning
40//! `preferred_tool_format = "native"` while its siblings pin text is exactly
41//! how a value model silently thrashes on one provider. This is the only
42//! check keyed on a model-family substring (see
43//! [`NATIVE_UNRELIABLE_TOOL_FAMILIES`]) rather than pure capability fields,
44//! and the bar to add a family is deliberately high: weight-intrinsic
45//! unreliability reproduced across independent hosts, never one rehoster's
46//! flakiness (which belongs in that host's own row).
47//!
48//! The first three checks are driven entirely by capability-row fields and the
49//! fourth by a tiny evidence-gated family list, so adding/closing a footgun
50//! route is a data edit (set the flag / forget the pin / pin native for an
51//! unreliable family) rather than a code change — and the mistake trips this
52//! gate.
53//!
54//! The audit is wired into `harn provider catalog build-capabilities --check` (see
55//! `harn-cli`), which runs under `make check-provider-capabilities` /
56//! `make check-provider-matrix`, so the matrix cannot drift into a footgun
57//! state without failing CI.
58
59use crate::llm::capabilities::CapabilitiesFile;
60
/// Task names treated as tool-bearing by the reasoning-off audit.
///
/// [`audit_capabilities`] looks each of these up in a row's
/// `auto_reasoning_overrides`; on a row that declares
/// `reasoning_required_for_tools = true`, an `"off"` level for any of them is
/// reported as a footgun. Offending tasks are listed in the order given here.
///
/// Intended to mirror the guarded set in [`crate::llm::reasoning_policy`];
/// keep the two in sync when either changes.
const TOOL_TASKS: [&str; 3] = ["agent", "code", "verify"];
66
/// Evidence text for the `glm-5` entry of [`NATIVE_UNRELIABLE_TOOL_FAMILIES`].
/// It is spliced verbatim into the audit finding, so it should end with the
/// recommended fix.
const GLM_5_EVIDENCE: &str =
    "GLM-5.x's native channel emits `<tool_call><arg_key>...` markup as assistant \
     content instead of OpenAI message.tool_calls — reproduced across every GLM-5 host \
     probed (zai/Baseten live, Together + OpenRouter agent-loop smoke, DeepInfra, Fireworks \
     glm-5p*). Pin a text channel + tool_mode_parity = \"native_unreliable\".";

/// Model families whose **provider-native** tool channel is considered
/// unreliable as a *weight-intrinsic* property: the model itself emits
/// tool-call markup as assistant content (or bills empty native completions)
/// on every host serving those weights, whichever provider that is.
///
/// Each entry is `(model_match substring, evidence)`. The audit lowercases a
/// row's `model_match` before the substring test, so the substring here must
/// be written in lowercase. A matching row that pins
/// `preferred_tool_format = "native"` is reported, with the evidence text
/// embedded in the finding. The recommended shape for such a family is a text
/// channel (`preferred_tool_format` = `text`/`json`) plus
/// `tool_mode_parity = "native_unreliable"`; the audit itself only rejects the
/// explicit native pin.
///
/// The bar for adding a family is deliberately HIGH: the quirk must be shown to
/// be intrinsic to the weights (reproduced across independent hosts), not just
/// seen on one rehoster. Host-specific native flakiness belongs in that host's
/// own row — a first-party endpoint may serve native cleanly while third-party
/// rehosters do not, and that difference has to be measured per host rather
/// than assumed.
const NATIVE_UNRELIABLE_TOOL_FAMILIES: &[(&str, &str)] = &[("glm-5", GLM_5_EVIDENCE)];
89
/// One audit finding: a capability row that breaks an opinionated
/// provider/model/config invariant.
///
/// Plain owned data, so findings can be compared, cloned, and rendered
/// without reference to the capability file they were derived from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CapabilityFootgun {
    /// Id of the provider whose rule list holds the offending row.
    pub provider: String,
    /// The offending row's `model_match` pattern, copied verbatim.
    pub model_match: String,
    /// Human-readable description of the violation, including the
    /// declarative fix.
    pub message: String,
}
101
102/// Result of auditing a [`CapabilitiesFile`] for footgun combinations.
103#[derive(Debug, Clone, Default, PartialEq, Eq)]
104pub struct CapabilityAuditReport {
105 pub footguns: Vec<CapabilityFootgun>,
106}
107
108impl CapabilityAuditReport {
109 pub fn is_clean(&self) -> bool {
110 self.footguns.is_empty()
111 }
112
113 /// One line per finding, suitable for CLI/CI output.
114 pub fn render(&self) -> String {
115 self.footguns
116 .iter()
117 .map(|footgun| {
118 format!(
119 "provider.{} model_match=\"{}\": {}",
120 footgun.provider, footgun.model_match, footgun.message
121 )
122 })
123 .collect::<Vec<_>>()
124 .join("\n")
125 }
126}
127
128/// Audit the in-memory capability matrix for footgun provider/model/config
129/// combinations. Pure over the parsed file — no I/O, no model-name patterns.
130pub fn audit_capabilities(file: &CapabilitiesFile) -> CapabilityAuditReport {
131 let mut report = CapabilityAuditReport::default();
132 for (provider, rules) in &file.provider {
133 for rule in rules {
134 let reasoning_required_for_tools = rule.reasoning_required_for_tools.unwrap_or(false);
135
136 // Footgun 1: reasoning-off-for-tools contradiction. A route that
137 // calls tools inside its reasoning channel must not also force a
138 // tool task to reasoning-off.
139 if reasoning_required_for_tools {
140 if let Some(overrides) = &rule.auto_reasoning_overrides {
141 let offending: Vec<&str> = TOOL_TASKS
142 .iter()
143 .copied()
144 .filter(|task| {
145 overrides
146 .get(*task)
147 .map(|level| level.eq_ignore_ascii_case("off"))
148 .unwrap_or(false)
149 })
150 .collect();
151 if !offending.is_empty() {
152 report.footguns.push(CapabilityFootgun {
153 provider: provider.clone(),
154 model_match: rule.model_match.clone(),
155 message: format!(
156 "declares reasoning_required_for_tools = true but also pins \
157 auto_reasoning_overrides {{ {} = \"off\" }}; this route calls \
158 tools inside its reasoning channel, so forcing reasoning off \
159 for a tool task is the billed-noncommittal failure (0 \
160 tool_calls). Remove the \"off\" override(s) for tool tasks.",
161 offending.join("/")
162 ),
163 });
164 }
165 }
166 }
167
168 // Footgun 2: lottery-route without a clean sub-provider pin. An
169 // OpenRouter Harmony-style tool route must allowlist known-clean
170 // upstreams or it can silently land on a mis-serializing one.
171 if provider == "openrouter" && reasoning_required_for_tools {
172 let pinned = rule
173 .openrouter_provider_order
174 .as_ref()
175 .map(|order| !order.is_empty())
176 .unwrap_or(false);
177 if !pinned {
178 report.footguns.push(CapabilityFootgun {
179 provider: provider.clone(),
180 model_match: rule.model_match.clone(),
181 message: "is an OpenRouter route with \
182 reasoning_required_for_tools = true (a Harmony-style tool route on \
183 the OpenRouter sub-provider lottery) but declares no \
184 openrouter_provider_order pin. Some OpenRouter upstreams \
185 mis-serialize the tool call even with reasoning ON. Pin a closed \
186 allowlist of known-clean upstreams, e.g. \
187 openrouter_provider_order = [\"Cerebras\", \"Groq\"]."
188 .to_string(),
189 });
190 }
191 }
192
193 // Footgun 3: native tool declaration contradictions. These fields
194 // describe native tool-call request shape and must not be set on a
195 // text-tool-only row.
196 if rule
197 .preferred_tool_format
198 .as_deref()
199 .map(|format| format.eq_ignore_ascii_case("native"))
200 .unwrap_or(false)
201 && !rule.native_tools.unwrap_or(false)
202 {
203 report.footguns.push(CapabilityFootgun {
204 provider: provider.clone(),
205 model_match: rule.model_match.clone(),
206 message: "declares preferred_tool_format = \"native\" without \
207 native_tools = true. Native tool format is only coherent \
208 for rows that enable native tool calls; either set \
209 native_tools = true or choose a text-channel tool format."
210 .to_string(),
211 });
212 }
213
214 if rule
215 .allowed_tool_choice_modes
216 .as_ref()
217 .map(|modes| !modes.is_empty())
218 .unwrap_or(false)
219 && !rule.native_tools.unwrap_or(false)
220 {
221 report.footguns.push(CapabilityFootgun {
222 provider: provider.clone(),
223 model_match: rule.model_match.clone(),
224 message: "declares allowed_tool_choice_modes while native_tools is \
225 not true. Tool-choice modes are native request-shape \
226 capabilities; enable native_tools or remove the native \
227 tool-choice declaration."
228 .to_string(),
229 });
230 }
231
232 // Footgun 4: a route pins the provider-native tool channel for a model
233 // family whose native channel is unreliable as a weight-intrinsic
234 // property (see NATIVE_UNRELIABLE_TOOL_FAMILIES). One outlier host
235 // pinning `native` while every sibling host pins text is exactly how a
236 // value model silently thrashes (the model leaks tool markup into
237 // content / bills empty native completions, and this host can't fix it
238 // server-side). The family verdict must hold on every route.
239 let pins_native = rule
240 .preferred_tool_format
241 .as_deref()
242 .map(|format| format.eq_ignore_ascii_case("native"))
243 .unwrap_or(false);
244 if pins_native {
245 let model_match_lower = rule.model_match.to_ascii_lowercase();
246 for (family, evidence) in NATIVE_UNRELIABLE_TOOL_FAMILIES {
247 if model_match_lower.contains(family) {
248 report.footguns.push(CapabilityFootgun {
249 provider: provider.clone(),
250 model_match: rule.model_match.clone(),
251 message: format!(
252 "pins preferred_tool_format = \"native\" for the \
253 native-unreliable `{family}` family. {evidence} Steer this \
254 route to a text channel (preferred_tool_format = \"text\" or \
255 \"json\") and set tool_mode_parity = \"native_unreliable\" so \
256 the family verdict is consistent across hosts."
257 ),
258 });
259 }
260 }
261 }
262 }
263 }
264 report
265}
266
267/// Audit the built-in (shipped) capability matrix. Convenience entry point for
268/// the CLI gate.
269pub fn audit_builtin() -> CapabilityAuditReport {
270 audit_capabilities(crate::llm::capabilities::builtin_file())
271}
272
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::capabilities::parse_capabilities_toml;

    /// Parses `src` as a capability matrix and audits it.
    fn audit_toml(src: &str) -> CapabilityAuditReport {
        let file = parse_capabilities_toml(src).expect("parses");
        audit_capabilities(&file)
    }

    /// Asserts that auditing `src` yields no findings.
    #[track_caller]
    fn assert_clean(src: &str) {
        let report = audit_toml(src);
        assert!(report.is_clean(), "{}", report.render());
    }

    /// Asserts that auditing `src` yields exactly one finding and returns it.
    #[track_caller]
    fn sole_footgun(src: &str) -> CapabilityFootgun {
        let mut report = audit_toml(src);
        assert_eq!(report.footguns.len(), 1, "{}", report.render());
        report.footguns.remove(0)
    }

    #[test]
    fn shipped_matrix_has_no_footguns() {
        let report = audit_builtin();
        assert!(
            report.is_clean(),
            "shipped capability matrix has footguns:\n{}",
            report.render()
        );
    }

    #[test]
    fn flags_reasoning_off_for_tools_contradiction() {
        let footgun = sole_footgun(
            r#"
[[provider.someprov]]
model_match = "harmony-*"
reasoning_required_for_tools = true
auto_reasoning_overrides = { agent = "off" }
"#,
        );
        assert_eq!(footgun.provider, "someprov");
        assert!(footgun.message.contains("billed-noncommittal"));
    }

    #[test]
    fn flags_lottery_route_without_pin() {
        let footgun = sole_footgun(
            r#"
[[provider.openrouter]]
model_match = "vendor/harmony-*"
reasoning_required_for_tools = true
reasoning_effort_levels = ["low", "medium", "high"]
"#,
        );
        assert!(footgun.message.contains("openrouter_provider_order"));
    }

    #[test]
    fn pinned_lottery_route_is_clean() {
        assert_clean(
            r#"
[[provider.openrouter]]
model_match = "vendor/harmony-*"
reasoning_required_for_tools = true
openrouter_provider_order = ["Cerebras", "Groq"]
"#,
        );
    }

    #[test]
    fn empty_pin_is_treated_as_no_pin() {
        // An empty allowlist pins nothing, so the route is still flagged.
        let _ = sole_footgun(
            r#"
[[provider.openrouter]]
model_match = "vendor/harmony-*"
reasoning_required_for_tools = true
openrouter_provider_order = []
"#,
        );
    }

    #[test]
    fn non_openrouter_required_route_does_not_need_a_pin() {
        // Groq/Cerebras/Together gpt-oss rows require reasoning for tools but
        // are NOT on the OpenRouter lottery, so a missing pin must not be
        // flagged for them.
        assert_clean(
            r#"
[[provider.groq]]
model_match = "*gpt-oss-*"
reasoning_required_for_tools = true
reasoning_effort_levels = ["low", "medium", "high"]
"#,
        );
    }

    #[test]
    fn qwen_style_off_override_without_required_flag_is_clean() {
        // The Qwen quirk (reasoning-OFF-for-tools, no required-for-tools flag)
        // is a legitimate config and must NOT be flagged.
        assert_clean(
            r#"
[[provider.ollama]]
model_match = "qwen3.6*"
auto_reasoning_overrides = { agent = "off" }
"#,
        );
    }

    #[test]
    fn ordinary_models_are_clean() {
        assert_clean(
            r#"
[[provider.openrouter]]
model_match = "anthropic/claude-*"
native_tools = true

[[provider.openai]]
model_match = "gpt-*"
native_tools = true
"#,
        );
    }

    #[test]
    fn flags_native_tool_format_without_native_tools() {
        let footgun = sole_footgun(
            r#"
[[provider.someprov]]
model_match = "some-model"
native_tools = false
preferred_tool_format = "native"
"#,
        );
        assert!(footgun
            .message
            .contains("preferred_tool_format = \"native\""));
    }

    #[test]
    fn flags_native_unreliable_family_pinning_native() {
        // A GLM-5 route that pins the native channel (the nvidia outlier
        // shape): native_tools = true keeps Footgun 3 quiet, so the ONLY
        // footgun is the family-consistency gate.
        let footgun = sole_footgun(
            r#"
[[provider.nvidia]]
model_match = "*glm-5*"
native_tools = true
preferred_tool_format = "native"
"#,
        );
        assert!(footgun
            .message
            .contains("native-unreliable `glm-5` family"));
    }

    #[test]
    fn native_unreliable_family_on_text_channel_is_clean() {
        // The family verdict satisfied: text channel + native_unreliable.
        assert_clean(
            r#"
[[provider.nvidia]]
model_match = "*glm-5*"
native_tools = true
preferred_tool_format = "text"
tool_mode_parity = "native_unreliable"
"#,
        );
    }

    #[test]
    fn native_pin_for_non_family_model_is_clean() {
        // A native pin is fine for a model NOT in the native-unreliable
        // family list — the gate is scoped to families with weight-intrinsic
        // evidence.
        assert_clean(
            r#"
[[provider.someprov]]
model_match = "some-reliable-native-model-*"
native_tools = true
preferred_tool_format = "native"
"#,
        );
    }

    #[test]
    fn flags_tool_choice_modes_without_native_tools() {
        let footgun = sole_footgun(
            r#"
[[provider.someprov]]
model_match = "some-model"
native_tools = false
preferred_tool_format = "text"
allowed_tool_choice_modes = ["auto", "none"]
"#,
        );
        assert!(footgun.message.contains("allowed_tool_choice_modes"));
    }
}