harn_vm/llm/
capabilities.rs

1//! Data-driven provider capabilities.
2//!
3//! The per-(provider, model) capability matrix (native tools, deferred
4//! tool loading, tool-search variants, prompt caching, extended thinking,
5//! max tool count) lives in `capability_sources/**/*.toml`, which generates
6//! the shipped `capabilities.toml` snapshot, and is
7//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
8//! in `harn.toml`. This module owns:
9//!
10//! - loading the built-in TOML (compiled in via `include_str!`);
11//! - merging user overrides on top;
12//! - matching a `(provider, model)` pair against the rule list with
13//!   glob + semver semantics;
14//! - exposing a stable `Capabilities` struct that the `LlmProvider`
15//!   trait delegates to as the single source of truth.
16//!
17//! Provider adapters still supply generation parsers for `version_min`, but
18//! feature gates live in this data table instead of adapter-specific boolean
19//! branches.
20
21use std::cell::RefCell;
22use std::collections::{BTreeMap, HashSet};
23use std::sync::OnceLock;
24
25use serde::{Deserialize, Serialize};
26
27use super::providers::anthropic::claude_generation;
28use super::providers::openai_compat::gpt_generation;
29
/// Generated shipped default capability rules (the `capabilities.toml`
/// snapshot). `include_str!` embeds the file's text in the binary at
/// compile time, so no file is read at runtime.
const BUILTIN_TOML: &str = include_str!("capabilities.toml");
/// Generated provider/model snapshot built from `catalog_sources/**/*.toml`,
/// embedded at compile time in the same way.
const BUILTIN_PROVIDERS_TOML: &str = include_str!("providers.toml");
34
35/// Parsed on-disk capabilities schema. Public so harn-cli can
36/// construct one directly when wiring harn.toml overrides.
37#[derive(Debug, Clone, Deserialize, Default)]
38pub struct CapabilitiesFile {
39    /// Per-provider ordered rule lists. First matching rule wins.
40    #[serde(default)]
41    pub provider: BTreeMap<String, Vec<ProviderRule>>,
42    /// Per-provider defaults applied to every matching row and to
43    /// provider/model pairs that have no model-specific row. This keeps
44    /// transport-shape facts in data without repeating them on every
45    /// generation-specific capability row.
46    #[serde(default)]
47    pub provider_defaults: BTreeMap<String, ProviderDefaults>,
48    /// Sibling → canonical family mapping. Providers with no rule of
49    /// their own fall through to the named family (recursively).
50    #[serde(default)]
51    pub provider_family: BTreeMap<String, String>,
52}
53
54/// Provider-wide default fields merged into matching rules.
55#[derive(Debug, Clone, Deserialize, Default)]
56pub struct ProviderDefaults {
57    /// Message/request/response wire format used by shared helpers.
58    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
59    #[serde(default)]
60    pub message_wire_format: Option<String>,
61    /// Native tool definition wire shape. Known values are `openai`
62    /// and `anthropic`.
63    #[serde(default)]
64    pub native_tool_wire_format: Option<String>,
65    /// Whether image content blocks may reference remote URLs.
66    #[serde(default)]
67    pub image_url_input_supported: Option<bool>,
68    /// File-upload transport used by `std/files.upload`. Known values
69    /// are `anthropic` and `gemini`.
70    #[serde(default)]
71    pub file_upload_wire_format: Option<String>,
72    /// Provider-specific reasoning request shape for OpenAI-compatible
73    /// transports. Known values are `openrouter` and `enabled`.
74    #[serde(default)]
75    pub reasoning_wire_format: Option<String>,
76    #[serde(default)]
77    pub files_api_supported: Option<bool>,
78    #[serde(default)]
79    pub seed_supported: Option<bool>,
80    #[serde(default)]
81    pub top_k_supported: Option<bool>,
82    #[serde(default)]
83    pub temperature_supported: Option<bool>,
84    #[serde(default)]
85    pub top_p_supported: Option<bool>,
86    #[serde(default)]
87    pub frequency_penalty_supported: Option<bool>,
88    #[serde(default)]
89    pub presence_penalty_supported: Option<bool>,
90}
91
/// Last-writer-wins overlay: a set `src` is cloned over `dst`, while an
/// unset `src` leaves `dst` exactly as it was.
fn overlay_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    // Nothing to overlay when the source carries no value.
    if src.is_none() {
        return;
    }
    dst.clone_from(src);
}
98
/// Fill-the-gaps merge: `src` is cloned into `dst` only while `dst` is
/// still unset; a `dst` that already holds a value is never overwritten.
fn fill_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    // An existing value always takes precedence over the fallback.
    if dst.is_some() {
        return;
    }
    dst.clone_from(src);
}
105
/// Applies `$op` (`overlay_opt` or `fill_opt`) to the `(dst, src)` pair of
/// every `ProviderDefaults` field, one call per field, in declaration order.
///
/// The field roster is the single identifier list in the public arm below;
/// the internal `@each` arm expands it into one `$op(&mut dst.f, &src.f)`
/// call per field. `overlay`/`fill_missing_from` differ only in the merge
/// rule they pass as `$op`.
macro_rules! merge_provider_defaults {
    // Internal arm: one `$op` call per listed field.
    (@each $dst:expr, $src:expr, $op:path; $($field:ident),+ $(,)?) => {{
        $(
            $op(&mut $dst.$field, &$src.$field);
        )+
    }};
    // Public arm: supplies the field roster.
    ($dst:expr, $src:expr, $op:path) => {{
        merge_provider_defaults!(
            @each $dst, $src, $op;
            message_wire_format,
            native_tool_wire_format,
            image_url_input_supported,
            file_upload_wire_format,
            reasoning_wire_format,
            files_api_supported,
            seed_supported,
            top_k_supported,
            temperature_supported,
            top_p_supported,
            frequency_penalty_supported,
            presence_penalty_supported,
        )
    }};
}
140
141impl ProviderDefaults {
142    fn overlay(&mut self, other: &ProviderDefaults) {
143        merge_provider_defaults!(self, other, overlay_opt);
144    }
145
146    fn fill_missing_from(&mut self, other: &ProviderDefaults) {
147        merge_provider_defaults!(self, other, fill_opt);
148    }
149
150    fn has_any_field(&self) -> bool {
151        self.message_wire_format.is_some()
152            || self.native_tool_wire_format.is_some()
153            || self.image_url_input_supported.is_some()
154            || self.file_upload_wire_format.is_some()
155            || self.reasoning_wire_format.is_some()
156            || self.files_api_supported.is_some()
157            || self.seed_supported.is_some()
158            || self.top_k_supported.is_some()
159            || self.temperature_supported.is_some()
160            || self.top_p_supported.is_some()
161            || self.frequency_penalty_supported.is_some()
162            || self.presence_penalty_supported.is_some()
163    }
164}
165
/// One row of the capability matrix.
///
/// Only `model_match` is required. Every other field is an `Option` marked
/// `#[serde(default)]`, so a key that is absent from the input deserializes
/// to `None`.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules with an unparseable `version_min` for the
    /// given model are skipped, not merged.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    /// Native tool support for this route (the "native tools" column of the
    /// capability matrix described in the module docs).
    #[serde(default)]
    pub native_tools: Option<bool>,
    /// Message/request/response wire format used by shared helpers.
    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
    #[serde(default)]
    pub message_wire_format: Option<String>,
    /// Native tool definition wire shape. Known values are `openai`
    /// and `anthropic`.
    #[serde(default)]
    pub native_tool_wire_format: Option<String>,
    /// Deferred tool loading support (the "deferred tool loading" column of
    /// the capability matrix described in the module docs).
    #[serde(default)]
    pub defer_loading: Option<bool>,
    /// Tool-search variants available on this route (per the module docs).
    /// NOTE(review): the accepted variant names are not visible in this
    /// file section — confirm against the shipped TOML.
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    /// Whether Harn supports this route through the provider's native
    /// Responses-style API instead of generic chat completions.
    #[serde(default)]
    pub responses_api: Option<bool>,
    /// Provider-hosted tools Harn can pass through without local execution.
    #[serde(default)]
    pub hosted_tools: Option<Vec<String>>,
    /// Whether provider-hosted remote MCP connectors can be mediated by the
    /// provider for this route.
    #[serde(default)]
    pub remote_mcp: Option<bool>,
    /// Whether provider-managed previous-response conversation state is
    /// available.
    #[serde(default)]
    pub conversation_state: Option<bool>,
    /// Whether provider-side truncation/compaction controls are available.
    #[serde(default)]
    pub compaction: Option<bool>,
    /// Whether provider-side background Responses jobs are available.
    #[serde(default)]
    pub background_mode: Option<bool>,
    /// Approval policy modes available when provider-hosted tools execute.
    #[serde(default)]
    pub tool_approval_policy: Option<String>,
    /// Maximum tool count for this route (the "max tool count" column of the
    /// capability matrix described in the module docs).
    #[serde(default)]
    pub max_tools: Option<u32>,
    /// Prompt-caching support for this route (the "prompt caching" column of
    /// the capability matrix described in the module docs).
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Request-side cache breakpoint strategy for routes that require
    /// `cache_control` to opt into provider prompt caching. Known values are
    /// `none`, `top_level`, and `last_block`.
    #[serde(default)]
    pub cache_breakpoint_style: Option<String>,
    /// Whether this provider/model route accepts image or other visual
    /// input blocks through Harn's LLM message path.
    #[serde(default)]
    pub vision: Option<bool>,
    /// Whether this provider/model route accepts audio input blocks
    /// through Harn's LLM message path. Also accepted under the key
    /// `audio_supported`.
    #[serde(default, alias = "audio_supported")]
    pub audio: Option<bool>,
    /// Whether this provider/model route accepts PDF/document input blocks
    /// through Harn's LLM message path. Also accepted under the key
    /// `pdf_supported`.
    #[serde(default, alias = "pdf_supported")]
    pub pdf: Option<bool>,
    /// Whether this provider/model route accepts video input blocks
    /// through Harn's LLM message path. Also accepted under the key
    /// `video_supported`.
    #[serde(default, alias = "video_supported")]
    pub video: Option<bool>,
    /// Whether uploaded file references can be reused in message content.
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    /// File-upload transport used by `std/files.upload`. Known values
    /// are `anthropic` and `gemini`.
    #[serde(default)]
    pub file_upload_wire_format: Option<String>,
    /// Structured-output transport strategy. Known values are:
    /// `native`, `tool_use`, `format_kw`, and `none`.
    #[serde(default)]
    pub structured_output: Option<String>,
    /// Legacy name retained for project overrides written before
    /// `structured_output` became the canonical capability.
    #[serde(default)]
    pub json_schema: Option<String>,
    /// Whether prompt sections should prefer XML-style tags such as
    /// `<task>` / `<examples>` over Markdown headings.
    #[serde(default)]
    pub prefers_xml_scaffolding: Option<bool>,
    /// Whether this model's tokenizer reserves `<tool_call>` / `</tool_call>`
    /// as single special tokens (the native Hermes tool-call markers). When
    /// true, harn remaps those delimiters to a non-special bracket form on the
    /// wire to avoid degenerate opener repetition; see [`crate::llm::tool_delimiter`].
    #[serde(default)]
    pub reserved_tool_call_token: Option<bool>,
    /// Whether prompt sections should prefer Markdown headings such as
    /// `## Task` / `## Examples`.
    #[serde(default)]
    pub prefers_markdown_scaffolding: Option<bool>,
    /// Preferred logical structured-output prompt shape. This is separate
    /// from the transport-level `structured_output` strategy above.
    /// Known values are `native_json`, `delimited`, and `xml_tagged`.
    #[serde(default)]
    pub structured_output_mode: Option<String>,
    /// Whether the route accepts an assistant-role prefill message.
    #[serde(default)]
    pub supports_assistant_prefill: Option<bool>,
    /// Whether durable instructions should use OpenAI's `developer` role
    /// instead of `system`.
    #[serde(default)]
    pub prefers_role_developer: Option<bool>,
    /// Whether text-rendered tool specifications should use XML wrappers
    /// instead of JSON-schema prose.
    #[serde(default)]
    pub prefers_xml_tools: Option<bool>,
    /// Preferred representation for model thinking/reasoning blocks in
    /// transcript-like prompt context. Known values are `none`,
    /// `thinking_blocks`, `reasoning_summary`, and `inline`.
    #[serde(default)]
    pub thinking_block_style: Option<String>,
    /// Supported thinking/reasoning modes for this rule. Values are
    /// script-facing mode names: `enabled`, `adaptive`, and `effort`.
    #[serde(default)]
    pub thinking_modes: Option<Vec<String>>,
    /// Whether Anthropic interleaved thinking is supported for this
    /// provider/model route.
    #[serde(default)]
    pub interleaved_thinking_supported: Option<bool>,
    /// Anthropic beta features that should be requested for this route.
    #[serde(default)]
    pub anthropic_beta_features: Option<Vec<String>>,
    /// Legacy override compatibility. New built-in rules should use
    /// `thinking_modes` so the capability matrix preserves mode detail.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Whether the model accepts image inputs in chat content.
    #[serde(default)]
    pub vision_supported: Option<bool>,
    /// Whether image content blocks may reference remote URLs.
    #[serde(default)]
    pub image_url_input_supported: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
    /// Name of any server-side response parser that can transform model
    /// bytes before Harn sees them. `none` means the provider returns the
    /// model text/tool channel without an implicit parser.
    #[serde(default)]
    pub server_parser: Option<String>,
    /// Whether provider-specific chat-template options are honored. Most
    /// OpenAI-compatible servers call this `chat_template_kwargs`; Baseten's
    /// Model APIs spell the same concept `chat_template_args`.
    #[serde(default)]
    pub honors_chat_template_kwargs: Option<bool>,
    /// Request body field for provider-specific chat-template options when it
    /// differs from the default `chat_template_kwargs`.
    #[serde(default)]
    pub chat_template_options_field: Option<String>,
    /// Whether this route requires OpenAI's `max_completion_tokens`
    /// request field instead of legacy `max_tokens`.
    #[serde(default)]
    pub requires_completion_tokens: Option<bool>,
    /// Whether this route rejects non-streaming chat-completion requests.
    /// Harn forces streaming for such routes so callers can keep provider-
    /// neutral `stream` preferences.
    #[serde(default)]
    pub requires_streaming: Option<bool>,
    /// Whether this route accepts Harn's provider-neutral reasoning effort
    /// control. Providers project this to their native field (for example
    /// OpenAI `reasoning_effort` or Anthropic `output_config.effort`).
    #[serde(default)]
    pub reasoning_effort_supported: Option<bool>,
    /// Accepted effort values for routes that expose a narrower subset than
    /// Harn's provider-neutral enum. Empty means "unknown/all".
    #[serde(default)]
    pub reasoning_effort_levels: Option<Vec<String>>,
    /// Whether this route accepts effort "none" as a true reasoning-off
    /// setting. Older GPT-5 variants support effort but only floor at
    /// `minimal`.
    #[serde(default)]
    pub reasoning_none_supported: Option<bool>,
    /// Maximum thinking-budget tokens this model accepts for its high/xhigh/max
    /// reasoning levels, when the provider takes an explicit token budget rather
    /// than an effort enum. The canonical case is the native Gemini API
    /// `generationConfig.thinkingConfig.thinkingBudget` field, whose ceiling
    /// differs by model (Gemini 2.5 Flash caps at 24576, Pro at 32768).
    /// Declared alongside the model's other wire capabilities instead of a
    /// hard-coded `model.contains("flash")` branch in the provider.
    #[serde(default)]
    pub max_thinking_budget: Option<i64>,
    /// Whether this route accepts an explicit disabled/off reasoning switch.
    /// Some routes require reasoning and reject the provider's disabled shape.
    #[serde(default)]
    pub reasoning_disable_supported: Option<bool>,
    /// Whether this model performs *tool calls inside its reasoning channel*,
    /// so disabling reasoning silently breaks tool calling. The canonical case
    /// is the OpenAI gpt-oss (Harmony) family: with reasoning disabled it emits
    /// 0 tool_calls and a tiny billed-noncommittal completion; with reasoning
    /// enabled (even `low`) it emits clean native tool calls. This is the
    /// *opposite* of the Qwen3 quirk (Qwen narrates tool intent in the
    /// reasoning trace and emits zero `tool_calls`, so Qwen needs reasoning
    /// OFF for tools). When set, `reasoning_policy` refuses to downgrade the
    /// auto reasoning level to `off` for tool-bearing tasks (agent/code/verify)
    /// — flooring instead to the lowest supported effort — so no future
    /// auto-policy default or session pin can re-introduce the
    /// billed-noncommittal failure at the data layer.
    #[serde(default)]
    pub reasoning_required_for_tools: Option<bool>,
    /// Whether reasoning-only clean stops may be promoted into visible text.
    /// Disable this for providers whose `reasoning` field is always private
    /// trace, even when `content` is empty.
    #[serde(default)]
    pub reasoning_text_promotable: Option<bool>,
    /// Provider-specific reasoning request shape for OpenAI-compatible
    /// transports. Known values are `openrouter`, `enabled`, and `minimax`.
    #[serde(default)]
    pub reasoning_wire_format: Option<String>,
    /// Presumably whether the route accepts a `seed` sampling parameter —
    /// TODO confirm against the request builders.
    #[serde(default)]
    pub seed_supported: Option<bool>,
    /// Presumably whether the route accepts a `top_k` sampling parameter —
    /// TODO confirm against the request builders.
    #[serde(default)]
    pub top_k_supported: Option<bool>,
    /// Presumably whether the route accepts a `temperature` sampling
    /// parameter — TODO confirm against the request builders.
    #[serde(default)]
    pub temperature_supported: Option<bool>,
    /// Presumably whether the route accepts a `top_p` sampling parameter —
    /// TODO confirm against the request builders.
    #[serde(default)]
    pub top_p_supported: Option<bool>,
    /// Presumably whether the route accepts a `frequency_penalty`
    /// parameter — TODO confirm against the request builders.
    #[serde(default)]
    pub frequency_penalty_supported: Option<bool>,
    /// Presumably whether the route accepts a `presence_penalty`
    /// parameter — TODO confirm against the request builders.
    #[serde(default)]
    pub presence_penalty_supported: Option<bool>,
    /// Accepted provider-native `tool_choice` modes. Empty means unrestricted
    /// or unknown. Use this for routes whose native tools work, but whose API
    /// rejects forced/specified tool choices.
    #[serde(default)]
    pub allowed_tool_choice_modes: Option<Vec<String>>,
    /// Whether an assistant `tool_calls` message must be followed immediately
    /// by `role=tool` messages for every emitted `tool_call_id`.
    #[serde(default)]
    pub requires_tool_result_adjacency: Option<bool>,
    /// Whether a single assistant message may contain multiple tool calls.
    /// Some OpenAI-compatible providers reject replayed history with more than
    /// one `tool_calls[]` entry even when the calls were parsed from Harn's text
    /// tool protocol, so the request builder must serialize history as
    /// one-call assistant turns for those routes.
    #[serde(default)]
    pub supports_parallel_tool_calls: Option<bool>,
    /// Whether the route rejects `response_format` when native `tools` are
    /// present. Strict OpenAI-compatible servers such as Cerebras accept each
    /// feature alone but reject the pair together.
    #[serde(default)]
    pub tools_exclude_response_format: Option<bool>,
    /// Preferred endpoint family for this provider/model route. Values
    /// are descriptive labels consumed by providers, e.g.
    /// `/api/generate-raw` for Ollama raw prompt bypass.
    #[serde(default)]
    pub recommended_endpoint: Option<String>,
    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
    /// survive the provider route and return in the visible response body.
    #[serde(default)]
    pub text_tool_wire_format_supported: Option<bool>,
    /// Preferred tool-calling mode for this provider/model route when
    /// callers do not explicitly choose `tool_format`. This lets the
    /// capability matrix route around known provider-native regressions
    /// without making presets branch on model names.
    #[serde(default)]
    pub preferred_tool_format: Option<String>,
    /// Empirical native/text interchangeability status for this route.
    /// Known values are descriptive, not gates: `interchangeable`,
    /// `native_unreliable`, `text_unreliable`, `native_only`,
    /// `text_only`, and `unknown`.
    #[serde(default)]
    pub tool_mode_parity: Option<String>,
    /// Short human-readable note explaining `tool_mode_parity`.
    #[serde(default)]
    pub tool_mode_parity_notes: Option<String>,
    /// In-prompt directive that disables this model's "thinking" mode when
    /// the API doesn't expose a first-class field (or exposes it
    /// inconsistently across templates / quantizations). For Qwen3 family
    /// chat templates this is `/no_think`. When `thinking: false` is
    /// requested and this is set, Harn auto-prepends the directive to the
    /// system message so script authors don't need to know it exists.
    #[serde(default)]
    pub thinking_disable_directive: Option<String>,
    /// Per-task auto-policy reasoning-level overrides for this route.
    /// Keys are task labels (`agent`, `verify`, `chat`, `summarize`,
    /// `code`); values are reasoning levels (`off`, `minimal`, `low`,
    /// `medium`, `high`, `xhigh`, `max`). Consulted by `reasoning_policy` only
    /// when policy resolves to `auto` — explicit policies always win.
    ///
    /// Use this to declare known per-model regressions that should
    /// flip the auto-policy default, instead of hard-coding the model/
    /// provider pattern in resolver code. The canonical example is the
    /// Qwen3 tool-call regression — `{ agent = "off" }` disables
    /// reasoning whenever a script registers tools with that route,
    /// matching Qwen's own published guidance.
    #[serde(default)]
    pub auto_reasoning_overrides: Option<BTreeMap<String, String>>,
    /// OpenRouter upstream provider names that must be excluded from routing
    /// for this `(provider, model)` row. Materialized into the request body's
    /// `provider.ignore` array (see
    /// [`crate::llm::providers::openai_compat::apply_openrouter_route_denylist`]).
    /// This is a data-driven route-around for upstreams that serve a route
    /// incorrectly while still advertising the model — the canonical case is
    /// OpenRouter's `Ambient` upstream billing reasoning tokens for
    /// `qwen/qwen3.6-35b-a3b` and then finishing with empty `tool_calls`,
    /// while Parasail / AtlasCloud / AkashML serve the identical request
    /// natively. Only consulted for the `openrouter` provider.
    #[serde(default)]
    pub provider_route_denylist: Option<Vec<String>>,
    /// OpenRouter upstream provider names this `(provider, model)` row is
    /// PINNED to, in preference order. Materialized into the request body's
    /// `provider.order` array with `allow_fallbacks = false` (see
    /// [`crate::llm::providers::openai_compat::apply_openrouter_provider_order`]),
    /// so OpenRouter only ever routes the model to these known-clean upstreams
    /// and never silently falls back to a sketchier one. This is the
    /// *allowlist* counterpart to [`Self::provider_route_denylist`]: prefer it
    /// when the bad upstreams are intermittent / hard to enumerate but the
    /// clean ones are few and stable. The canonical case is OpenRouter's
    /// `openai/gpt-oss-*` route, which fans out across ~17 upstreams in a
    /// sub-provider lottery; some mis-serialize the Harmony tool call even with
    /// reasoning ON (billed-noncommittal: 0 tool_calls), while Cerebras and
    /// Groq serve it cleanly. Only consulted for the `openrouter` provider. An
    /// empty / unset list means "no pin" (free OpenRouter routing). When both a
    /// pin and a denylist are present the pin wins (a closed allowlist already
    /// excludes everything not on it). Validated by the footgun gate in
    /// [`crate::llm::capability_audit`].
    #[serde(default)]
    pub openrouter_provider_order: Option<Vec<String>>,
    /// Serving-quality / precision trust verdict for this `(provider, model)`
    /// route. A provider can be live and fast yet still serve a model at
    /// DEGRADED quality (e.g. an undocumented quantization) or reject otherwise
    /// valid requests, silently contaminating any eval/meter that trusts its
    /// numbers. This is the data-driven sibling of [`Self::provider_route_denylist`]
    /// / [`Self::openrouter_provider_order`]: instead of routing *around* a bad
    /// upstream, it labels the route's measured precision so tooling (the
    /// meter precision canary) can refuse to trust a `degraded` route and flag a
    /// `throttled` one. Known values are `trusted` (full-precision verified
    /// against a reference), `degraded` (proven to serve at reduced quality),
    /// `throttled` (full-precision but rate-limited to unusable timing), and
    /// `unverified` (no verdict — treated the same as unset). Unset means
    /// `unverified`.
    #[serde(default)]
    pub serving_precision: Option<String>,
}
519
/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to `false` / empty / `None` so callers never have to
/// unwrap an `Option<bool>` for what are really boolean gates.
///
/// Fields mirror the same-named [`ProviderRule`] fields, with the `Option`
/// wrapper dropped for boolean gates and for list/map fields.
///
/// NOTE(review): the `Default` impl for this type is not uniformly
/// `false`/empty — it starts several gates at `true` (for example
/// `image_url_input_supported`, `reasoning_disable_supported`,
/// `seed_supported`) and several strings at `"openai"` or `"none"`.
/// Confirm which baseline the resolver applies before relying on the
/// summary above for those fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    /// See [`ProviderRule::native_tools`].
    pub native_tools: bool,
    /// See [`ProviderRule::message_wire_format`].
    pub message_wire_format: String,
    /// See [`ProviderRule::native_tool_wire_format`].
    pub native_tool_wire_format: String,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub responses_api: bool,
    pub hosted_tools: Vec<String>,
    pub remote_mcp: bool,
    pub conversation_state: bool,
    pub compaction: bool,
    pub background_mode: bool,
    pub tool_approval_policy: Option<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    /// See [`ProviderRule::cache_breakpoint_style`].
    pub cache_breakpoint_style: String,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub video: bool,
    pub files_api_supported: bool,
    pub file_upload_wire_format: Option<String>,
    /// See [`ProviderRule::structured_output`].
    pub structured_output: Option<String>,
    /// Legacy mirror for CLI display and older callers.
    pub json_schema: Option<String>,
    pub prefers_xml_scaffolding: bool,
    /// See [`ProviderRule::reserved_tool_call_token`].
    pub reserved_tool_call_token: bool,
    pub prefers_markdown_scaffolding: bool,
    /// See [`ProviderRule::structured_output_mode`].
    pub structured_output_mode: String,
    pub supports_assistant_prefill: bool,
    pub prefers_role_developer: bool,
    pub prefers_xml_tools: bool,
    /// See [`ProviderRule::thinking_block_style`].
    pub thinking_block_style: String,
    /// See [`ProviderRule::thinking_modes`].
    pub thinking_modes: Vec<String>,
    pub interleaved_thinking_supported: bool,
    pub anthropic_beta_features: Vec<String>,
    pub vision_supported: bool,
    pub image_url_input_supported: bool,
    pub preserve_thinking: bool,
    /// See [`ProviderRule::server_parser`].
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub chat_template_options_field: Option<String>,
    pub requires_completion_tokens: bool,
    pub requires_streaming: bool,
    pub reasoning_effort_supported: bool,
    /// See [`ProviderRule::reasoning_effort_levels`].
    pub reasoning_effort_levels: Vec<String>,
    pub reasoning_none_supported: bool,
    /// See [`ProviderRule::max_thinking_budget`]. `None` means the model uses
    /// the provider's own default ceiling.
    pub max_thinking_budget: Option<i64>,
    pub reasoning_disable_supported: bool,
    /// See [`ProviderRule::reasoning_required_for_tools`].
    pub reasoning_required_for_tools: bool,
    pub reasoning_text_promotable: bool,
    pub reasoning_wire_format: Option<String>,
    pub seed_supported: bool,
    pub top_k_supported: bool,
    pub temperature_supported: bool,
    pub top_p_supported: bool,
    pub frequency_penalty_supported: bool,
    pub presence_penalty_supported: bool,
    /// See [`ProviderRule::allowed_tool_choice_modes`].
    pub allowed_tool_choice_modes: Vec<String>,
    pub requires_tool_result_adjacency: bool,
    pub supports_parallel_tool_calls: bool,
    pub tools_exclude_response_format: bool,
    pub recommended_endpoint: Option<String>,
    pub text_tool_wire_format_supported: bool,
    pub preferred_tool_format: Option<String>,
    pub tool_mode_parity: Option<String>,
    pub tool_mode_parity_notes: Option<String>,
    pub thinking_disable_directive: Option<String>,
    /// Per-task auto-policy reasoning-level overrides for this route.
    /// See [`ProviderRule::auto_reasoning_overrides`].
    pub auto_reasoning_overrides: BTreeMap<String, String>,
    /// OpenRouter upstream provider names to exclude from routing for this
    /// row. See [`ProviderRule::provider_route_denylist`]. Empty means "no
    /// route restriction".
    pub provider_route_denylist: Vec<String>,
    /// OpenRouter upstream provider names this row is PINNED to (allowlist), in
    /// preference order. See [`ProviderRule::openrouter_provider_order`]. Empty
    /// means "no pin" (free OpenRouter routing).
    pub openrouter_provider_order: Vec<String>,
    /// Serving-quality / precision trust verdict for this route. See
    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
    pub serving_precision: String,
}
611
612impl Default for Capabilities {
613    fn default() -> Self {
614        Self {
615            native_tools: false,
616            message_wire_format: "openai".to_string(),
617            native_tool_wire_format: "openai".to_string(),
618            defer_loading: false,
619            tool_search: Vec::new(),
620            responses_api: false,
621            hosted_tools: Vec::new(),
622            remote_mcp: false,
623            conversation_state: false,
624            compaction: false,
625            background_mode: false,
626            tool_approval_policy: None,
627            max_tools: None,
628            prompt_caching: false,
629            cache_breakpoint_style: "none".to_string(),
630            vision: false,
631            audio: false,
632            pdf: false,
633            video: false,
634            files_api_supported: false,
635            file_upload_wire_format: None,
636            structured_output: None,
637            json_schema: None,
638            prefers_xml_scaffolding: false,
639            reserved_tool_call_token: false,
640            prefers_markdown_scaffolding: false,
641            structured_output_mode: "none".to_string(),
642            supports_assistant_prefill: false,
643            prefers_role_developer: false,
644            prefers_xml_tools: false,
645            thinking_block_style: "none".to_string(),
646            thinking_modes: Vec::new(),
647            interleaved_thinking_supported: false,
648            anthropic_beta_features: Vec::new(),
649            vision_supported: false,
650            image_url_input_supported: true,
651            preserve_thinking: false,
652            server_parser: "none".to_string(),
653            honors_chat_template_kwargs: false,
654            chat_template_options_field: None,
655            requires_completion_tokens: false,
656            requires_streaming: false,
657            reasoning_effort_supported: false,
658            reasoning_effort_levels: Vec::new(),
659            reasoning_none_supported: false,
660            max_thinking_budget: None,
661            reasoning_disable_supported: true,
662            reasoning_required_for_tools: false,
663            reasoning_text_promotable: true,
664            reasoning_wire_format: None,
665            seed_supported: true,
666            top_k_supported: true,
667            temperature_supported: true,
668            top_p_supported: true,
669            frequency_penalty_supported: true,
670            presence_penalty_supported: true,
671            allowed_tool_choice_modes: Vec::new(),
672            requires_tool_result_adjacency: false,
673            supports_parallel_tool_calls: true,
674            tools_exclude_response_format: false,
675            recommended_endpoint: None,
676            text_tool_wire_format_supported: true,
677            preferred_tool_format: None,
678            tool_mode_parity: None,
679            tool_mode_parity_notes: None,
680            thinking_disable_directive: None,
681            auto_reasoning_overrides: BTreeMap::new(),
682            provider_route_denylist: Vec::new(),
683            openrouter_provider_order: Vec::new(),
684            serving_precision: "unverified".to_string(),
685        }
686    }
687}
688
/// Display-oriented row for `harn provider catalog matrix`, the legacy
/// `harn check --provider-matrix` surface, and the generated docs page. Rows
/// are intentionally rule-shaped: `model` is the rule's `model_match` pattern,
/// because the shipped capability source of truth is a first-match rule table
/// rather than an exhaustive remote model inventory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ProviderCapabilityMatrixRow {
    /// Provider key the rule is listed under.
    pub provider: String,
    /// The rule's `model_match` pattern (not a concrete model id).
    pub model: String,
    /// Copied from the rule's `version_min`, when it sets one.
    pub version_min: Option<Vec<u32>>,
    pub thinking: Vec<String>,
    pub vision: bool,
    /// `false` when the rule does not set `audio`.
    pub audio: bool,
    /// `false` when the rule does not set `pdf`.
    pub pdf: bool,
    /// `false` when the rule does not set `video`.
    pub video: bool,
    /// Reported as `true` for every row built from a rule.
    pub streaming: bool,
    /// `false` when the rule does not set `files_api_supported`.
    pub files_api_supported: bool,
    pub json_schema: Option<String>,
    pub prefers_xml_scaffolding: bool,
    pub reserved_tool_call_token: bool,
    pub prefers_markdown_scaffolding: bool,
    pub structured_output_mode: String,
    pub supports_assistant_prefill: bool,
    /// Falls back to the rule's `requires_completion_tokens` when the rule
    /// does not set `prefers_role_developer` itself.
    pub prefers_role_developer: bool,
    pub prefers_xml_tools: bool,
    pub thinking_block_style: String,
    /// `false` when the rule does not set `native_tools`.
    pub native_tools: bool,
    /// The rule's `text_tool_wire_format_supported`; `true` when unset.
    pub text_tools: bool,
    pub preferred_tool_format: String,
    pub tool_mode_parity: String,
    /// `native_tools || text_tools`: at least one tool channel is available.
    pub tools: bool,
    /// The rule's `prompt_caching`; `false` when unset.
    pub cache: bool,
    /// Serving-quality / precision trust verdict for this route. See
    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
    pub serving_precision: String,
    /// Which layer the row came from: `"project"` for user overrides,
    /// `"builtin"` for the shipped table.
    pub source: String,
}
726
/// Result of auditing catalogued chat models for explicit tool-capability
/// rules (`native_tools` and `preferred_tool_format`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ToolCapabilityAuditReport {
    /// Number of models that were audited (models without pricing are
    /// skipped and not counted).
    pub audited_models: usize,
    /// One entry per audited model missing at least one of the two fields,
    /// sorted by provider and then by model.
    pub gaps: Vec<ToolCapabilityAuditGap>,
}
732
733impl ToolCapabilityAuditReport {
734    pub fn ok(&self) -> bool {
735        self.gaps.is_empty()
736    }
737
738    pub fn render_human(&self) -> String {
739        if self.gaps.is_empty() {
740            return format!(
741                "provider capability audit OK: {} priced chat models have explicit native_tools and preferred_tool_format rules",
742                self.audited_models
743            );
744        }
745
746        let mut out = format!(
747            "provider capability audit found {} catalog gaps among {} priced chat models:",
748            self.gaps.len(),
749            self.audited_models
750        );
751        for gap in &self.gaps {
752            let matched = match (&gap.rule_provider, &gap.rule_model_match) {
753                (Some(provider), Some(model_match)) => {
754                    format!("provider.{provider} model_match=\"{model_match}\"")
755                }
756                _ => "no matching rule".to_string(),
757            };
758            out.push_str(&format!(
759                "\n- {}:{} ({matched}) missing {}; suggest native_tools = {}, preferred_tool_format = \"{}\"",
760                gap.provider,
761                gap.model,
762                gap.missing_fields.join(", "),
763                gap.suggested_native_tools,
764                gap.suggested_preferred_tool_format,
765            ));
766        }
767        out
768    }
769}
770
/// One audited model whose matching capability rule (if any) does not
/// explicitly set `native_tools` and/or `preferred_tool_format`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ToolCapabilityAuditGap {
    /// Provider of the catalogued model.
    pub provider: String,
    /// Catalog id of the model.
    pub model: String,
    /// Provider whose rule list produced the match (may be a family parent
    /// of `provider`); `None` when no rule matched.
    pub rule_provider: Option<String>,
    /// `model_match` pattern of the matched rule; `None` when no rule matched.
    pub rule_model_match: Option<String>,
    /// Names of the fields the rule leaves unset: `"native_tools"` and/or
    /// `"preferred_tool_format"`.
    pub missing_fields: Vec<String>,
    /// Suggested value for `native_tools`.
    pub suggested_native_tools: bool,
    /// Suggested value for `preferred_tool_format`.
    pub suggested_preferred_tool_format: String,
}
781
thread_local! {
    /// Per-thread user overrides installed by the CLI at startup. Kept
    /// thread-local (not process-static) to match the rest of the VM
    /// state model — the VM is !Send and each VM thread owns its own
    /// configuration.
    ///
    /// Starts as `None` on every thread; `set_user_overrides` replaces the
    /// stored value wholesale.
    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
}
789
790/// Lazily-parsed built-in rules. The `include_str!` content is a static
791/// constant; parsing it once per process is safe and free of ordering
792/// hazards.
793static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
794
795fn builtin() -> &'static CapabilitiesFile {
796    BUILTIN.get_or_init(|| {
797        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
798            .expect("capabilities.toml must parse at build time")
799    })
800}
801
/// The shipped (built-in) capability matrix. Public so the footgun gate in
/// [`crate::llm::capability_audit`] can audit exactly what Harn ships.
///
/// Returns the same lazily-parsed, process-wide value as the private
/// `builtin()` accessor; user overrides are not consulted.
pub fn builtin_file() -> &'static CapabilitiesFile {
    builtin()
}
807
808/// Install project-level overrides for the current thread. Usually
809/// called once at CLI bootstrap after reading `harn.toml`. Passing
810/// `None` clears any prior override.
811pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
812    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
813}
814
/// Clear any thread-local user overrides. Used between test runs.
///
/// Equivalent to `set_user_overrides(None)`; only the calling thread's
/// overrides are affected.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
819
820/// Parse a TOML string containing the capabilities section's own shape
821/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
822/// same layout used by the built-in `capabilities.toml`) and install as
823/// the current thread's override.
824pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
825    set_user_overrides(Some(parse_capabilities_toml(src)?));
826    Ok(())
827}
828
829/// Parse a capabilities TOML document (the same layout used by the built-in
830/// `capabilities.toml`) without installing it anywhere, for callers that
831/// thread an explicit capability overlay instead of mutating thread state
832/// (e.g. `harn provider catalog export --capabilities-overlay`).
833pub fn parse_capabilities_toml(src: &str) -> Result<CapabilitiesFile, String> {
834    toml::from_str(src).map_err(|e| e.to_string())
835}
836
837/// Extract the `[capabilities]` section from a full `harn.toml` source
838/// and install it as the current thread's override. The schema inside
839/// that section mirrors `CapabilitiesFile` but with every key prefixed
840/// by `capabilities.`:
841///
842/// ```toml
843/// [[capabilities.provider.my-proxy]]
844/// model_match = "*"
845/// native_tools = true
846/// tool_search = ["hosted"]
847/// ```
848pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
849    #[derive(Deserialize)]
850    struct Manifest {
851        #[serde(default)]
852        capabilities: Option<CapabilitiesFile>,
853    }
854    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
855    set_user_overrides(parsed.capabilities);
856    Ok(())
857}
858
859/// Look up effective capabilities for a `(provider, model)` pair.
860/// Walks the provider_family chain until it finds a rule list that
861/// matches. Within any one provider's rule list, user overrides are
862/// consulted before the built-in rules. The first matching rule wins —
863/// later rules (and later layers in the family chain) are ignored.
864pub fn lookup(provider: &str, model: &str) -> Capabilities {
865    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
866    lookup_with_user_overrides(provider, model, user.as_ref())
867}
868
869pub fn lookup_with_user_overrides(
870    provider: &str,
871    model: &str,
872    user_overrides: Option<&CapabilitiesFile>,
873) -> Capabilities {
874    let mut caps = lookup_with(provider, model, builtin(), user_overrides);
875    if provider != "openai" && provider != "mock" {
876        caps.responses_api = false;
877        caps.hosted_tools.clear();
878        caps.remote_mcp = false;
879        caps.conversation_state = false;
880        caps.compaction = false;
881        caps.background_mode = false;
882        caps.tool_approval_policy = None;
883    }
884    caps
885}
886
/// Which wire channel a `tool_format` string travels over. `native` means the
/// provider's structured `tool_calls` JSON channel; `text` and `json` are both
/// text-channel grammars carried inside assistant content. This mirrors
/// `llm_config::ToolFormatChannel` but is kept local so that the capability
/// registry (the single source of truth for tool-call dialect validity) does
/// not depend on the resolver.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatWire {
    /// Provider-native JSON tool calling (`tool_format = "native"`).
    Native,
    /// A text-channel grammar (`tool_format = "text"` or `"json"`).
    Text,
}

impl ToolFormatWire {
    /// Map a `tool_format` string to its channel.
    ///
    /// Matching is exact and case-sensitive. Anything unrecognized yields
    /// `None`, so callers can reject typos loudly instead of guessing a
    /// channel.
    pub fn classify(tool_format: &str) -> Option<Self> {
        // Every recognized format name and the channel it rides on.
        const KNOWN_FORMATS: [(&str, ToolFormatWire); 3] = [
            ("native", ToolFormatWire::Native),
            ("text", ToolFormatWire::Text),
            ("json", ToolFormatWire::Text),
        ];

        KNOWN_FORMATS
            .iter()
            .find(|(name, _)| *name == tool_format)
            .map(|&(_, wire)| wire)
    }
}
912
/// Result of checking a requested `(provider, model, tool_format)` combination
/// against the capability registry's tool-call dialect validity model.
///
/// This is the FOOTGUN-REMOVAL contract: a harness developer may request any
/// tool_format, and the registry guarantees that the resolved format actually
/// yields parseable tool calls on that route. A known-broken combination
/// (e.g. a `native` pin on a `native_unreliable` route that silently drops to
/// unparsed DSML text) is auto-corrected, with an explanation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolFormatDecision {
    /// The tool_format to put on the wire. Identical to the requested format
    /// when the combination was already valid; otherwise the registry's
    /// `preferred_tool_format` for the route.
    pub effective: String,
    /// Present only when the requested format was overridden. Human-readable;
    /// names the bad combination and the working alternative. Surface it to
    /// the harness developer so vanishing tool calls are never silent.
    pub correction: Option<String>,
}

impl ToolFormatDecision {
    /// Decision that keeps `format` as requested, with no correction note.
    fn accepted(format: String) -> Self {
        let correction = None;
        Self {
            effective: format,
            correction,
        }
    }
}
941
/// Whether a route's `tool_mode_parity` verdict says the native (provider
/// JSON) channel cannot be trusted to yield parseable tool calls.
///
/// `unsupported` (no working channel at all) is deliberately not listed:
/// there is no better format to steer to, so the gate leaves such a route
/// alone instead of rewriting to another broken channel under a misleading
/// "Using X instead" message.
fn parity_forbids_native(parity: &str) -> bool {
    const NATIVE_BROKEN_VERDICTS: [&str; 2] = ["native_unreliable", "text_only"];
    NATIVE_BROKEN_VERDICTS
        .iter()
        .any(|verdict| *verdict == parity)
}
950
/// Whether a route's `tool_mode_parity` verdict says a text-channel grammar
/// cannot be trusted to yield parseable tool calls. As with the native-side
/// check, `unsupported` is intentionally not listed because there would be no
/// working format to steer to.
fn parity_forbids_text(parity: &str) -> bool {
    const TEXT_BROKEN_VERDICTS: [&str; 2] = ["text_unreliable", "native_only"];
    TEXT_BROKEN_VERDICTS
        .iter()
        .any(|verdict| *verdict == parity)
}
957
958/// True when the requested wire channel is known not to return parseable tool
959/// calls for a route. The gate auto-corrects only on *positive* evidence of
960/// breakage, never on a "we don't know" default:
961///
962/// - `tool_mode_parity` is an explicit verdict (`parity_forbids_*`).
963/// - `text_tool_wire_format_supported = false` is an explicit declaration that
964///   the text channel does not survive this route (e.g. native-only local
965///   Ollama Qwen3 rows that omit a parity string). It defaults to `true`, so an
966///   unknown route is never wrongly judged text-broken.
967///
968/// `native_tools` is deliberately NOT consulted here: it defaults to `false`
969/// for unknown providers, so treating `!native_tools` as "native is broken"
970/// would wrongly rewrite a custom proxy that does support native tools. The
971/// hard `native` + `!native_tools` capability gate in `extract_llm_options`
972/// already rejects a genuine native-on-non-native mismatch loudly.
973fn channel_forbidden(wire: ToolFormatWire, caps: &Capabilities) -> bool {
974    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
975    match wire {
976        ToolFormatWire::Native => parity_forbids_native(parity),
977        ToolFormatWire::Text => {
978            parity_forbids_text(parity) || !caps.text_tool_wire_format_supported
979        }
980    }
981}
982
983/// Validate (and, where the registry knows better, auto-correct) a requested
984/// `tool_format` for a `(provider, model)` route.
985///
986/// This is the single enforcement seam for tool-call dialect validity. The
987/// capability registry already declares, per route, which channel actually
988/// returns parseable tool calls (`tool_mode_parity`) and which format to use
989/// (`preferred_tool_format`). Before this function those fields were advisory
990/// metadata that any alias pin or explicit `--tool-format` flag could silently
991/// override — the footgun behind the DeepSeek V3.2 DSML "vanishing tool calls"
992/// dead-abstain. Now any combo whose requested channel is forbidden — by the
993/// route's `tool_mode_parity` verdict OR by an explicit
994/// `text_tool_wire_format_supported = false` declaration — is rewritten to a
995/// working channel (preferring the route's `preferred_tool_format`), with a
996/// `correction` message naming both. Unknown formats, routes with no adverse
997/// signal (`unknown`/`interchangeable`), and routes with no working channel at
998/// all pass through unchanged.
999pub fn validate_tool_format(provider: &str, model: &str, requested: &str) -> ToolFormatDecision {
1000    let caps = lookup(provider, model);
1001    validate_tool_format_with_caps(provider, model, requested, &caps)
1002}
1003
1004/// `validate_tool_format` against an already-resolved [`Capabilities`], so hot
1005/// callers that already hold one avoid a second matrix lookup.
1006pub fn validate_tool_format_with_caps(
1007    provider: &str,
1008    model: &str,
1009    requested: &str,
1010    caps: &Capabilities,
1011) -> ToolFormatDecision {
1012    // Unknown / unclassifiable formats are not ours to second-guess — the
1013    // exhaustive-match guard elsewhere already rejects typos loudly.
1014    let Some(wire) = ToolFormatWire::classify(requested) else {
1015        return ToolFormatDecision::accepted(requested.to_string());
1016    };
1017
1018    if !channel_forbidden(wire, caps) {
1019        return ToolFormatDecision::accepted(requested.to_string());
1020    }
1021
1022    // The requested channel is known-broken for this route. Pick the opposite
1023    // channel as the steer target, preferring the route's declared
1024    // `preferred_tool_format` when it lands on a channel that is itself not
1025    // forbidden. If BOTH channels are forbidden (a route with no working tool
1026    // surface), there is nothing better to offer — pass the request through
1027    // unchanged rather than rewrite to an equally-broken format under a
1028    // misleading correction message.
1029    let opposite = match wire {
1030        ToolFormatWire::Native => ToolFormatWire::Text,
1031        ToolFormatWire::Text => ToolFormatWire::Native,
1032    };
1033    if channel_forbidden(opposite, caps) {
1034        return ToolFormatDecision::accepted(requested.to_string());
1035    }
1036    let preferred = caps
1037        .preferred_tool_format
1038        .clone()
1039        .filter(|fmt| ToolFormatWire::classify(fmt) == Some(opposite))
1040        .unwrap_or_else(|| match opposite {
1041            ToolFormatWire::Native => "native".to_string(),
1042            ToolFormatWire::Text => "json".to_string(),
1043        });
1044
1045    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1046    let mut correction = format!(
1047        "tool_format `{requested}` is not safe for {provider}/{model} \
1048         (tool_mode_parity = `{parity}`): this route does not return parseable \
1049         tool calls on the {} channel, so calls would silently vanish. \
1050         Using `{preferred}` instead.",
1051        match wire {
1052            ToolFormatWire::Native => "provider-native",
1053            ToolFormatWire::Text => "text",
1054        }
1055    );
1056    if let Some(note) = caps.tool_mode_parity_notes.as_deref() {
1057        if !note.is_empty() {
1058            correction.push_str(" (");
1059            correction.push_str(note);
1060            correction.push(')');
1061        }
1062    }
1063
1064    ToolFormatDecision {
1065        effective: preferred,
1066        correction: Some(correction),
1067    }
1068}
1069
1070/// FOOTGUN-REMOVAL — fail fast when a `(provider, model)` route has NO viable
1071/// tool channel at all: the registry forbids both the provider-native channel
1072/// AND every text-channel grammar. `validate_tool_format` deliberately passes
1073/// such a route through unchanged (it has no *better* format to steer to and
1074/// must not rewrite to an equally-broken one under a misleading "Using X
1075/// instead" message); but a tool-bearing call dispatched on a route with no
1076/// working channel can only produce a silent empty tool stream. This guard lets
1077/// the call seam reject that combo BEFORE dispatch with an actionable message —
1078/// naming the bad `(provider, model)` and a suggested alternative provider for
1079/// the same model family — instead of billing a noncommittal completion.
1080///
1081/// Returns `Some(message)` only when both channels are forbidden (e.g. a route
1082/// flagged `native_unreliable` whose text channel is also declared unsupported,
1083/// or one explicitly pinned `tool_mode_parity = "unsupported"`). Returns `None`
1084/// for every route that still has at least one working channel, so it never
1085/// fires on the auto-correctable DeepInfra/SambaNova gpt-oss rows (those keep a
1086/// working text channel) or on any healthy route. Modeled on the same
1087/// `channel_forbidden` machinery `validate_tool_format` uses, so the two stay in
1088/// lock-step: the gate auto-corrects when one channel works and fails fast when
1089/// neither does.
1090pub fn no_viable_tool_channel(provider: &str, model: &str) -> Option<String> {
1091    let caps = lookup(provider, model);
1092    no_viable_tool_channel_with_caps(provider, model, &caps)
1093}
1094
1095/// `no_viable_tool_channel` against an already-resolved [`Capabilities`], so hot
1096/// callers that already hold one avoid a second matrix lookup.
1097pub fn no_viable_tool_channel_with_caps(
1098    provider: &str,
1099    model: &str,
1100    caps: &Capabilities,
1101) -> Option<String> {
1102    let native_forbidden = channel_forbidden(ToolFormatWire::Native, caps);
1103    let text_forbidden = channel_forbidden(ToolFormatWire::Text, caps);
1104    if !(native_forbidden && text_forbidden) {
1105        return None;
1106    }
1107    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1108    let mut message = format!(
1109        "no viable tool-calling channel for {provider}/{model} \
1110         (tool_mode_parity = `{parity}`): the registry trusts neither the \
1111         provider-native `tool_calls` channel nor a text-channel grammar to \
1112         return parseable tool calls on this route, so a tool-bearing call here \
1113         can only emit a silent empty tool stream. {}",
1114        suggested_alternative_provider_hint(model)
1115    );
1116    if let Some(note) = caps.tool_mode_parity_notes.as_deref() {
1117        if !note.is_empty() {
1118            message.push_str(" (");
1119            message.push_str(note);
1120            message.push(')');
1121        }
1122    }
1123    Some(message)
1124}
1125
/// Short, actionable "try this provider instead" sentence for a model whose
/// current route has no viable tool channel.
///
/// gpt-oss (Harmony) is the canonical case: its native channel is a footgun
/// on several pay-per-token routes, so callers are steered to the channels
/// Harn has proven clean (Fireworks/DeepInfra/SambaNova on TEXT, or a
/// native-clean route). The gpt-oss check is a case-insensitive substring
/// match on `model`; every other model gets a generic hint.
fn suggested_alternative_provider_hint(model: &str) -> String {
    const GPT_OSS_HINT: &str = "For gpt-oss (Harmony), use a TEXT-channel route (e.g. \
         `fireworks`/`deepinfra`/`sambanova` gpt-oss, which Harn pins to \
         `tool_format = \"text\"`) or a native-clean route; the provider-native \
         Harmony channel drops tool calls into the reasoning channel.";
    const GENERIC_HINT: &str = "Pick a provider whose route for this model has a working native or \
         text tool channel (see `harn provider catalog matrix`).";

    let is_gpt_oss = model.to_ascii_lowercase().contains("gpt-oss");
    let hint = if is_gpt_oss { GPT_OSS_HINT } else { GENERIC_HINT };
    hint.to_string()
}
1144
1145/// Return the currently-effective provider capability rule matrix. User
1146/// override rows, when installed for the current thread, are emitted before
1147/// built-in rows so the display mirrors lookup precedence.
1148pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
1149    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1150    let mut rows = Vec::new();
1151    if let Some(user) = user.as_ref() {
1152        push_matrix_rows(&mut rows, user, "project");
1153    }
1154    push_matrix_rows(&mut rows, builtin(), "builtin");
1155    rows
1156}
1157
1158/// Audit the currently effective provider/model catalog against the currently
1159/// effective capability rules. This is the user-facing path used by the CLI
1160/// when authors are adding provider catalog or capability override rows.
1161pub fn audit_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
1162    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1163    audit_tool_capability_coverage(
1164        crate::llm_config::model_catalog_entries(),
1165        builtin(),
1166        user.as_ref(),
1167    )
1168}
1169
1170/// Audit the built-in catalog only. The CI test uses this path so external
1171/// provider config cannot hide a gap in the shipped TOML assets.
1172pub fn audit_builtin_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
1173    let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
1174        .expect("providers.toml must parse at build time");
1175    audit_tool_capability_coverage(catalog.models, builtin(), None)
1176}
1177
1178fn audit_tool_capability_coverage<I>(
1179    models: I,
1180    builtin: &CapabilitiesFile,
1181    user: Option<&CapabilitiesFile>,
1182) -> ToolCapabilityAuditReport
1183where
1184    I: IntoIterator<Item = (String, crate::llm_config::ModelDef)>,
1185{
1186    let mut gaps = Vec::new();
1187    let mut audited_models = 0;
1188
1189    for (model_id, model) in models {
1190        if model.pricing.is_none() {
1191            continue;
1192        }
1193        audited_models += 1;
1194        let matched = first_matching_rule(user, builtin, &model.provider, &model_id);
1195        let mut missing_fields = Vec::new();
1196        match matched.as_ref().map(|matched| matched.rule) {
1197            Some(rule) => {
1198                if rule.native_tools.is_none() {
1199                    missing_fields.push("native_tools".to_string());
1200                }
1201                if rule.preferred_tool_format.is_none() {
1202                    missing_fields.push("preferred_tool_format".to_string());
1203                }
1204            }
1205            None => {
1206                missing_fields.push("native_tools".to_string());
1207                missing_fields.push("preferred_tool_format".to_string());
1208            }
1209        }
1210        if missing_fields.is_empty() {
1211            continue;
1212        }
1213
1214        let (suggested_native_tools, suggested_preferred_tool_format) =
1215            suggested_tool_capability_defaults(
1216                &model.provider,
1217                &model_id,
1218                &model,
1219                matched.as_ref(),
1220            );
1221        gaps.push(ToolCapabilityAuditGap {
1222            provider: model.provider,
1223            model: model_id,
1224            rule_provider: matched.as_ref().map(|matched| matched.provider.clone()),
1225            rule_model_match: matched.map(|matched| matched.rule.model_match.clone()),
1226            missing_fields,
1227            suggested_native_tools,
1228            suggested_preferred_tool_format,
1229        });
1230    }
1231
1232    gaps.sort_by(|left, right| {
1233        left.provider
1234            .cmp(&right.provider)
1235            .then_with(|| left.model.cmp(&right.model))
1236    });
1237    ToolCapabilityAuditReport {
1238        audited_models,
1239        gaps,
1240    }
1241}
1242
/// A capability rule found by `first_matching_rule`, together with the
/// provider whose rule list it came from.
struct MatchedCapabilityRule<'a> {
    /// Provider name at the point in the family chain where the rule matched;
    /// this can differ from the provider the search started at.
    provider: String,
    /// The matching rule, borrowed from the user or built-in file.
    rule: &'a ProviderRule,
}
1247
1248fn first_matching_rule<'a>(
1249    user: Option<&'a CapabilitiesFile>,
1250    builtin: &'a CapabilitiesFile,
1251    provider: &str,
1252    model: &str,
1253) -> Option<MatchedCapabilityRule<'a>> {
1254    let mut current = provider.to_string();
1255    let mut visited = HashSet::new();
1256    while visited.insert(current.clone()) {
1257        if let Some(rule) = user
1258            .and_then(|file| first_matching_rule_in_file(file, &current, model))
1259            .or_else(|| first_matching_rule_in_file(builtin, &current, model))
1260        {
1261            return Some(MatchedCapabilityRule {
1262                provider: current,
1263                rule,
1264            });
1265        }
1266        let next = user
1267            .and_then(|file| file.provider_family.get(&current))
1268            .or_else(|| builtin.provider_family.get(&current))
1269            .cloned();
1270        current = next?;
1271    }
1272    None
1273}
1274
1275fn first_matching_rule_in_file<'a>(
1276    file: &'a CapabilitiesFile,
1277    provider: &str,
1278    model: &str,
1279) -> Option<&'a ProviderRule> {
1280    file.provider
1281        .get(provider)?
1282        .iter()
1283        .find(|rule| rule_matches(rule, model))
1284}
1285
1286fn suggested_tool_capability_defaults(
1287    provider: &str,
1288    model_id: &str,
1289    model: &crate::llm_config::ModelDef,
1290    matched: Option<&MatchedCapabilityRule<'_>>,
1291) -> (bool, String) {
1292    if let Some(rule) = matched.map(|matched| matched.rule) {
1293        let native_tools = rule.native_tools.unwrap_or_else(|| {
1294            // Resolve native_tools from the pinned tool_format via its channel
1295            // so `json` (a TEXT-channel format) correctly implies
1296            // native_tools = false, identically to `text`. Falling through to
1297            // the provider heuristic for `json` would wrongly mark a gemini /
1298            // cerebras row native. Unknown formats keep the heuristic.
1299            match rule
1300                .preferred_tool_format
1301                .as_deref()
1302                .and_then(crate::llm_config::tool_format_channel)
1303            {
1304                Some(crate::llm_config::ToolFormatChannel::Native) => true,
1305                Some(crate::llm_config::ToolFormatChannel::Text) => false,
1306                None => suggested_native_tools(provider, model_id, model),
1307            }
1308        });
1309        let preferred_tool_format = rule
1310            .preferred_tool_format
1311            .clone()
1312            .unwrap_or_else(|| tool_format_for_native(native_tools));
1313        return (native_tools, preferred_tool_format);
1314    }
1315
1316    let native_tools = suggested_native_tools(provider, model_id, model);
1317    (native_tools, tool_format_for_native(native_tools))
1318}
1319
1320fn suggested_native_tools(
1321    provider: &str,
1322    model_id: &str,
1323    model: &crate::llm_config::ModelDef,
1324) -> bool {
1325    if provider == "anthropic" || model_id.contains("claude") {
1326        return true;
1327    }
1328    if matches!(
1329        provider,
1330        "openai" | "gemini" | "cerebras" | "bedrock" | "azure_openai" | "vertex"
1331    ) {
1332        return true;
1333    }
1334    model
1335        .capabilities
1336        .iter()
1337        .any(|capability| capability == "tools")
1338}
1339
/// Derive the `preferred_tool_format` for a capability row (or an unmatched
/// model) that does not pin one.
///
/// Native-capable models get `native`. Text-channel models get `json`
/// (fenced-JSON), the GLOBAL text-channel default. Heredoc (`text`) is never
/// auto-derived: it is reachable only through an explicit
/// `preferred_tool_format = "text"` pin or an explicit request (the reverse
/// safety valve). This is the primary default site — it fires for every model
/// that matches a capability row without an explicit format pin.
fn tool_format_for_native(native_tools: bool) -> String {
    let derived = if native_tools { "native" } else { "json" };
    derived.to_string()
}
1354
1355fn push_matrix_rows(
1356    rows: &mut Vec<ProviderCapabilityMatrixRow>,
1357    file: &CapabilitiesFile,
1358    source: &str,
1359) {
1360    for (provider, rules) in &file.provider {
1361        for rule in rules {
1362            rows.push(rule_to_matrix_row(provider, rule, source));
1363        }
1364    }
1365}
1366
1367fn rule_to_matrix_row(
1368    provider: &str,
1369    rule: &ProviderRule,
1370    source: &str,
1371) -> ProviderCapabilityMatrixRow {
1372    ProviderCapabilityMatrixRow {
1373        provider: provider.to_string(),
1374        model: rule.model_match.clone(),
1375        version_min: rule.version_min.clone(),
1376        thinking: rule_thinking_modes(rule),
1377        vision: rule_vision(rule),
1378        audio: rule.audio.unwrap_or(false),
1379        pdf: rule.pdf.unwrap_or(false),
1380        video: rule.video.unwrap_or(false),
1381        streaming: true,
1382        files_api_supported: rule.files_api_supported.unwrap_or(false),
1383        json_schema: rule_structured_output(rule),
1384        prefers_xml_scaffolding: rule.prefers_xml_scaffolding.unwrap_or(false),
1385        reserved_tool_call_token: rule.reserved_tool_call_token.unwrap_or(false),
1386        prefers_markdown_scaffolding: rule.prefers_markdown_scaffolding.unwrap_or(false),
1387        structured_output_mode: rule_structured_output_mode(rule),
1388        supports_assistant_prefill: rule.supports_assistant_prefill.unwrap_or(false),
1389        prefers_role_developer: rule
1390            .prefers_role_developer
1391            .unwrap_or_else(|| rule.requires_completion_tokens.unwrap_or(false)),
1392        prefers_xml_tools: rule.prefers_xml_tools.unwrap_or(false),
1393        thinking_block_style: rule_thinking_block_style(rule),
1394        native_tools: rule.native_tools.unwrap_or(false),
1395        text_tools: rule.text_tool_wire_format_supported.unwrap_or(true),
1396        preferred_tool_format: rule_preferred_tool_format(rule),
1397        tool_mode_parity: rule_tool_mode_parity(rule),
1398        tools: rule.native_tools.unwrap_or(false)
1399            || rule.text_tool_wire_format_supported.unwrap_or(true),
1400        cache: rule.prompt_caching.unwrap_or(false),
1401        serving_precision: rule
1402            .serving_precision
1403            .clone()
1404            .unwrap_or_else(|| "unverified".to_string()),
1405        source: source.to_string(),
1406    }
1407}
1408
1409fn rule_thinking_modes(rule: &ProviderRule) -> Vec<String> {
1410    rule.thinking_modes.clone().unwrap_or_else(|| {
1411        if rule.thinking.unwrap_or(false) {
1412            vec!["enabled".to_string()]
1413        } else {
1414            Vec::new()
1415        }
1416    })
1417}
1418
1419fn rule_vision(rule: &ProviderRule) -> bool {
1420    rule.vision.or(rule.vision_supported).unwrap_or(false)
1421}
1422
1423fn lookup_with(
1424    provider: &str,
1425    model: &str,
1426    builtin: &CapabilitiesFile,
1427    user: Option<&CapabilitiesFile>,
1428) -> Capabilities {
1429    // Special case: mock spoofs either shape. Try anthropic first
1430    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
1431    // resolves to the Anthropic capability row — the same behaviour
1432    // the hardcoded dispatch gave before this refactor. The native
1433    // tool-definition wire shape is pinned to OpenAI so existing
1434    // mock-based tests keep observing `t.function.name` regardless of
1435    // which family's capability row matched; per-message wire format
1436    // still tracks the matched family so Anthropic-specific request
1437    // plumbing (beta headers, file-id passthrough) is exercised when
1438    // a Claude model is mocked.
1439    if provider == "mock" {
1440        let anthropic_defaults = merged_provider_defaults(user, builtin, "anthropic");
1441        if let Some(mut caps) =
1442            try_match_layer(user, builtin, "anthropic", model, &anthropic_defaults)
1443        {
1444            caps.native_tool_wire_format = "openai".to_string();
1445            return caps;
1446        }
1447        let openai_defaults = merged_provider_defaults(user, builtin, "openai");
1448        if let Some(caps) = try_match_layer(user, builtin, "openai", model, &openai_defaults) {
1449            return caps;
1450        }
1451        let gemini_defaults = merged_provider_defaults(user, builtin, "gemini");
1452        if let Some(caps) = try_match_layer(user, builtin, "gemini", model, &gemini_defaults) {
1453            return caps;
1454        }
1455        return Capabilities::default();
1456    }
1457
1458    // Normal chain: walk provider → family(provider) → ... with a
1459    // visited-guard to avoid cycles in malformed user overrides.
1460    let mut current = provider.to_string();
1461    let mut effective_defaults = ProviderDefaults::default();
1462    let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
1463    while visited.insert(current.clone()) {
1464        let layer_defaults = merged_provider_defaults(user, builtin, &current);
1465        if effective_defaults.has_any_field() {
1466            effective_defaults.fill_missing_from(&layer_defaults);
1467        } else {
1468            effective_defaults.overlay(&layer_defaults);
1469        }
1470        if let Some(caps) = try_match_layer(user, builtin, &current, model, &effective_defaults) {
1471            return caps;
1472        }
1473        let next = user
1474            .and_then(|f| f.provider_family.get(&current))
1475            .or_else(|| builtin.provider_family.get(&current))
1476            .cloned();
1477        match next {
1478            Some(parent) => current = parent,
1479            None => break,
1480        }
1481    }
1482    if effective_defaults.has_any_field() {
1483        return defaults_to_caps(&effective_defaults);
1484    }
1485    Capabilities::default()
1486}
1487
1488/// Try the ordered rule list for `layer_provider` (user rules first,
1489/// then built-in rules). Returns `Some(caps)` on the first match, else
1490/// `None`. `original_provider` is threaded through only for diagnostics.
1491fn try_match_layer(
1492    user: Option<&CapabilitiesFile>,
1493    builtin: &CapabilitiesFile,
1494    layer_provider: &str,
1495    model: &str,
1496    defaults: &ProviderDefaults,
1497) -> Option<Capabilities> {
1498    if let Some(user) = user {
1499        if let Some(rules) = user.provider.get(layer_provider) {
1500            for rule in rules {
1501                if rule_matches(rule, model) {
1502                    return Some(rule_to_caps(rule, defaults));
1503                }
1504            }
1505        }
1506    }
1507    if let Some(rules) = builtin.provider.get(layer_provider) {
1508        for rule in rules {
1509            if rule_matches(rule, model) {
1510                return Some(rule_to_caps(rule, defaults));
1511            }
1512        }
1513    }
1514    None
1515}
1516
1517fn merged_provider_defaults(
1518    user: Option<&CapabilitiesFile>,
1519    builtin: &CapabilitiesFile,
1520    provider: &str,
1521) -> ProviderDefaults {
1522    let mut defaults = builtin
1523        .provider_defaults
1524        .get(provider)
1525        .cloned()
1526        .unwrap_or_default();
1527    if let Some(user_defaults) = user.and_then(|file| file.provider_defaults.get(provider)) {
1528        defaults.overlay(user_defaults);
1529    }
1530    defaults
1531}
1532
/// Builds capabilities from provider defaults alone, for the case where no
/// rule matched.
///
/// A wildcard rule (`model_match = "*"`) with every optional field unset is
/// run through `rule_to_caps`, so each field resolves to the provider default
/// where `rule_to_caps` consults one and to its hard-coded fallback otherwise.
/// `preferred_tool_format` and `tool_mode_parity` are then reset to `None`,
/// because `rule_to_caps` always fills them with a derived `Some(..)` value.
fn defaults_to_caps(defaults: &ProviderDefaults) -> Capabilities {
    // Every field is spelled out explicitly; only `model_match` carries a value.
    let empty = ProviderRule {
        model_match: "*".to_string(),
        version_min: None,
        native_tools: None,
        message_wire_format: None,
        native_tool_wire_format: None,
        defer_loading: None,
        tool_search: None,
        responses_api: None,
        hosted_tools: None,
        remote_mcp: None,
        conversation_state: None,
        compaction: None,
        background_mode: None,
        tool_approval_policy: None,
        max_tools: None,
        prompt_caching: None,
        cache_breakpoint_style: None,
        vision: None,
        audio: None,
        pdf: None,
        video: None,
        files_api_supported: None,
        file_upload_wire_format: None,
        structured_output: None,
        prefers_xml_scaffolding: None,
        reserved_tool_call_token: None,
        prefers_markdown_scaffolding: None,
        structured_output_mode: None,
        supports_assistant_prefill: None,
        prefers_role_developer: None,
        prefers_xml_tools: None,
        thinking_block_style: None,
        json_schema: None,
        thinking_modes: None,
        interleaved_thinking_supported: None,
        anthropic_beta_features: None,
        thinking: None,
        vision_supported: None,
        image_url_input_supported: None,
        preserve_thinking: None,
        server_parser: None,
        honors_chat_template_kwargs: None,
        chat_template_options_field: None,
        requires_completion_tokens: None,
        requires_streaming: None,
        reasoning_effort_supported: None,
        reasoning_effort_levels: None,
        reasoning_none_supported: None,
        max_thinking_budget: None,
        reasoning_disable_supported: None,
        reasoning_required_for_tools: None,
        reasoning_text_promotable: None,
        reasoning_wire_format: None,
        seed_supported: None,
        top_k_supported: None,
        temperature_supported: None,
        top_p_supported: None,
        frequency_penalty_supported: None,
        presence_penalty_supported: None,
        allowed_tool_choice_modes: None,
        requires_tool_result_adjacency: None,
        supports_parallel_tool_calls: None,
        tools_exclude_response_format: None,
        recommended_endpoint: None,
        text_tool_wire_format_supported: None,
        preferred_tool_format: None,
        tool_mode_parity: None,
        tool_mode_parity_notes: None,
        thinking_disable_directive: None,
        auto_reasoning_overrides: None,
        provider_route_denylist: None,
        openrouter_provider_order: None,
        serving_precision: None,
    };
    let mut caps = rule_to_caps(&empty, defaults);
    // No rule matched, so leave the tool-format fields unset rather than
    // reporting the values `rule_to_caps` derives for the synthetic rule.
    caps.preferred_tool_format = None;
    caps.tool_mode_parity = None;
    caps
}
1614
/// Converts a matched rule into the resolved `Capabilities` value.
///
/// Resolution per field:
/// - The wire formats, `files_api_supported`, `file_upload_wire_format`,
///   `image_url_input_supported`, `reasoning_wire_format` and the sampling
///   parameter flags (`seed`, `top_k`, `temperature`, `top_p`, frequency and
///   presence penalty) take the rule value, then the provider default, then
///   a hard-coded fallback where one exists.
/// - Every other field is read from the rule only, with a hard-coded fallback
///   when unset.
/// - `preferred_tool_format` and `tool_mode_parity` are always `Some(..)`,
///   derived from the rule when it does not pin them.
fn rule_to_caps(rule: &ProviderRule, defaults: &ProviderDefaults) -> Capabilities {
    let thinking_modes = rule_thinking_modes(rule);
    Capabilities {
        native_tools: rule.native_tools.unwrap_or(false),
        // Wire formats: rule, then provider default, then "openai".
        message_wire_format: rule
            .message_wire_format
            .clone()
            .or_else(|| defaults.message_wire_format.clone())
            .unwrap_or_else(|| "openai".to_string()),
        native_tool_wire_format: rule
            .native_tool_wire_format
            .clone()
            .or_else(|| defaults.native_tool_wire_format.clone())
            .unwrap_or_else(|| "openai".to_string()),
        defer_loading: rule.defer_loading.unwrap_or(false),
        tool_search: rule.tool_search.clone().unwrap_or_default(),
        responses_api: rule.responses_api.unwrap_or(false),
        hosted_tools: rule.hosted_tools.clone().unwrap_or_default(),
        remote_mcp: rule.remote_mcp.unwrap_or(false),
        conversation_state: rule.conversation_state.unwrap_or(false),
        compaction: rule.compaction.unwrap_or(false),
        background_mode: rule.background_mode.unwrap_or(false),
        tool_approval_policy: rule.tool_approval_policy.clone(),
        max_tools: rule.max_tools,
        prompt_caching: rule.prompt_caching.unwrap_or(false),
        cache_breakpoint_style: rule
            .cache_breakpoint_style
            .clone()
            .unwrap_or_else(|| "none".to_string()),
        vision: rule_vision(rule),
        audio: rule.audio.unwrap_or(false),
        pdf: rule.pdf.unwrap_or(false),
        video: rule.video.unwrap_or(false),
        files_api_supported: rule
            .files_api_supported
            .or(defaults.files_api_supported)
            .unwrap_or(false),
        file_upload_wire_format: rule
            .file_upload_wire_format
            .clone()
            .or_else(|| defaults.file_upload_wire_format.clone()),
        // `structured_output` and `json_schema` carry the same resolved value.
        structured_output: rule_structured_output(rule),
        json_schema: rule_structured_output(rule),
        prefers_xml_scaffolding: rule.prefers_xml_scaffolding.unwrap_or(false),
        reserved_tool_call_token: rule.reserved_tool_call_token.unwrap_or(false),
        prefers_markdown_scaffolding: rule.prefers_markdown_scaffolding.unwrap_or(false),
        structured_output_mode: rule_structured_output_mode(rule),
        supports_assistant_prefill: rule.supports_assistant_prefill.unwrap_or(false),
        // NOTE(review): `rule_to_matrix_row` falls back to
        // `requires_completion_tokens` for this field; here an unset value is
        // plain `false` — confirm the difference is intentional.
        prefers_role_developer: rule.prefers_role_developer.unwrap_or(false),
        prefers_xml_tools: rule.prefers_xml_tools.unwrap_or(false),
        thinking_block_style: rule_thinking_block_style(rule),
        thinking_modes,
        interleaved_thinking_supported: rule.interleaved_thinking_supported.unwrap_or(false),
        anthropic_beta_features: rule.anthropic_beta_features.clone().unwrap_or_default(),
        // Raw field only; the merged view is exposed through `vision` above.
        vision_supported: rule.vision_supported.unwrap_or(false),
        image_url_input_supported: rule
            .image_url_input_supported
            .or(defaults.image_url_input_supported)
            .unwrap_or(true),
        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
        server_parser: rule
            .server_parser
            .clone()
            .unwrap_or_else(|| "none".to_string()),
        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
        chat_template_options_field: rule.chat_template_options_field.clone(),
        requires_completion_tokens: rule.requires_completion_tokens.unwrap_or(false),
        requires_streaming: rule.requires_streaming.unwrap_or(false),
        reasoning_effort_supported: rule.reasoning_effort_supported.unwrap_or(false),
        reasoning_effort_levels: rule.reasoning_effort_levels.clone().unwrap_or_default(),
        reasoning_none_supported: rule.reasoning_none_supported.unwrap_or(false),
        max_thinking_budget: rule.max_thinking_budget,
        reasoning_disable_supported: rule.reasoning_disable_supported.unwrap_or(true),
        reasoning_required_for_tools: rule.reasoning_required_for_tools.unwrap_or(false),
        reasoning_text_promotable: rule.reasoning_text_promotable.unwrap_or(true),
        reasoning_wire_format: rule
            .reasoning_wire_format
            .clone()
            .or_else(|| defaults.reasoning_wire_format.clone()),
        // Sampling parameters: rule, then provider default, then supported.
        seed_supported: rule
            .seed_supported
            .or(defaults.seed_supported)
            .unwrap_or(true),
        top_k_supported: rule
            .top_k_supported
            .or(defaults.top_k_supported)
            .unwrap_or(true),
        temperature_supported: rule
            .temperature_supported
            .or(defaults.temperature_supported)
            .unwrap_or(true),
        top_p_supported: rule
            .top_p_supported
            .or(defaults.top_p_supported)
            .unwrap_or(true),
        frequency_penalty_supported: rule
            .frequency_penalty_supported
            .or(defaults.frequency_penalty_supported)
            .unwrap_or(true),
        presence_penalty_supported: rule
            .presence_penalty_supported
            .or(defaults.presence_penalty_supported)
            .unwrap_or(true),
        allowed_tool_choice_modes: rule.allowed_tool_choice_modes.clone().unwrap_or_default(),
        requires_tool_result_adjacency: rule.requires_tool_result_adjacency.unwrap_or(false),
        supports_parallel_tool_calls: rule.supports_parallel_tool_calls.unwrap_or(true),
        tools_exclude_response_format: rule.tools_exclude_response_format.unwrap_or(false),
        recommended_endpoint: rule.recommended_endpoint.clone(),
        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
        // Always `Some`: pinned value when present, derived value otherwise.
        preferred_tool_format: Some(rule_preferred_tool_format(rule)),
        tool_mode_parity: Some(rule_tool_mode_parity(rule)),
        tool_mode_parity_notes: rule.tool_mode_parity_notes.clone(),
        thinking_disable_directive: rule.thinking_disable_directive.clone(),
        auto_reasoning_overrides: rule.auto_reasoning_overrides.clone().unwrap_or_default(),
        provider_route_denylist: rule.provider_route_denylist.clone().unwrap_or_default(),
        openrouter_provider_order: rule.openrouter_provider_order.clone().unwrap_or_default(),
        serving_precision: rule
            .serving_precision
            .clone()
            .unwrap_or_else(|| "unverified".to_string()),
    }
}
1737
1738fn rule_preferred_tool_format(rule: &ProviderRule) -> String {
1739    // This is the `caps.preferred_tool_format` the runtime `lookup` returns for
1740    // a matched capability row. When the row pins a format, honor it (including
1741    // an explicit `text` — the reverse safety valve). Otherwise derive: native
1742    // models get `native`, text-channel models get `json` (fenced-JSON), the
1743    // GLOBAL text-channel default. Heredoc `text` is never auto-derived.
1744    rule.preferred_tool_format.clone().unwrap_or_else(|| {
1745        if rule.native_tools.unwrap_or(false) {
1746            "native".to_string()
1747        } else {
1748            "json".to_string()
1749        }
1750    })
1751}
1752
1753fn rule_tool_mode_parity(rule: &ProviderRule) -> String {
1754    rule.tool_mode_parity.clone().unwrap_or_else(|| {
1755        match (
1756            rule.native_tools.unwrap_or(false),
1757            rule.text_tool_wire_format_supported.unwrap_or(true),
1758        ) {
1759            (true, true) => "unknown".to_string(),
1760            (true, false) => "native_only".to_string(),
1761            (false, true) => "text_only".to_string(),
1762            (false, false) => "unsupported".to_string(),
1763        }
1764    })
1765}
1766
1767fn rule_structured_output(rule: &ProviderRule) -> Option<String> {
1768    rule.structured_output
1769        .clone()
1770        .or_else(|| rule.json_schema.clone())
1771        .filter(|value| value != "none")
1772}
1773
1774fn rule_structured_output_mode(rule: &ProviderRule) -> String {
1775    if let Some(mode) = &rule.structured_output_mode {
1776        return mode.clone();
1777    }
1778    match rule_structured_output(rule).as_deref() {
1779        Some("native") | Some("format_kw") => "native_json".to_string(),
1780        Some("tool_use") => "xml_tagged".to_string(),
1781        _ => "none".to_string(),
1782    }
1783}
1784
1785fn rule_thinking_block_style(rule: &ProviderRule) -> String {
1786    rule.thinking_block_style.clone().unwrap_or_else(|| {
1787        if rule.reasoning_effort_supported.unwrap_or(false)
1788            || rule.requires_completion_tokens.unwrap_or(false)
1789        {
1790            "reasoning_summary".to_string()
1791        } else {
1792            "none".to_string()
1793        }
1794    })
1795}
1796
1797fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
1798    let lower = model.to_lowercase();
1799    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
1800        return false;
1801    }
1802    if let Some(version_min) = &rule.version_min {
1803        if version_min.len() != 2 {
1804            return false;
1805        }
1806        let want = (version_min[0], version_min[1]);
1807        let have = match extract_version(model) {
1808            Some(v) => v,
1809            // `version_min` was set but the model ID can't be parsed.
1810            // Fail closed: skip this rule so more permissive catch-all
1811            // rules below can still match.
1812            None => return false,
1813        };
1814        if have < want {
1815            return false;
1816        }
1817    }
1818    true
1819}
1820
1821/// Extract `(major, minor)` from a model ID by trying the Anthropic
1822/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
1823/// Both parsers return `None` for shapes they don't recognise so this
1824/// never mis-parses across families.
1825fn extract_version(model: &str) -> Option<(u32, u32)> {
1826    claude_generation(model).or_else(|| gpt_generation(model))
1827}
1828
1829// Model-pattern matching for capability rules. Shared workspace semantics live
1830// in `harn-glob`; keep capability and provider matching on that helper instead
1831// of mirroring glob behavior locally.
1832use harn_glob::match_name as glob_match;
1833
1834#[cfg(test)]
1835mod tests {
1836    use super::*;
1837
    /// Test preamble: calls `clear_user_overrides()`.
    ///
    /// NOTE(review): presumably this drops any installed user capability
    /// overrides so lookups see only the built-in table — confirm against
    /// `clear_user_overrides`.
    fn reset() {
        clear_user_overrides();
    }
1841
1842    fn assert_cerebras_effort_reasoning(model: &str, thinking_block_style: &str) {
1843        let caps = lookup("cerebras", model);
1844        assert_eq!(caps.thinking_modes, vec!["effort"]);
1845        assert!(caps.reasoning_effort_supported);
1846        // tool_format is NOT asserted here: cerebras gpt-oss and zai-glm have
1847        // different defaults (gpt-oss harmonized to `json`, glm stays
1848        // `native`), and this shared helper is about reasoning-effort
1849        // behavior. Tool-format resolution is asserted in the dedicated
1850        // harmonization tests.
1851        assert_eq!(caps.structured_output.as_deref(), Some("native"));
1852        assert_eq!(caps.structured_output_mode, "native_json");
1853        assert_eq!(caps.thinking_block_style, thinking_block_style);
1854    }
1855
1856    fn assert_openrouter_anthropic_runtime_parity(model: &str) {
1857        let direct = lookup("anthropic", model);
1858        let routed = lookup("openrouter", model);
1859
1860        assert_eq!(
1861            routed.native_tools, direct.native_tools,
1862            "{model}: native tool support should match direct Anthropic"
1863        );
1864        assert_eq!(
1865            routed.preferred_tool_format, direct.preferred_tool_format,
1866            "{model}: preferred tool format should match direct Anthropic"
1867        );
1868        assert_eq!(
1869            routed.structured_output, direct.structured_output,
1870            "{model}: structured output transport should match direct Anthropic"
1871        );
1872        assert_eq!(
1873            routed.structured_output_mode, direct.structured_output_mode,
1874            "{model}: structured output mode should match direct Anthropic"
1875        );
1876        assert_eq!(
1877            routed.thinking_modes,
1878            Vec::<String>::new(),
1879            "{model}: OpenRouter Claude routes must not advertise direct Anthropic thinking controls"
1880        );
1881        assert!(
1882            !routed.reasoning_effort_supported,
1883            "{model}: OpenRouter Claude routes must not advertise direct Anthropic effort controls"
1884        );
1885        assert!(
1886            !routed.interleaved_thinking_supported,
1887            "{model}: OpenRouter Claude routes must not advertise interleaved thinking"
1888        );
1889        assert_eq!(
1890            routed.supports_assistant_prefill, direct.supports_assistant_prefill,
1891            "{model}: assistant prefill support should match direct Anthropic"
1892        );
1893        assert_eq!(
1894            routed.prompt_caching, direct.prompt_caching,
1895            "{model}: prompt cache support should match direct Anthropic"
1896        );
1897        assert_eq!(
1898            routed.prefers_xml_scaffolding, direct.prefers_xml_scaffolding,
1899            "{model}: XML scaffolding preference should match direct Anthropic"
1900        );
1901        assert_eq!(
1902            routed.prefers_markdown_scaffolding, direct.prefers_markdown_scaffolding,
1903            "{model}: Markdown scaffolding preference should match direct Anthropic"
1904        );
1905        assert_eq!(
1906            routed.prefers_role_developer, direct.prefers_role_developer,
1907            "{model}: developer role preference should match direct Anthropic"
1908        );
1909        assert_eq!(
1910            routed.prefers_xml_tools, direct.prefers_xml_tools,
1911            "{model}: XML tool preference should match direct Anthropic"
1912        );
1913        assert_eq!(
1914            routed.thinking_block_style, direct.thinking_block_style,
1915            "{model}: thinking block style should match direct Anthropic"
1916        );
1917        assert_eq!(
1918            routed.text_tool_wire_format_supported, direct.text_tool_wire_format_supported,
1919            "{model}: text-tool fallback support should match direct Anthropic"
1920        );
1921    }
1922
1923    #[test]
1924    fn every_catalogued_chat_model_has_explicit_tool_capabilities() {
1925        reset();
1926        let report = audit_builtin_catalogued_chat_model_tool_capabilities();
1927        assert!(report.ok(), "{}", report.render_human());
1928    }
1929
1930    #[test]
1931    fn every_catalogued_alias_has_explicit_tool_capabilities() {
1932        // The model-level audit only covers priced catalog `models`, so a
1933        // `[[provider.local]]` / Ollama alias (e.g. the local gemma-4 route in
1934        // Fix A) could omit native_tools/preferred_tool_format and silently
1935        // degrade to text tools without tripping a test. Walk every alias's
1936        // (provider, id) through the same matcher and require explicit fields.
1937        reset();
1938        let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
1939            .expect("providers.toml must parse at build time");
1940        let builtin = builtin();
1941        let mut gaps = Vec::new();
1942        for (alias, def) in &catalog.aliases {
1943            let matched = first_matching_rule(None, builtin, &def.provider, &def.id);
1944            let explicit = matched
1945                .as_ref()
1946                .map(|matched| {
1947                    matched.rule.native_tools.is_some()
1948                        && matched.rule.preferred_tool_format.is_some()
1949                })
1950                .unwrap_or(false);
1951            if !explicit {
1952                gaps.push(format!(
1953                    "{alias} -> {}:{} (rule={})",
1954                    def.provider,
1955                    def.id,
1956                    matched
1957                        .as_ref()
1958                        .map(|matched| matched.rule.model_match.as_str())
1959                        .unwrap_or("<none>")
1960                ));
1961            }
1962        }
1963        assert!(
1964            gaps.is_empty(),
1965            "aliases missing explicit native_tools/preferred_tool_format:\n- {}",
1966            gaps.join("\n- ")
1967        );
1968    }
1969
1970    #[test]
1971    fn every_catalogued_alias_tool_format_pin_is_safe_for_route() {
1972        // Alias pins are consumed directly by downstream catalogs and CLI
1973        // routing. They must not encode a known-broken channel that the
1974        // central runtime guard would have to correct later.
1975        reset();
1976        let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
1977            .expect("providers.toml must parse at build time");
1978        let mut unsafe_pins = Vec::new();
1979        for (alias, def) in &catalog.aliases {
1980            let Some(tool_format) = def.tool_format.as_deref() else {
1981                continue;
1982            };
1983            let decision = validate_tool_format(&def.provider, &def.id, tool_format);
1984            if let Some(correction) = decision.correction.as_deref() {
1985                unsafe_pins.push(format!(
1986                    "{alias} -> {}:{} pins {tool_format}, would be corrected to {} ({correction})",
1987                    def.provider, def.id, decision.effective
1988                ));
1989            }
1990        }
1991        assert!(
1992            unsafe_pins.is_empty(),
1993            "aliases pin unsafe tool_format values:\n- {}",
1994            unsafe_pins.join("\n- ")
1995        );
1996    }
1997
1998    #[test]
1999    fn tool_capability_audit_reports_suggested_defaults() {
2000        reset();
2001        let capabilities: CapabilitiesFile = toml::from_str(
2002            r#"
2003[[provider.acme]]
2004model_match = "acme-good-*"
2005preferred_tool_format = "native"
2006"#,
2007        )
2008        .unwrap();
2009        let report = audit_tool_capability_coverage(
2010            vec![(
2011                "acme-good-1".to_string(),
2012                crate::llm_config::ModelDef {
2013                    name: "Acme Good".to_string(),
2014                    provider: "acme".to_string(),
2015                    context_window: 128_000,
2016                    logical_model: None,
2017                    equivalence_group: None,
2018                    served_variant: None,
2019                    wire_model: None,
2020                    api_dialect: None,
2021                    rate_limits: None,
2022                    performance: None,
2023                    architecture: None,
2024                    local_memory: None,
2025                    runtime_context_window: None,
2026                    stream_timeout: None,
2027                    capabilities: Vec::new(),
2028                    pricing: Some(crate::llm_config::ModelPricing {
2029                        input_per_mtok: 1.0,
2030                        output_per_mtok: 2.0,
2031                        cache_read_per_mtok: None,
2032                        cache_write_per_mtok: None,
2033                    }),
2034                    deprecated: false,
2035                    deprecation_note: None,
2036                    superseded_by: None,
2037                    fast_mode: None,
2038                    quality_tags: Vec::new(),
2039                    availability: crate::llm_config::ModelAvailability::Serverless,
2040                    tier: None,
2041                    open_weight: None,
2042                    strengths: Vec::new(),
2043                    benchmarks: std::collections::BTreeMap::new(),
2044                    family: None,
2045                    lineage: None,
2046                    complementary_with: Vec::new(),
2047                    avoid_as_reviewer_for: Vec::new(),
2048                },
2049            )],
2050            &capabilities,
2051            None,
2052        );
2053
2054        assert!(!report.ok());
2055        assert_eq!(report.audited_models, 1);
2056        assert_eq!(report.gaps.len(), 1);
2057        assert_eq!(report.gaps[0].missing_fields, ["native_tools"]);
2058        assert!(report.gaps[0].suggested_native_tools);
2059        assert_eq!(report.gaps[0].suggested_preferred_tool_format, "native");
2060        assert!(report.render_human().contains(
2061            "acme:acme-good-1 (provider.acme model_match=\"acme-good-*\") missing native_tools; suggest native_tools = true, preferred_tool_format = \"native\""
2062        ));
2063    }
2064
2065    #[test]
2066    fn openrouter_qwen36_keeps_native_and_denies_ambient_upstream() {
2067        reset();
2068        let caps = lookup("openrouter", "qwen/qwen3.6-35b-a3b");
2069        // The route-around must NOT downgrade the tool format: native stays on.
2070        assert!(caps.native_tools);
2071        assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2072        // The broken Ambient upstream is denied via the data-driven denylist.
2073        assert_eq!(caps.provider_route_denylist, vec!["Ambient".to_string()]);
2074    }
2075
2076    #[test]
2077    fn provider_route_denylist_defaults_empty_for_unmarked_rows() {
2078        reset();
2079        let caps = lookup("anthropic", "claude-opus-4-7");
2080        assert!(caps.provider_route_denylist.is_empty());
2081    }
2082
2083    #[test]
2084    fn strict_openai_compat_rows_require_tool_result_adjacency() {
2085        reset();
2086        assert!(lookup("moonshot", "moonshot/kimi-k2.6").requires_tool_result_adjacency);
2087        assert!(lookup("moonshot", "moonshot/kimi-k2.7-code").requires_tool_result_adjacency);
2088        assert!(lookup("minimax", "MiniMax-M2").requires_tool_result_adjacency);
2089        assert!(lookup("minimax", "MiniMax-M2.7").requires_tool_result_adjacency);
2090        assert!(!lookup("openai", "gpt-4o").requires_tool_result_adjacency);
2091    }
2092
2093    #[test]
2094    fn fireworks_gpt_oss_disables_parallel_tool_call_history() {
2095        reset();
2096        assert!(
2097            !lookup("fireworks", "accounts/fireworks/models/gpt-oss-120b")
2098                .supports_parallel_tool_calls
2099        );
2100        assert!(lookup("openai", "gpt-4o").supports_parallel_tool_calls);
2101    }
2102
2103    #[test]
2104    fn cerebras_tools_exclude_response_format() {
2105        reset();
2106        assert!(lookup("cerebras", "gpt-oss-120b").tools_exclude_response_format);
2107        assert!(lookup("cerebras", "zai-glm-4.7").tools_exclude_response_format);
2108        assert!(!lookup("openai", "gpt-4o").tools_exclude_response_format);
2109    }
2110
2111    #[test]
2112    fn serving_precision_seeds_known_gpt_oss_verdicts() {
2113        reset();
2114        // Full-precision routes verified during the 2026-06 meter effort.
2115        assert_eq!(
2116            lookup("fireworks", "accounts/fireworks/models/gpt-oss-120b").serving_precision,
2117            "trusted"
2118        );
2119        assert_eq!(
2120            lookup("openrouter", "openai/gpt-oss-120b").serving_precision,
2121            "trusted"
2122        );
2123        // SambaNova serves gpt-oss quantized (proven 0/5 vs reference 3/3).
2124        assert_eq!(
2125            lookup("sambanova", "gpt-oss-120b").serving_precision,
2126            "degraded"
2127        );
2128        // Cerebras is full precision but rate-throttled to unusable timing.
2129        assert_eq!(
2130            lookup("cerebras", "gpt-oss-120b").serving_precision,
2131            "throttled"
2132        );
2133    }
2134
2135    #[test]
2136    fn serving_precision_defaults_unverified_for_unmarked_rows() {
2137        reset();
2138        // A route with no serving_precision verdict resolves to "unverified",
2139        // never an empty string, so callers can branch on a stable enum.
2140        assert_eq!(
2141            lookup("anthropic", "claude-opus-4-7").serving_precision,
2142            "unverified"
2143        );
2144    }
2145
2146    #[test]
2147    fn anthropic_opus_47_gets_full_capabilities() {
2148        reset();
2149        let caps = lookup("anthropic", "claude-opus-4-7");
2150        assert!(caps.native_tools);
2151        assert!(caps.defer_loading);
2152        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2153        assert!(caps.prompt_caching);
2154        assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2155        assert!(caps.reasoning_effort_supported);
2156        assert_eq!(
2157            caps.reasoning_effort_levels,
2158            vec!["low", "medium", "high", "xhigh", "max"]
2159        );
2160        assert!(caps.interleaved_thinking_supported);
2161        assert!(caps.vision_supported);
2162        assert!(caps.audio);
2163        assert!(caps.pdf);
2164        assert!(caps.files_api_supported);
2165        assert_eq!(caps.max_tools, Some(10000));
2166        assert!(caps.prefers_xml_scaffolding);
2167        assert!(!caps.prefers_markdown_scaffolding);
2168        assert_eq!(caps.structured_output_mode, "xml_tagged");
2169        assert!(!caps.supports_assistant_prefill);
2170        assert!(!caps.prefers_role_developer);
2171        assert!(caps.prefers_xml_tools);
2172        assert_eq!(caps.thinking_block_style, "thinking_blocks");
2173    }
2174
2175    #[test]
2176    fn anthropic_sonnet_5_gets_adaptive_effort_capabilities() {
2177        reset();
2178        let caps = lookup("anthropic", "claude-sonnet-5");
2179        assert!(caps.native_tools);
2180        assert!(caps.defer_loading);
2181        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2182        assert!(caps.prompt_caching);
2183        assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2184        assert!(caps.reasoning_effort_supported);
2185        assert_eq!(
2186            caps.reasoning_effort_levels,
2187            vec!["low", "medium", "high", "xhigh", "max"]
2188        );
2189        assert!(caps.reasoning_disable_supported);
2190        assert!(!caps.reasoning_none_supported);
2191        assert!(caps.interleaved_thinking_supported);
2192        assert!(!caps.supports_assistant_prefill);
2193        assert_eq!(caps.thinking_block_style, "thinking_blocks");
2194    }
2195
2196    #[test]
2197    fn anthropic_fable_effort_cannot_be_disabled() {
2198        reset();
2199        for model in ["claude-fable-5", "anthropic/claude-fable-5"] {
2200            let caps = lookup("anthropic", model);
2201            assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2202            assert!(caps.reasoning_effort_supported);
2203            assert_eq!(
2204                caps.reasoning_effort_levels,
2205                vec!["low", "medium", "high", "xhigh", "max"]
2206            );
2207            assert!(!caps.reasoning_disable_supported);
2208            assert!(!caps.supports_assistant_prefill);
2209        }
2210    }
2211
2212    #[test]
2213    fn anthropic_opus_46_uses_budgeted_thinking() {
2214        reset();
2215        let caps = lookup("anthropic", "claude-opus-4-6");
2216        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2217        assert!(caps.interleaved_thinking_supported);
2218        assert!(!caps.supports_assistant_prefill);
2219    }
2220
2221    #[test]
2222    fn anthropic_opus_45_does_not_support_interleaved_thinking() {
2223        reset();
2224        let caps = lookup("anthropic", "claude-opus-4-5");
2225        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2226        assert!(!caps.interleaved_thinking_supported);
2227        assert!(caps.supports_assistant_prefill);
2228    }
2229
2230    #[test]
2231    fn openrouter_claude_rows_track_direct_anthropic_runtime_quirks() {
2232        reset();
2233        for model in [
2234            "anthropic/claude-fable-5-0",
2235            "anthropic/claude-mythos-5-0",
2236            "anthropic/claude-haiku-4-5",
2237            "anthropic/claude-haiku-4-7",
2238            "anthropic/claude-sonnet-4-6",
2239            "anthropic/claude-sonnet-4-7",
2240            "anthropic/claude-sonnet-5",
2241            "anthropic/claude-opus-4-6",
2242            "anthropic/claude-opus-4-7",
2243        ] {
2244            assert_openrouter_anthropic_runtime_parity(model);
2245        }
2246    }
2247
2248    #[test]
2249    fn override_can_supply_anthropic_beta_features() {
2250        reset();
2251        let toml_src = r#"
2252[[provider.anthropic]]
2253model_match = "claude-custom-*"
2254native_tools = true
2255anthropic_beta_features = ["fine-grained-tool-streaming-2025-05-14"]
2256"#;
2257        set_user_overrides_toml(toml_src).unwrap();
2258        let caps = lookup("anthropic", "claude-custom-1");
2259        assert_eq!(
2260            caps.anthropic_beta_features,
2261            vec!["fine-grained-tool-streaming-2025-05-14"]
2262        );
2263        reset();
2264    }
2265
2266    #[test]
2267    fn anthropic_haiku_44_has_no_tool_search() {
2268        reset();
2269        let caps = lookup("anthropic", "claude-haiku-4-4");
2270        // Haiku 4.4 falls through to the `claude-*` catch-all row.
2271        assert!(caps.native_tools);
2272        assert!(caps.prompt_caching);
2273        assert!(!caps.defer_loading);
2274        assert!(caps.tool_search.is_empty());
2275    }
2276
2277    #[test]
2278    fn anthropic_haiku_45_supports_tool_search() {
2279        reset();
2280        let caps = lookup("anthropic", "claude-haiku-4-5");
2281        assert!(caps.defer_loading);
2282        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2283    }
2284
2285    #[test]
2286    fn old_claude_gets_catchall() {
2287        reset();
2288        let caps = lookup("anthropic", "claude-opus-3-5");
2289        assert!(caps.native_tools);
2290        assert!(caps.prompt_caching);
2291        assert!(!caps.defer_loading);
2292        assert!(caps.tool_search.is_empty());
2293    }
2294
2295    #[test]
2296    fn openai_gpt_54_supports_tool_search() {
2297        reset();
2298        let caps = lookup("openai", "gpt-5.4");
2299        assert!(caps.defer_loading);
2300        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
2301        assert_eq!(caps.json_schema.as_deref(), Some("native"));
2302        assert_eq!(caps.thinking_modes, vec!["effort"]);
2303        assert!(caps.reasoning_effort_supported);
2304        assert!(caps.reasoning_none_supported);
2305        assert!(!caps.prefers_xml_scaffolding);
2306        assert!(caps.prefers_markdown_scaffolding);
2307        assert_eq!(caps.structured_output_mode, "native_json");
2308        assert!(!caps.supports_assistant_prefill);
2309        assert!(!caps.prefers_role_developer);
2310        assert!(!caps.prefers_xml_tools);
2311        assert_eq!(caps.thinking_block_style, "reasoning_summary");
2312    }
2313
2314    #[test]
2315    fn openai_gpt_53_has_reasoning_none_without_tool_search() {
2316        reset();
2317        let caps = lookup("openai", "gpt-5.3");
2318        assert!(caps.native_tools);
2319        assert!(!caps.defer_loading);
2320        assert!(caps.vision_supported);
2321        assert!(caps.tool_search.is_empty());
2322        assert_eq!(caps.thinking_modes, vec!["effort"]);
2323        assert!(caps.reasoning_effort_supported);
2324        assert!(caps.reasoning_none_supported);
2325    }
2326
2327    #[test]
2328    fn openai_original_gpt_5_has_reasoning_floor_without_none() {
2329        reset();
2330        let caps = lookup("openai", "gpt-5");
2331        assert!(caps.native_tools);
2332        assert!(!caps.defer_loading);
2333        assert_eq!(caps.thinking_modes, vec!["effort"]);
2334        assert!(caps.reasoning_effort_supported);
2335        assert!(!caps.reasoning_none_supported);
2336    }
2337
2338    #[test]
2339    fn gemini_thinking_budget_quirks_are_declared_in_matrix() {
2340        reset();
2341        // Flash: 24576 ceiling, can disable thinking.
2342        let flash = lookup("gemini", "gemini-2.5-flash");
2343        assert_eq!(flash.max_thinking_budget, Some(24_576));
2344        assert!(flash.reasoning_disable_supported);
2345        assert!(flash.thinking_modes.iter().any(|m| m == "effort"));
2346        // Pro: 32768 ceiling, cannot disable thinking.
2347        let pro = lookup("gemini", "gemini-2.5-pro");
2348        assert_eq!(pro.max_thinking_budget, Some(32_768));
2349        assert!(!pro.reasoning_disable_supported);
2350        assert!(pro.thinking_modes.iter().any(|m| m == "effort"));
2351        // The `models/` REST resource name resolves the same.
2352        let flash_resource = lookup("gemini", "models/gemini-2.5-flash");
2353        assert_eq!(flash_resource.max_thinking_budget, Some(24_576));
2354        assert!(flash_resource.reasoning_disable_supported);
2355        // Non-2.5 gemini has no effort thinking support -> provider sends no
2356        // thinkingConfig (unchanged behavior).
2357        let legacy = lookup("gemini", "gemini-1.5-pro");
2358        assert!(!legacy.thinking_modes.iter().any(|m| m == "effort"));
2359    }
2360
2361    #[test]
2362    fn openai_gpt_4o_matrix_fields_include_multimodal_support() {
2363        reset();
2364        let caps = lookup("openai", "gpt-4o");
2365        assert!(caps.native_tools);
2366        assert!(caps.vision);
2367        assert!(caps.audio);
2368        assert!(!caps.pdf);
2369        assert_eq!(caps.json_schema.as_deref(), Some("native"));
2370    }
2371
2372    #[test]
2373    fn openai_reasoning_models_support_effort() {
2374        reset();
2375        let caps = lookup("openai", "o3");
2376        assert_eq!(caps.thinking_modes, vec!["effort"]);
2377        assert!(caps.requires_completion_tokens);
2378        assert!(caps.reasoning_effort_supported);
2379        assert!(caps.prefers_role_developer);
2380        assert_eq!(caps.thinking_block_style, "reasoning_summary");
2381        let prefixed = lookup("openrouter", "openai/o4-mini");
2382        assert!(prefixed.requires_completion_tokens);
2383        assert!(prefixed.reasoning_effort_supported);
2384    }
2385
2386    #[test]
2387    fn vision_capability_gates_known_multimodal_models() {
2388        reset();
2389        let minimax_m3 = lookup("minimax", "MiniMax-M3");
2390        assert!(minimax_m3.vision_supported);
2391        assert!(minimax_m3.video);
2392        assert_eq!(minimax_m3.thinking_modes, vec!["adaptive"]);
2393        assert_eq!(minimax_m3.reasoning_wire_format.as_deref(), Some("minimax"));
2394        assert!(minimax_m3.requires_completion_tokens);
2395        let openrouter_m3 = lookup("openrouter", "minimax/minimax-m3");
2396        assert!(openrouter_m3.vision_supported);
2397        assert!(openrouter_m3.video);
2398        assert!(lookup("openai", "gpt-4o").vision_supported);
2399        assert!(lookup("openai", "gpt-5.4-preview").vision_supported);
2400        assert!(lookup("anthropic", "claude-sonnet-4-6").vision_supported);
2401        assert!(lookup("anthropic", "claude-sonnet-4-6").pdf);
2402        assert!(lookup("anthropic", "claude-sonnet-4-6").files_api_supported);
2403        assert!(lookup("openrouter", "google/gemini-2.5-flash").vision_supported);
2404        assert!(lookup("gemini", "gemini-2.5-flash").vision_supported);
2405        assert!(lookup("gemini", "gemini-2.5-flash").audio);
2406        assert!(lookup("gemini", "gemini-2.5-flash").pdf);
2407        assert_eq!(
2408            lookup("gemini", "gemini-2.5-flash").structured_output_mode,
2409            "native_json"
2410        );
2411        assert!(lookup("ollama", "llava:latest").vision_supported);
2412        assert!(lookup("ollama", "gemma4:26b").vision_supported);
2413        assert!(lookup("ollama", "gemma4-128k:latest").vision_supported);
2414        assert!(!lookup("openai", "gpt-3.5-turbo").vision_supported);
2415        assert!(!lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4").vision_supported);
2416    }
2417
2418    #[test]
2419    fn openrouter_gemini_explicit_cache_uses_block_breakpoints() {
2420        reset();
2421        let caps = lookup("openrouter", "google/gemini-2.5-flash");
2422        assert!(caps.prompt_caching);
2423        assert_eq!(caps.cache_breakpoint_style, "last_block");
2424    }
2425
2426    #[test]
2427    fn local_gemma4_exposes_native_tools_and_structured_output() {
2428        // Fix A: vLLM/SGLang serve Gemma 4 over the OpenAI-compatible surface,
2429        // so the local route must declare native tools + native structured
2430        // output like its hosted gemma-4 siblings — not silently fall back to
2431        // text tools.
2432        reset();
2433        let caps = lookup("local", "gemma-4-26b-a4b-it");
2434        assert!(caps.native_tools);
2435        assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2436        assert_eq!(caps.structured_output.as_deref(), Some("native"));
2437    }
2438
2439    #[test]
2440    fn local_gemma4_exposes_vision_like_hosted_siblings() {
2441        // harn#3585: Gemma 4 is multimodal on every served surface. The local
2442        // OpenAI-compat route must declare vision so the derived structured
2443        // caps and emitted `capability_tags` agree with the gemini/openrouter/
2444        // together siblings.
2445        reset();
2446        for model in ["gemma-4-e4b-it", "gemma-4-e2b-it", "gemma-4-26b-a4b-it"] {
2447            let caps = lookup("local", model);
2448            assert!(
2449                caps.vision_supported,
2450                "local {model} should expose vision_supported"
2451            );
2452            let tags = crate::llm_config::capability_tags_from_capabilities(&caps);
2453            assert!(
2454                tags.iter().any(|t| t == "vision"),
2455                "local {model} emitted capability_tags should include `vision`, got {tags:?}"
2456            );
2457        }
2458    }
2459
2460    #[test]
2461    fn ollama_vision_models_have_no_reasoning_scaffold() {
2462        // Fix B: bakllava / llama3.2-vision / gemma3 are caption/vision models
2463        // with no reasoning capability; they must resolve to the "none" thinking
2464        // block style (like the llava sibling) so the template does not emit a
2465        // spurious "## Reasoning" scaffold.
2466        reset();
2467        for model in ["bakllava:latest", "llama3.2-vision:11b", "gemma3:27b"] {
2468            assert_eq!(
2469                lookup("ollama", model).thinking_block_style,
2470                "none",
2471                "{model} should resolve to thinking_block_style=\"none\""
2472            );
2473        }
2474        // Sibling sanity check.
2475        assert_eq!(
2476            lookup("ollama", "llava:latest").thinking_block_style,
2477            "none"
2478        );
2479    }
2480
2481    #[test]
2482    fn ollama_gemma4_supports_structured_output_and_text_tools() {
2483        // Fix C: Ollama honors the `format` kwarg, so both gemma4 rules must
2484        // declare structured_output="format_kw" (otherwise JSON/schema output
2485        // was blocked) plus explicit text tools for parity with the qwen rules.
2486        reset();
2487        for model in ["gemma4:12b-mlx", "gemma4:26b"] {
2488            let caps = lookup("ollama", model);
2489            assert_eq!(
2490                caps.structured_output.as_deref(),
2491                Some("format_kw"),
2492                "{model} should resolve structured_output=\"format_kw\""
2493            );
2494            assert!(!caps.native_tools, "{model} should use text tools");
2495            assert_eq!(
2496                caps.preferred_tool_format.as_deref(),
2497                Some("text"),
2498                "{model} should prefer text tool format"
2499            );
2500            assert_eq!(
2501                caps.thinking_block_style, "none",
2502                "{model} ships thinking-off"
2503            );
2504        }
2505    }
2506
2507    #[test]
2508    fn openrouter_inherits_openai() {
2509        reset();
2510        let caps = lookup("openrouter", "gpt-5.4");
2511        assert!(caps.defer_loading);
2512        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
2513        assert_eq!(caps.reasoning_wire_format.as_deref(), Some("openrouter"));
2514        assert!(!caps.top_k_supported);
2515    }
2516
2517    #[test]
2518    fn openrouter_kimi27_code_records_tool_choice_and_sampling_limits() {
2519        reset();
2520        let caps = lookup("openrouter", "moonshotai/kimi-k2.7-code");
2521        assert!(caps.native_tools);
2522        assert!(caps.prompt_caching);
2523        assert!(caps.vision_supported);
2524        assert!(caps.video);
2525        // 2026-06-24 forced-format sweep flipped this route native -> text:
2526        // native double-escaped backslash bodies (1/5) and fenced-JSON produced
2527        // no parseable Harn call (0/5); heredoc text was 5/5 byte-clean.
2528        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
2529        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
2530        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2531        assert_eq!(caps.allowed_tool_choice_modes, vec!["auto", "none"]);
2532        assert!(!caps.temperature_supported);
2533        assert!(!caps.top_p_supported);
2534        assert!(!caps.frequency_penalty_supported);
2535        assert!(!caps.presence_penalty_supported);
2536
2537        let prior = lookup("openrouter", "moonshotai/kimi-k2.6");
2538        assert!(prior.prompt_caching);
2539        assert!(prior.vision_supported);
2540        assert!(!prior.video);
2541        assert!(prior.allowed_tool_choice_modes.is_empty());
2542        assert!(prior.temperature_supported);
2543    }
2544
2545    #[test]
2546    fn qwen37_routes_record_prompt_cache_vision_and_streaming_quirks() {
2547        reset();
2548        let plus = lookup("openrouter", "qwen/qwen3.7-plus");
2549        assert!(plus.native_tools);
2550        assert!(plus.prompt_caching);
2551        assert!(plus.vision_supported);
2552        assert_eq!(plus.preferred_tool_format.as_deref(), Some("native"));
2553        assert_eq!(plus.thinking_modes, vec!["enabled"]);
2554        assert_eq!(
2555            plus.auto_reasoning_overrides
2556                .get("agent")
2557                .map(String::as_str),
2558            Some("off"),
2559            "Qwen tool-bearing agent turns should disable reasoning automatically",
2560        );
2561
2562        let max = lookup("openrouter", "qwen/qwen3.7-max");
2563        assert!(max.native_tools);
2564        assert!(max.prompt_caching);
2565        assert!(!max.vision_supported);
2566        assert_eq!(max.thinking_modes, vec!["enabled"]);
2567
2568        let together = lookup("together", "Qwen/Qwen3.7-Max");
2569        assert!(together.native_tools);
2570        assert!(together.prompt_caching);
2571        assert!(together.requires_streaming);
2572        assert!(!together.honors_chat_template_kwargs);
2573
2574        let glm = lookup("together", "zai-org/GLM-5.1");
2575        assert!(glm.native_tools);
2576        assert!(glm.prompt_caching);
2577        assert_eq!(glm.preferred_tool_format.as_deref(), Some("text"));
2578        assert_eq!(glm.tool_mode_parity.as_deref(), Some("native_unreliable"));
2579        assert_eq!(
2580            glm.auto_reasoning_overrides
2581                .get("agent")
2582                .map(String::as_str),
2583            Some("off"),
2584        );
2585
2586        let minimax = lookup("together", "MiniMaxAI/MiniMax-M2.7");
2587        assert!(minimax.native_tools);
2588        assert!(minimax.prompt_caching);
2589        // 2026-06-24 forced-format sweep flipped this route json -> text: heredoc
2590        // beat fenced-JSON on both dispatch and backslash-body fidelity at N=5.
2591        assert_eq!(minimax.preferred_tool_format.as_deref(), Some("text"));
2592        assert_eq!(
2593            minimax.tool_mode_parity.as_deref(),
2594            Some("native_unreliable")
2595        );
2596        assert!(!minimax.reasoning_text_promotable);
2597
2598        let step = lookup("openrouter", "stepfun/step-3.7-flash");
2599        assert!(step.native_tools);
2600        assert!(step.prompt_caching);
2601        assert!(!step.reasoning_disable_supported);
2602        assert_eq!(step.thinking_modes, vec!["enabled"]);
2603    }
2604
2605    #[test]
2606    fn openrouter_structured_routes_cover_current_open_models() {
2607        reset();
2608        for model in [
2609            "deepseek/deepseek-v4-flash",
2610            "mistralai/devstral-small",
2611            "meta-llama/llama-4-scout",
2612            "kwaipilot/kat-coder-pro-v2",
2613        ] {
2614            let caps = lookup("openrouter", model);
2615            assert!(caps.native_tools, "{model} should expose native tools");
2616            assert_eq!(caps.structured_output.as_deref(), Some("native"));
2617            assert_eq!(caps.structured_output_mode, "native_json");
2618        }
2619        assert!(lookup("openrouter", "deepseek/deepseek-v4-flash").top_k_supported);
2620        assert!(lookup("openrouter", "meta-llama/llama-4-scout").top_k_supported);
2621        assert!(!lookup("openrouter", "mistralai/devstral-small").top_k_supported);
2622        assert!(lookup("openrouter", "google/gemma-4-26b-a4b-it").top_k_supported);
2623    }
2624
2625    #[test]
2626    fn openrouter_anthropic_claude_models_support_native_tools() {
2627        // Regression for #2319: OpenRouter Anthropic slugs must match the
2628        // Anthropic capability rules before the OpenRouter -> OpenAI family
2629        // chain, otherwise native-tool requests get rejected as unsupported.
2630        reset();
2631        for model in [
2632            "anthropic/claude-haiku-4-5",
2633            "anthropic/claude-haiku-4-5-20251001",
2634            "anthropic/claude-sonnet-4-6",
2635            "anthropic/claude-sonnet-4-7",
2636            "anthropic/claude-opus-4-7",
2637        ] {
2638            let caps = lookup("openrouter", model);
2639            assert!(
2640                caps.native_tools,
2641                "{model} via openrouter should report native_tools=true",
2642            );
2643            assert!(
2644                caps.prompt_caching,
2645                "{model} via openrouter should report prompt_caching=true",
2646            );
2647            assert_eq!(
2648                caps.cache_breakpoint_style, "top_level",
2649                "{model} via openrouter should use top-level cache_control",
2650            );
2651            assert_eq!(
2652                caps.structured_output.as_deref(),
2653                Some("tool_use"),
2654                "{model} via openrouter should structured_output=tool_use (matches direct anthropic)",
2655            );
2656        }
2657    }
2658
2659    #[test]
2660    fn openrouter_deepseek_v32_defaults_to_text_tools() {
2661        reset();
2662        let caps = lookup("openrouter", "deepseek/deepseek-v3.2");
2663        assert!(caps.native_tools);
2664        assert!(caps.text_tool_wire_format_supported);
2665        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
2666        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
2667        assert_eq!(caps.structured_output.as_deref(), Some("native"));
2668        assert!(caps.prompt_caching);
2669        assert_eq!(caps.cache_breakpoint_style, "last_block");
2670
2671        let automated = lookup("openrouter", "deepseek/deepseek-v3");
2672        assert!(automated.prompt_caching);
2673        assert_eq!(automated.cache_breakpoint_style, "none");
2674    }
2675
2676    #[test]
2677    fn openrouter_explicit_cache_routes_get_block_breakpoints() {
2678        reset();
2679        for model in [
2680            "qwen/qwen3.6-plus",
2681            "qwen/qwen3-coder-plus",
2682            "qwen/qwen3-coder-flash",
2683            "qwen/qwen3-max",
2684            "qwen/qwen-plus",
2685        ] {
2686            let caps = lookup("openrouter", model);
2687            assert!(caps.prompt_caching, "{model} should support prompt cache");
2688            assert_eq!(
2689                caps.cache_breakpoint_style, "last_block",
2690                "{model} should request explicit content-block cache breakpoints",
2691            );
2692        }
2693
2694        let open_weight = lookup("openrouter", "qwen/qwen3.6-35b-a3b");
2695        assert!(!open_weight.prompt_caching);
2696        assert_eq!(open_weight.cache_breakpoint_style, "none");
2697    }
2698
2699    #[test]
2700    fn openrouter_deepseek_alias_slugs_support_native_tools() {
2701        reset();
2702        for model in ["deepseek/deepseek-chat", "deepseek/deepseek-chat-v3-0324"] {
2703            let caps = lookup("openrouter", model);
2704            assert!(caps.native_tools, "{model} should expose native tools");
2705            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2706            assert_eq!(caps.structured_output.as_deref(), Some("native"));
2707            assert!(
2708                caps.thinking_modes.is_empty(),
2709                "{model} is not a reasoning route"
2710            );
2711            assert_eq!(caps.thinking_block_style, "none");
2712            assert!(
2713                caps.top_k_supported,
2714                "{model} should accept top_k through OpenRouter"
2715            );
2716        }
2717
2718        for model in [
2719            "deepseek/deepseek-chat-v3.1",
2720            "deepseek/deepseek-r1",
2721            "deepseek/deepseek-r1-0528",
2722        ] {
2723            let caps = lookup("openrouter", model);
2724            assert!(caps.native_tools, "{model} should expose native tools");
2725            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2726            assert_eq!(caps.structured_output.as_deref(), Some("native"));
2727            assert_eq!(caps.thinking_modes, vec!["enabled", "effort"]);
2728            assert_eq!(caps.thinking_block_style, "reasoning_summary");
2729            assert!(
2730                caps.top_k_supported,
2731                "{model} should accept top_k through OpenRouter"
2732            );
2733        }
2734
2735        assert!(!lookup("openrouter", "deepseek/deepseek-r1-distill-qwen-32b").native_tools);
2736    }
2737
2738    #[test]
2739    fn openrouter_qwen_coder_defaults_to_text_tools() {
2740        reset();
2741        let caps = lookup("openrouter", "qwen/qwen3-coder-flash");
2742        assert!(caps.native_tools);
2743        assert!(caps.text_tool_wire_format_supported);
2744        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
2745        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
2746    }
2747
2748    #[test]
2749    fn bedrock_claude_uses_anthropic_wire_capabilities() {
2750        reset();
2751        let caps = lookup("bedrock", "anthropic.claude-3-5-sonnet-20240620-v1:0");
2752        assert!(caps.native_tools);
2753        assert_eq!(caps.message_wire_format, "anthropic");
2754        assert_eq!(caps.native_tool_wire_format, "anthropic");
2755    }
2756
2757    #[test]
2758    fn groq_inherits_openai_family_only() {
2759        reset();
2760        let caps = lookup("groq", "gpt-5.5-preview");
2761        assert!(caps.defer_loading);
2762    }
2763
2764    #[test]
2765    fn cerebras_inherits_openai_family() {
2766        reset();
2767        let caps = lookup("cerebras", "gpt-oss-120b");
2768        assert_eq!(caps.message_wire_format, "openai");
2769        assert_eq!(caps.native_tool_wire_format, "openai");
2770        // gpt-oss uses NATIVE tool calls across cerebras/groq/together. Under
2771        // json/text it emits a bare {"tool","arguments"} dialect the
2772        // fenced-JSON parser rejects (zero parsed calls), so native is the only
2773        // working channel.
2774        assert!(caps.native_tools);
2775        assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2776    }
2777
2778    #[test]
2779    fn cerebras_gpt_oss_declares_supported_reasoning_efforts() {
2780        // Cerebras GPT-OSS accepts low/medium/high only. The policy resolver
2781        // uses this list to floor `reasoning_policy: "off"` to `low` instead
2782        // of sending unsupported `none` or `minimal` values.
2783        reset();
2784        let caps = lookup("cerebras", "gpt-oss-120b");
2785        assert_cerebras_effort_reasoning("gpt-oss-120b", "reasoning_summary");
2786        assert!(!caps.reasoning_none_supported);
2787        assert_eq!(caps.reasoning_effort_levels, vec!["low", "medium", "high"]);
2788    }
2789
2790    #[test]
2791    fn gpt_oss_requires_reasoning_for_tools_with_provider_specific_tool_wire() {
2792        // gpt-oss (Harmony) calls tools INSIDE the chain-of-thought channel, so
2793        // reasoning-off breaks tool calling. Provider catch-all rules carry no
2794        // reasoning fields, so without a dedicated `*gpt-oss*` row gpt-oss
2795        // would fall through to reasoning-OFF and the eval loop would bill a
2796        // noncommittal. Tool wire support is provider-specific: the pay-per-token
2797        // routes (OpenRouter, Fireworks, DeepInfra, SambaNova) ride Harn's TEXT
2798        // channel — their provider-native Harmony path drops tool calls into the
2799        // reasoning/commentary channel (empty `tool_calls` / billed-noncommittal,
2800        // see the DeepInfra/SambaNova rows + vLLM #22578/#44216, SGLang
2801        // #8976/#10738, openai/harmony #68). Within the text channel they use the
2802        // escape-free heredoc (`text`) grammar rather than fenced-JSON, because
2803        // gpt-oss double-escapes the backslashes a JSON string arg requires and
2804        // corrupts `\\`-heavy code bodies (empirical A/B 2026-06-21: text beats
2805        // json on both dispatch and byte-fidelity). Only the native-clean direct
2806        // routes (Cerebras, Groq) still use provider-native tools.
2807        reset();
2808        for (provider, model, native_tools, preferred_tool_format) in [
2809            ("openrouter", "openai/gpt-oss-120b", false, "text"),
2810            (
2811                "fireworks",
2812                "accounts/fireworks/models/gpt-oss-120b",
2813                false,
2814                "text",
2815            ),
2816            ("deepinfra", "openai/gpt-oss-120b", false, "text"),
2817            ("sambanova", "sambanova/gpt-oss-120b", false, "text"),
2818            ("cerebras", "gpt-oss-120b", true, "native"),
2819            ("groq", "openai/gpt-oss-120b", true, "native"),
2820        ] {
2821            let caps = lookup(provider, model);
2822            assert!(
2823                caps.reasoning_required_for_tools,
2824                "{provider}/{model}: reasoning_required_for_tools must be true"
2825            );
2826            assert!(
2827                caps.reasoning_effort_supported,
2828                "{provider}/{model}: reasoning_effort_supported must be true"
2829            );
2830            assert_eq!(
2831                caps.reasoning_effort_levels,
2832                vec!["low", "medium", "high"],
2833                "{provider}/{model}: effort levels"
2834            );
2835            assert_eq!(caps.thinking_modes, vec!["effort"], "{provider}/{model}");
2836            assert_eq!(
2837                caps.native_tools, native_tools,
2838                "{provider}/{model}: native_tools"
2839            );
2840            assert_eq!(
2841                caps.preferred_tool_format.as_deref(),
2842                Some(preferred_tool_format),
2843                "{provider}/{model}: preferred tool format"
2844            );
2845            assert_eq!(
2846                caps.thinking_block_style, "reasoning_summary",
2847                "{provider}/{model}"
2848            );
2849        }
2850    }
2851
2852    #[test]
2853    fn cerebras_glm_47_supports_reasoning_none() {
2854        // Cerebras documents GLM 4.7's no-reasoning value as
2855        // reasoning_effort="none"; the older disable_reasoning knob is
2856        // deprecated. Keep the route on the same policy path as GPT-OSS.
2857        reset();
2858        let caps = lookup("cerebras", "zai-glm-4.7");
2859        assert_cerebras_effort_reasoning("zai-glm-4.7", "inline");
2860        assert!(caps.reasoning_none_supported);
2861    }
2862
2863    #[test]
2864    fn mock_with_claude_model_routes_to_anthropic() {
2865        reset();
2866        let caps = lookup("mock", "claude-sonnet-4-7");
2867        assert!(caps.defer_loading);
2868        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2869    }
2870
2871    #[test]
2872    fn mock_with_gpt_model_routes_to_openai() {
2873        reset();
2874        let caps = lookup("mock", "gpt-5.4-preview");
2875        assert!(caps.defer_loading);
2876        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
2877    }
2878
2879    #[test]
2880    fn mock_with_gemini_model_routes_to_gemini() {
2881        reset();
2882        let caps = lookup("mock", "gemini-2.5-flash");
2883        assert_eq!(caps.message_wire_format, "gemini");
2884        assert_eq!(caps.native_tool_wire_format, "openai");
2885        assert!(caps.prefers_xml_scaffolding);
2886    }
2887
2888    #[test]
2889    fn qwen36_ollama_preserves_thinking() {
2890        reset();
2891        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
2892        assert!(!caps.native_tools);
2893        assert_eq!(caps.json_schema.as_deref(), Some("format_kw"));
2894        assert!(!caps.thinking_modes.is_empty());
2895        assert!(
2896            caps.preserve_thinking,
2897            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
2898        );
2899        assert_eq!(caps.server_parser, "none");
2900        assert!(!caps.honors_chat_template_kwargs);
2901        assert_eq!(caps.recommended_endpoint.as_deref(), Some("/api/chat"));
2902        assert!(caps.text_tool_wire_format_supported);
2903        assert!(caps.prefers_markdown_scaffolding);
2904        assert_eq!(caps.structured_output_mode, "delimited");
2905        assert!(!caps.prefers_xml_tools);
2906        assert_eq!(caps.thinking_block_style, "inline");
2907    }
2908
2909    #[test]
2910    fn qwen35_ollama_does_not_preserve_thinking() {
2911        reset();
2912        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
2913        assert!(caps.native_tools);
2914        assert!(!caps.thinking_modes.is_empty());
2915        assert!(
2916            !caps.preserve_thinking,
2917            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
2918        );
2919        assert_eq!(caps.server_parser, "ollama_qwen3coder");
2920        assert!(!caps.text_tool_wire_format_supported);
2921    }
2922
2923    #[test]
2924    fn qwen36_routed_providers_all_preserve_thinking() {
2925        reset();
2926        for (provider, model) in [
2927            ("openrouter", "qwen/qwen3.6-plus"),
2928            ("together", "Qwen/Qwen3.6-Plus"),
2929            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
2930            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
2931            ("dashscope", "qwen3.6-plus"),
2932            ("local", "Qwen3.6-35B-A3B"),
2933            ("mlx", "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"),
2934            ("mlx", "Qwen/Qwen3.6-35B-A3B"),
2935        ] {
2936            let caps = lookup(provider, model);
2937            assert!(
2938                !caps.thinking_modes.is_empty(),
2939                "{provider}/{model}: thinking"
2940            );
2941            assert!(
2942                caps.preserve_thinking,
2943                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
2944            );
2945            assert!(caps.native_tools, "{provider}/{model}: native_tools");
2946            assert_ne!(
2947                caps.server_parser, "ollama_qwen3coder",
2948                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
2949            );
2950        }
2951
2952        let caps = lookup("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF");
2953        assert!(!caps.thinking_modes.is_empty());
2954        assert!(caps.preserve_thinking);
2955        assert!(!caps.native_tools);
2956        assert!(caps.text_tool_wire_format_supported);
2957        assert_eq!(caps.server_parser, "none");
2958    }
2959
2960    #[test]
2961    fn qwen_coder_models_do_not_claim_thinking_modes() {
2962        reset();
2963        for (provider, model) in [
2964            ("together", "Qwen/Qwen3-Coder-Next-FP8"),
2965            ("together", "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"),
2966            ("openrouter", "qwen/qwen3-coder-next"),
2967            ("huggingface", "Qwen/Qwen3-Coder-Next"),
2968        ] {
2969            let caps = lookup(provider, model);
2970            assert!(caps.native_tools, "{provider}/{model}: native_tools");
2971            assert!(
2972                caps.thinking_modes.is_empty(),
2973                "{provider}/{model}: coder models are non-thinking routes"
2974            );
2975            assert!(
2976                !caps.preserve_thinking,
2977                "{provider}/{model}: preserve_thinking must stay off"
2978            );
2979            assert!(
2980                caps.thinking_disable_directive.is_none(),
2981                "{provider}/{model}: no /no_think shim should be needed"
2982            );
2983        }
2984    }
2985
2986    #[test]
2987    fn llamacpp_qwen_keeps_text_tool_wire_format() {
2988        reset();
2989        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
2990        assert_eq!(caps.server_parser, "none");
2991        assert!(caps.honors_chat_template_kwargs);
2992        assert!(!caps.native_tools);
2993        assert!(caps.text_tool_wire_format_supported);
2994        assert_eq!(
2995            caps.recommended_endpoint.as_deref(),
2996            Some("/v1/chat/completions")
2997        );
2998    }
2999
3000    #[test]
3001    fn devstral_local_routes_default_to_json_tools() {
3002        reset();
3003        for provider in ["ollama", "llamacpp"] {
3004            let caps = lookup(provider, "devstral-small-2:24b");
3005            assert!(!caps.native_tools, "{provider}: native tools stay opt-in");
3006            assert!(
3007                caps.text_tool_wire_format_supported,
3008                "{provider}: text tools should remain available"
3009            );
3010            // devstral has no reserved-token constraint, so it uses the global
3011            // `json` (fenced-JSON) text-channel default. Heredoc stays
3012            // reachable via an explicit `preferred_tool_format = "text"` pin.
3013            assert_eq!(
3014                caps.preferred_tool_format.as_deref(),
3015                Some("json"),
3016                "{provider}: devstral inherits the global json default"
3017            );
3018        }
3019    }
3020
3021    #[test]
3022    fn openrouter_mistral_routes_use_native_tools() {
3023        reset();
3024        let caps = lookup("openrouter", "mistralai/mistral-small-2603");
3025        assert!(caps.native_tools);
3026        assert!(caps.text_tool_wire_format_supported);
3027        assert_eq!(caps.structured_output.as_deref(), Some("native"));
3028        assert_eq!(caps.structured_output_mode, "native_json");
3029    }
3030
3031    #[test]
3032    fn dashscope_and_llamacpp_resolve_capabilities() {
3033        reset();
3034        // New sibling providers should fall through to `openai` for
3035        // gpt-*  models even without dedicated rules.
3036        let caps = lookup("dashscope", "gpt-5.4-preview");
3037        assert!(caps.defer_loading);
3038        let caps = lookup("llamacpp", "gpt-5.4-preview");
3039        assert!(caps.defer_loading);
3040    }
3041
3042    #[test]
3043    fn unknown_provider_has_no_capabilities() {
3044        reset();
3045        let caps = lookup("my-custom-proxy", "foo-bar-1");
3046        assert!(!caps.native_tools);
3047        assert!(!caps.defer_loading);
3048        assert!(caps.tool_search.is_empty());
3049    }
3050
3051    #[test]
3052    fn openrouter_specific_rules_win_and_family_inheritance_is_preserved() {
3053        // Capability resolution is first-match-wins over fragment order
3054        // (`first_matching_rule_in_file` -> `Iterator::find`), and when no
3055        // `provider.openrouter` rule matches it walks the `[provider_family]`
3056        // chain (openrouter -> openai). Both contracts must hold so that:
3057        //   1. a specific OpenRouter carve-out beats a broader OpenRouter rule,
3058        //   2. gpt-/o-family slugs routed through OpenRouter still inherit the
3059        //      rich openai-family capability set (a blanket `*` openrouter row
3060        //      would shadow this — see the catalog-or-defaults report).
3061        reset();
3062
3063        // 1. Specific carve-out wins: deepseek/deepseek-v3.2 is pinned to the
3064        // Harn text-tool channel even though the broader deepseek/deepseek-v3*
3065        // rule below it would otherwise resolve `native`.
3066        let deepseek = lookup("openrouter", "deepseek/deepseek-v3.2");
3067        assert_eq!(
3068            deepseek.preferred_tool_format.as_deref(),
3069            Some("text"),
3070            "deepseek-v3.2 text carve-out must win over the broader deepseek-v3* rule"
3071        );
3072        assert_eq!(
3073            deepseek.tool_mode_parity.as_deref(),
3074            Some("native_unreliable")
3075        );
3076        // The broader sibling still resolves native for non-3.2 v3 slugs.
3077        assert_eq!(
3078            lookup("openrouter", "deepseek/deepseek-v3-base")
3079                .preferred_tool_format
3080                .as_deref(),
3081            Some("native")
3082        );
3083
3084        // 2. Family inheritance preserved: an openai-prefixed slug routed via
3085        // OpenRouter still picks up openai-family reasoning fields.
3086        let prefixed = lookup("openrouter", "openai/o4-mini");
3087        assert!(prefixed.requires_completion_tokens);
3088        assert!(prefixed.reasoning_effort_supported);
3089
3090        // The newly added MiniMax M2.5 OR mirror resolves native via the
3091        // existing `minimax/minimax-m2*` rule.
3092        let m25 = lookup("openrouter", "minimax/minimax-m2.5");
3093        assert!(m25.native_tools);
3094        assert_eq!(m25.preferred_tool_format.as_deref(), Some("native"));
3095    }
3096
3097    #[test]
3098    fn enterprise_routes_expose_format_preferences() {
3099        reset();
3100        let bedrock_claude = lookup("bedrock", "anthropic.claude-opus-4-7-v1:0");
3101        assert!(bedrock_claude.prefers_xml_scaffolding);
3102        assert_eq!(bedrock_claude.structured_output_mode, "xml_tagged");
3103        assert!(!bedrock_claude.supports_assistant_prefill);
3104        assert!(bedrock_claude.prefers_xml_tools);
3105
3106        let azure_o = lookup("azure_openai", "o3-prod");
3107        assert!(azure_o.prefers_markdown_scaffolding);
3108        assert_eq!(azure_o.structured_output_mode, "native_json");
3109        assert!(azure_o.prefers_role_developer);
3110        assert_eq!(azure_o.thinking_block_style, "reasoning_summary");
3111    }
3112
3113    #[test]
3114    fn user_override_adds_new_provider() {
3115        reset();
3116        let toml_src = concat!(
3117            "[[provider.my-proxy]]\n",
3118            "model_match = \"*\"\n",
3119            "native_tools = true\n",
3120            "tool_search = [\"hosted\"]\n",
3121            "prefers_xml_scaffolding = true\n",
3122            "structured_output_mode = \"xml_tagged\"\n",
3123            "supports_assistant_prefill = true\n",
3124            "prefers_xml_tools = true\n",
3125            "thinking_block_style = \"thinking_blocks\"\n",
3126        );
3127        set_user_overrides_toml(toml_src).unwrap();
3128        let caps = lookup("my-proxy", "anything");
3129        assert!(caps.native_tools);
3130        assert_eq!(caps.tool_search, vec!["hosted"]);
3131        assert!(caps.prefers_xml_scaffolding);
3132        assert_eq!(caps.structured_output_mode, "xml_tagged");
3133        assert!(caps.supports_assistant_prefill);
3134        assert!(caps.prefers_xml_tools);
3135        assert_eq!(caps.thinking_block_style, "thinking_blocks");
3136        clear_user_overrides();
3137    }
3138
3139    #[test]
3140    fn user_override_takes_precedence_over_builtin() {
3141        reset();
3142        let toml_src = r#"
3143[[provider.anthropic]]
3144model_match = "claude-opus-*"
3145native_tools = true
3146defer_loading = false
3147tool_search = []
3148"#;
3149        set_user_overrides_toml(toml_src).unwrap();
3150        let caps = lookup("anthropic", "claude-opus-4-7");
3151        assert!(caps.native_tools);
3152        assert!(!caps.defer_loading);
3153        assert!(caps.tool_search.is_empty());
3154        clear_user_overrides();
3155    }
3156
3157    #[test]
3158    fn user_override_from_manifest_toml() {
3159        reset();
3160        let manifest = concat!(
3161            "[package]\n",
3162            "name = \"demo\"\n\n",
3163            "[[capabilities.provider.my-proxy]]\n",
3164            "model_match = \"*\"\n",
3165            "native_tools = true\n",
3166            "tool_search = [\"hosted\"]\n",
3167            "prefers_markdown_scaffolding = true\n",
3168            "structured_output_mode = \"native_json\"\n",
3169            "prefers_role_developer = true\n",
3170            "thinking_block_style = \"reasoning_summary\"\n",
3171        );
3172        set_user_overrides_from_manifest_toml(manifest).unwrap();
3173        let caps = lookup("my-proxy", "foo");
3174        assert!(caps.native_tools);
3175        assert_eq!(caps.tool_search, vec!["hosted"]);
3176        assert!(caps.prefers_markdown_scaffolding);
3177        assert_eq!(caps.structured_output_mode, "native_json");
3178        assert!(caps.prefers_role_developer);
3179        assert_eq!(caps.thinking_block_style, "reasoning_summary");
3180        clear_user_overrides();
3181    }
3182
3183    #[test]
3184    fn version_min_requires_parseable_model() {
3185        reset();
3186        let toml_src = r#"
3187[[provider.custom]]
3188model_match = "*"
3189version_min = [5, 4]
3190native_tools = true
3191"#;
3192        set_user_overrides_toml(toml_src).unwrap();
3193        // Unparseable model ID + version_min → rule doesn't match.
3194        let caps = lookup("custom", "mystery-model");
3195        assert!(!caps.native_tools);
3196        clear_user_overrides();
3197    }
3198
3199    #[test]
3200    fn glob_match_substring() {
3201        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
3202        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
3203        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
3204    }
3205
3206    #[test]
3207    fn openrouter_namespaced_anthropic_model() {
3208        reset();
3209        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
3210        assert!(caps.defer_loading);
3211    }
3212
3213    #[test]
3214    fn matrix_rows_include_provider_patterns_and_sources() {
3215        reset();
3216        let rows = matrix_rows();
3217        assert!(rows.iter().any(|row| {
3218            row.provider == "openai"
3219                && row.model == "gpt-4o*"
3220                && row.vision
3221                && row.audio
3222                && row.json_schema.as_deref() == Some("native")
3223                && row.source == "builtin"
3224        }));
3225    }
3226
3227    #[test]
3228    fn validate_tool_format_autocorrects_native_pin_on_native_unreliable_route() {
3229        reset();
3230        // DeepSeek V3.2 on OpenRouter: tool_mode_parity = native_unreliable,
3231        // preferred_tool_format = text. A `native` request is the footgun — it
3232        // drops to unparsed DSML text and gets rejected. The gate must steer it
3233        // to the route's preferred text-channel format and explain why.
3234        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "native");
3235        assert_eq!(
3236            decision.effective, "text",
3237            "native must be auto-corrected to the route's preferred text format"
3238        );
3239        let reason = decision.correction.expect("a correction must be reported");
3240        assert!(reason.contains("native"), "names the rejected format");
3241        assert!(reason.contains("native_unreliable"), "names the parity");
3242        assert!(reason.contains("text"), "names the working alternative");
3243    }
3244
3245    #[test]
3246    fn validate_tool_format_passes_through_safe_combos() {
3247        reset();
3248        // A native-capable route with no adverse parity keeps the requested
3249        // native format untouched (no spurious correction).
3250        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3-base", "native");
3251        assert_eq!(decision.effective, "native");
3252        assert!(decision.correction.is_none());
3253
3254        // The same native_unreliable route is fine when text is requested.
3255        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "text");
3256        assert_eq!(decision.effective, "text");
3257        assert!(decision.correction.is_none());
3258
3259        // json is also a text-channel grammar and is accepted on a text route.
3260        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "json");
3261        assert_eq!(decision.effective, "json");
3262        assert!(decision.correction.is_none());
3263    }
3264
3265    #[test]
3266    fn validate_tool_format_leaves_unknown_routes_and_formats_alone() {
3267        reset();
3268        // Unknown provider/model has parity = unknown -> no opinion, pass through.
3269        let decision = validate_tool_format("my-proxy", "mystery-1", "native");
3270        assert_eq!(decision.effective, "native");
3271        assert!(decision.correction.is_none());
3272
3273        // An unclassifiable tool_format string is not ours to rewrite.
3274        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "frobnicate");
3275        assert_eq!(decision.effective, "frobnicate");
3276        assert!(decision.correction.is_none());
3277    }
3278
3279    #[test]
3280    fn validate_tool_format_steers_off_text_on_native_only_route() {
3281        reset();
3282        // Synthesize a native_only route via a project override and confirm a
3283        // text request is steered to native (the symmetric direction).
3284        let overrides: CapabilitiesFile = toml::from_str(
3285            "[[provider.acme]]\n\
3286             model_match = \"native-only-*\"\n\
3287             native_tools = true\n\
3288             text_tool_wire_format_supported = false\n\
3289             tool_mode_parity = \"native_only\"\n\
3290             preferred_tool_format = \"native\"\n",
3291        )
3292        .expect("override parses");
3293        let caps = lookup_with_user_overrides("acme", "native-only-1", Some(&overrides));
3294        let decision = validate_tool_format_with_caps("acme", "native-only-1", "text", &caps);
3295        assert_eq!(decision.effective, "native");
3296        let reason = decision
3297            .correction
3298            .expect("text on native_only is corrected");
3299        assert!(reason.contains("native_only"));
3300    }
3301
3302    #[test]
3303    fn validate_tool_format_honors_structural_text_unsupported_bit() {
3304        reset();
3305        // Real shipping route: ollama/qwen3* declares native_tools = true and
3306        // text_tool_wire_format_supported = false with NO tool_mode_parity
3307        // string. The gate's contract ("always yields parseable tool calls")
3308        // must hold from the structural bit alone — a text/json request is
3309        // steered to native, not passed through onto an unsupported channel.
3310        let caps = lookup("ollama", "qwen3-coder:30b");
3311        assert!(!caps.text_tool_wire_format_supported);
3312        for requested in ["text", "json"] {
3313            let decision =
3314                validate_tool_format_with_caps("ollama", "qwen3-coder:30b", requested, &caps);
3315            assert_eq!(
3316                decision.effective, "native",
3317                "{requested} must be steered to native on a text-unsupported route"
3318            );
3319            assert!(decision.correction.is_some());
3320        }
3321        // native is the route's working channel — untouched.
3322        let native = validate_tool_format_with_caps("ollama", "qwen3-coder:30b", "native", &caps);
3323        assert_eq!(native.effective, "native");
3324        assert!(native.correction.is_none());
3325    }
3326
3327    #[test]
3328    fn tool_format_resolution_is_serving_stack_aware_for_same_weights() {
3329        // The (model x serving-stack) insight: the SAME Qwen3.6 weights resolve
3330        // to DIFFERENT working tool-call channels depending on who serves them.
3331        // This divergence lives in the capability matrix as data (provider rows),
3332        // NOT in alias pins — so an alias refactor must not be able to regress
3333        // it. Locking the three live serving stacks here makes that explicit.
3334        reset();
3335
3336        // llama.cpp (:8001) — native is probe-validated and trusted.
3337        let llamacpp = validate_tool_format("llamacpp", "qwen3.6-35b-a3b-ud-q4-k-xl", "native");
3338        assert_eq!(
3339            llamacpp.effective, "native",
3340            "llama.cpp serves qwen3.6 native"
3341        );
3342        assert!(llamacpp.correction.is_none());
3343
3344        // Ollama (/v1) — the embedded qwen tool-call parser 500s on text-mode
3345        // output, so this route is served on the text/json channel: a native
3346        // request must be auto-corrected to json (never silently dropped).
3347        let ollama = validate_tool_format("ollama", "qwen3.6-35b-a3b", "native");
3348        assert_eq!(
3349            ollama.effective, "json",
3350            "ollama qwen3.6 must steer native -> json (server-side parser 500 leak)"
3351        );
3352        assert!(
3353            ollama.correction.is_some(),
3354            "the native->json steer must be explained, not silent"
3355        );
3356
3357        // A native_unreliable cloud route (deepinfra GLM-5) carries the same
3358        // serving-stack verdict via tool_mode_parity + empirical notes, and is
3359        // likewise steered off native.
3360        let glm = validate_tool_format("deepinfra", "deepinfra/glm-5.2", "native");
3361        assert_eq!(glm.effective, "json");
3362        assert!(glm.correction.is_some());
3363    }
3364
3365    #[test]
3366    fn validate_tool_format_passes_through_when_no_channel_works() {
3367        reset();
3368        // A route with no working tool surface — text_only parity forbids the
3369        // native channel, and text_tool_wire_format_supported = false forbids
3370        // the text channel — so BOTH channels are forbidden. The gate has
3371        // nothing better to steer to; it must NOT rewrite to an equally broken
3372        // format under a misleading correction. Pass through unchanged.
3373        let overrides: CapabilitiesFile = toml::from_str(
3374            "[[provider.acme]]\n\
3375             model_match = \"no-tools-*\"\n\
3376             native_tools = false\n\
3377             tool_mode_parity = \"text_only\"\n\
3378             text_tool_wire_format_supported = false\n",
3379        )
3380        .expect("override parses");
3381        let caps = lookup_with_user_overrides("acme", "no-tools-1", Some(&overrides));
3382        for requested in ["native", "text", "json"] {
3383            let decision = validate_tool_format_with_caps("acme", "no-tools-1", requested, &caps);
3384            assert_eq!(
3385                decision.effective, requested,
3386                "{requested} passes through unchanged"
3387            );
3388            assert!(decision.correction.is_none());
3389        }
3390    }
3391
3392    /// FOOTGUN-REMOVAL — gpt-oss (Harmony) on the pay-per-token DeepInfra and
3393    /// SambaNova routes drops tool calls into the reasoning channel on native, so
3394    /// a `native` pin must auto-correct to the route's `text` channel with an
3395    /// explanatory correction. The known-good native routes (cerebras gpt-oss,
3396    /// sambanova minimax) must stay untouched.
3397    #[test]
3398    fn validate_tool_format_autocorrects_gpt_oss_native_pin_to_text() {
3399        reset();
3400        for (provider, model) in [
3401            ("deepinfra", "deepinfra/openai/gpt-oss-120b"),
3402            ("sambanova", "sambanova/gpt-oss-120b"),
3403        ] {
3404            let decision = validate_tool_format(provider, model, "native");
3405            assert_eq!(
3406                decision.effective, "text",
3407                "{provider}/{model}: native must auto-correct to text"
3408            );
3409            let reason = decision
3410                .correction
3411                .unwrap_or_else(|| panic!("{provider}/{model}: a correction must be reported"));
3412            assert!(
3413                reason.contains("native_unreliable"),
3414                "{provider}/{model}: names the parity"
3415            );
3416            assert!(
3417                reason.contains("text"),
3418                "{provider}/{model}: names the working alternative"
3419            );
3420            // text is already safe and passes through unchanged.
3421            let text = validate_tool_format(provider, model, "text");
3422            assert_eq!(text.effective, "text");
3423            assert!(text.correction.is_none());
3424        }
3425    }
3426
3427    /// FOOTGUN-REMOVAL — the GLM-5.x native channel emits `<tool_call>` markup
3428    /// instead of provider-native `tool_calls`, so the zai-direct GLM rows pin
3429    /// text and a `native` pin must auto-correct, matching the Fireworks/
3430    /// DeepInfra/Baseten precedents.
3431    #[test]
3432    fn validate_tool_format_autocorrects_zai_glm_native_pin_to_text() {
3433        reset();
3434        for model in ["glm-5.2", "glm-5.1", "glm-5"] {
3435            let decision = validate_tool_format("zai", model, "native");
3436            assert_eq!(
3437                decision.effective, "text",
3438                "zai/{model}: native must auto-correct to text"
3439            );
3440            let reason = decision
3441                .correction
3442                .unwrap_or_else(|| panic!("zai/{model}: a correction must be reported"));
3443            assert!(
3444                reason.contains("native_unreliable"),
3445                "zai/{model}: names the parity"
3446            );
3447        }
3448    }
3449
3450    /// The known-good native routes must NOT be touched by the gpt-oss/GLM
3451    /// pins above — a native pin stays native with no spurious correction.
3452    #[test]
3453    fn validate_tool_format_leaves_known_good_native_routes_unchanged() {
3454        reset();
3455        for (provider, model) in [
3456            // cerebras gpt-oss is native-clean (only throttled).
3457            ("cerebras", "gpt-oss-120b"),
3458            // sambanova deepseek-v3.2 is native and interchangeable; minimax is
3459            // native_unreliable upstream and is not a known-good native
3460            // exemplar.
3461            ("sambanova", "DeepSeek-V3.2"),
3462        ] {
3463            let decision = validate_tool_format(provider, model, "native");
3464            assert_eq!(
3465                decision.effective, "native",
3466                "{provider}/{model}: known-good native route must stay native"
3467            );
3468            assert!(
3469                decision.correction.is_none(),
3470                "{provider}/{model}: no spurious correction"
3471            );
3472        }
3473    }
3474
3475    /// FOOTGUN-REMOVAL — the first-class no-viable-channel guard fires when BOTH
3476    /// channels are forbidden (a route the registry trusts on neither native nor
3477    /// text), naming the bad combo and a suggested alternative — never a silent
3478    /// empty tool stream.
3479    #[test]
3480    fn no_viable_tool_channel_guard_fires_only_when_both_channels_forbidden() {
3481        reset();
3482        // Construct a gpt-oss route with NO working channel: native_unreliable
3483        // forbids native, and text_tool_wire_format_supported = false forbids the
3484        // text channel too.
3485        let overrides: CapabilitiesFile = toml::from_str(
3486            "[[provider.acme]]\n\
3487             model_match = \"acme/gpt-oss-stub\"\n\
3488             native_tools = false\n\
3489             tool_mode_parity = \"native_unreliable\"\n\
3490             text_tool_wire_format_supported = false\n",
3491        )
3492        .expect("override parses");
3493        let caps = lookup_with_user_overrides("acme", "acme/gpt-oss-stub", Some(&overrides));
3494        let message = no_viable_tool_channel_with_caps("acme", "acme/gpt-oss-stub", &caps)
3495            .expect("the guard must fire when neither channel works");
3496        assert!(
3497            message.contains("no viable tool-calling channel"),
3498            "names the failure: {message}"
3499        );
3500        assert!(
3501            message.contains("acme/gpt-oss-stub"),
3502            "names the bad combo: {message}"
3503        );
3504        // gpt-oss models get the Harmony-specific text-channel hint.
3505        assert!(
3506            message.contains("gpt-oss") && message.contains("text"),
3507            "suggests an alternative: {message}"
3508        );
3509
3510        // The DeepInfra/SambaNova gpt-oss rows keep a working text channel, so
3511        // the guard must NOT fire on them (they auto-correct instead).
3512        assert!(
3513            no_viable_tool_channel("deepinfra", "deepinfra/openai/gpt-oss-120b").is_none(),
3514            "auto-correctable route must not trip the fail-fast guard"
3515        );
3516        assert!(
3517            no_viable_tool_channel("sambanova", "sambanova/gpt-oss-120b").is_none(),
3518            "auto-correctable route must not trip the fail-fast guard"
3519        );
3520        // A healthy native-clean route never trips it.
3521        assert!(
3522            no_viable_tool_channel("cerebras", "gpt-oss-120b").is_none(),
3523            "healthy native route must not trip the guard"
3524        );
3525        // The generic (non-gpt-oss) no-channel case still fires with a generic
3526        // hint.
3527        let generic: CapabilitiesFile = toml::from_str(
3528            "[[provider.acme]]\n\
3529             model_match = \"mystery-1\"\n\
3530             native_tools = false\n\
3531             tool_mode_parity = \"text_only\"\n\
3532             text_tool_wire_format_supported = false\n",
3533        )
3534        .expect("override parses");
3535        let caps = lookup_with_user_overrides("acme", "mystery-1", Some(&generic));
3536        let message = no_viable_tool_channel_with_caps("acme", "mystery-1", &caps)
3537            .expect("guard fires on the generic no-channel route too");
3538        assert!(
3539            message.contains("harn provider catalog matrix"),
3540            "{message}"
3541        );
3542    }
3543}
harn_vm/llm/capabilities.rs

harn_vm/llm/
capabilities.rs