Skip to main content

harn_vm/llm/
capabilities.rs

1//! Data-driven provider capabilities.
2//!
3//! The per-(provider, model) capability matrix (native tools, deferred
4//! tool loading, tool-search variants, prompt caching, extended thinking,
5//! max tool count) lives in `capability_sources/**/*.toml`, which generates
6//! the shipped `capabilities.toml` snapshot, and is
7//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
8//! in `harn.toml`. This module owns:
9//!
10//! - loading the built-in TOML (compiled in via `include_str!`);
11//! - merging user overrides on top;
12//! - matching a `(provider, model)` pair against the rule list with
13//!   glob + semver semantics;
14//! - exposing a stable `Capabilities` struct that the `LlmProvider`
15//!   trait delegates to as the single source of truth.
16//!
17//! Provider adapters still supply generation parsers for `version_min`, but
18//! feature gates live in this data table instead of adapter-specific boolean
19//! branches.
20
21use std::cell::RefCell;
22use std::collections::{BTreeMap, HashSet};
23use std::sync::OnceLock;
24
25use serde::{Deserialize, Serialize};
26
27use super::providers::anthropic::claude_generation;
28use super::providers::openai_compat::gpt_generation;
29
/// Generated shipped default rules. Compiled into the binary at build time
/// via `include_str!`, so the text is available without reading a file at
/// runtime.
const BUILTIN_TOML: &str = include_str!("capabilities.toml");
/// Generated provider/model snapshot built from catalog_sources/**/*.toml.
/// Embedded the same way as [`BUILTIN_TOML`], from `providers.toml` next to
/// this source file.
const BUILTIN_PROVIDERS_TOML: &str = include_str!("providers.toml");
34
/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
///
/// All three tables are marked `#[serde(default)]`, so a document that
/// omits any of them deserializes with that table empty instead of failing.
/// Keys are provider names; `BTreeMap` keeps them in sorted order.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists. The first matching rule wins; a
    /// matching rule with `extends = true` contributes only the fields it
    /// sets and lets resolution continue to later matching rules (see
    /// [`ProviderRule::extends`]).
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Per-provider defaults applied to every matching row and to
    /// provider/model pairs that have no model-specific row. This keeps
    /// transport-shape facts in data without repeating them on every
    /// generation-specific capability row.
    #[serde(default)]
    pub provider_defaults: BTreeMap<String, ProviderDefaults>,
    /// Sibling → canonical family mapping. Providers with no rule of
    /// their own fall through to the named family (recursively).
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
}
56
/// Provider-wide default fields merged into matching rules.
///
/// Every field is an `Option` with `#[serde(default)]`: `None` means the
/// defaults block did not mention the field, which is what the
/// `overlay` / `fill_missing_from` merges key off.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProviderDefaults {
    /// Message/request/response wire format used by shared helpers.
    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
    #[serde(default)]
    pub message_wire_format: Option<String>,
    /// Native tool definition wire shape. Known values are `openai`
    /// and `anthropic`.
    #[serde(default)]
    pub native_tool_wire_format: Option<String>,
    /// Whether image content blocks may reference remote URLs.
    #[serde(default)]
    pub image_url_input_supported: Option<bool>,
    /// File-upload transport used by `std/files.upload`. Known values
    /// are `anthropic` and `gemini`.
    #[serde(default)]
    pub file_upload_wire_format: Option<String>,
    /// Provider-specific reasoning request shape for OpenAI-compatible
    /// transports. Known values are `openrouter` and `enabled`.
    // NOTE(review): the per-rule field of the same name on `ProviderRule`
    // also lists `minimax`; confirm whether it is valid here as well.
    #[serde(default)]
    pub reasoning_wire_format: Option<String>,
    /// Provider-wide counterpart of `ProviderRule::files_api_supported`,
    /// which is documented as: whether uploaded file references can be
    /// reused in message content.
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    // NOTE(review): the six `*_supported` flags below share their names with
    // fields on `ProviderRule`. They look like per-parameter gates for the
    // matching sampling request fields (`seed`, `top_k`, `temperature`,
    // `top_p`, `frequency_penalty`, `presence_penalty`), but that reading is
    // inferred from the names only — confirm against the request builders.
    #[serde(default)]
    pub seed_supported: Option<bool>,
    #[serde(default)]
    pub top_k_supported: Option<bool>,
    #[serde(default)]
    pub temperature_supported: Option<bool>,
    #[serde(default)]
    pub top_p_supported: Option<bool>,
    #[serde(default)]
    pub frequency_penalty_supported: Option<bool>,
    #[serde(default)]
    pub presence_penalty_supported: Option<bool>,
}
94
/// Last-writer-wins overlay for a single optional field: when `src` holds a
/// value, `dst` is replaced with a clone of it; when `src` is `None`, `dst`
/// is left exactly as it was.
fn overlay_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    if let Some(value) = src {
        *dst = Some(value.clone());
    }
}
101
/// Fill-the-gaps merge for a single optional field: a `dst` that already
/// holds a value is never touched; an unset `dst` takes a clone of `src`
/// (which may itself be `None`, leaving `dst` unset).
fn fill_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    if dst.is_some() {
        return;
    }
    *dst = src.clone();
}
108
/// Applies `$op` (`overlay_opt` or `fill_opt`) to the `(dst, src)` pair of
/// every `ProviderDefaults` field, one call per field, in declaration order.
///
/// The three-argument arm is the one callers use; it pins the field roster
/// and hands it to the internal `@each` arm, which stamps out
/// `$op(&mut $dst.field, &$src.field);` for each listed name. The roster
/// lives here only: `overlay`/`fill_missing_from` differ solely in the merge
/// rule they pass.
macro_rules! merge_provider_defaults {
    // Internal arm, listed first so the leading `@each` marker is matched
    // literally before the public arm tries to read it as an expression.
    (@each $dst:expr, $src:expr, $op:path; $($field:ident),+ $(,)?) => {{
        $( $op(&mut $dst.$field, &$src.$field); )+
    }};
    ($dst:expr, $src:expr, $op:path) => {
        merge_provider_defaults!(
            @each $dst, $src, $op;
            message_wire_format,
            native_tool_wire_format,
            image_url_input_supported,
            file_upload_wire_format,
            reasoning_wire_format,
            files_api_supported,
            seed_supported,
            top_k_supported,
            temperature_supported,
            top_p_supported,
            frequency_penalty_supported,
            presence_penalty_supported,
        )
    };
}
143
144impl ProviderDefaults {
145    fn overlay(&mut self, other: &ProviderDefaults) {
146        merge_provider_defaults!(self, other, overlay_opt);
147    }
148
149    fn fill_missing_from(&mut self, other: &ProviderDefaults) {
150        merge_provider_defaults!(self, other, fill_opt);
151    }
152
153    fn has_any_field(&self) -> bool {
154        self.message_wire_format.is_some()
155            || self.native_tool_wire_format.is_some()
156            || self.image_url_input_supported.is_some()
157            || self.file_upload_wire_format.is_some()
158            || self.reasoning_wire_format.is_some()
159            || self.files_api_supported.is_some()
160            || self.seed_supported.is_some()
161            || self.top_k_supported.is_some()
162            || self.temperature_supported.is_some()
163            || self.top_p_supported.is_some()
164            || self.frequency_penalty_supported.is_some()
165            || self.presence_penalty_supported.is_some()
166    }
167}
168
/// One row of the capability matrix.
///
/// Apart from `model_match` and `extends`, every field is an `Option` with
/// `#[serde(default)]`: `None` records that the row did not mention the
/// field, which is the "explicitly set" signal the `extends` fall-through
/// merge relies on.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules with an unparseable `version_min` for the
    /// given model are skipped, not merged.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    /// Per-rule fall-through. A matching rule with `extends = true`
    /// contributes ONLY the fields it explicitly sets; resolution then
    /// continues to later matching rules (user rules before built-in rules,
    /// then the `provider_family` chain) and ultimately to provider /
    /// built-in defaults to fill the rest. A matching rule without
    /// `extends` (or with `extends = false`) terminates resolution exactly
    /// as before this flag existed. This lets an overlay tweak one field of
    /// a shipped row without copying the whole row verbatim (which drifts).
    #[serde(default)]
    pub extends: bool,
    /// Native-tools capability flag (the "native tools" column named in the
    /// module docs).
    #[serde(default)]
    pub native_tools: Option<bool>,
    /// Message/request/response wire format used by shared helpers.
    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
    #[serde(default)]
    pub message_wire_format: Option<String>,
    /// Native tool definition wire shape. Known values are `openai`
    /// and `anthropic`.
    #[serde(default)]
    pub native_tool_wire_format: Option<String>,
    /// Deferred tool loading capability flag (the "deferred tool loading"
    /// column named in the module docs).
    #[serde(default)]
    pub defer_loading: Option<bool>,
    /// Tool-search variants for this row (the "tool-search variants" column
    /// named in the module docs).
    // NOTE(review): the accepted variant names are not visible in this part
    // of the file.
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    /// Whether Harn supports this route through the provider's native
    /// Responses-style API instead of generic chat completions.
    #[serde(default)]
    pub responses_api: Option<bool>,
    /// Provider-hosted tools Harn can pass through without local execution.
    #[serde(default)]
    pub hosted_tools: Option<Vec<String>>,
    /// Whether provider-hosted remote MCP connectors can be mediated by the
    /// provider for this route.
    #[serde(default)]
    pub remote_mcp: Option<bool>,
    /// Whether provider-managed previous-response conversation state is
    /// available.
    #[serde(default)]
    pub conversation_state: Option<bool>,
    /// Whether provider-side truncation/compaction controls are available.
    #[serde(default)]
    pub compaction: Option<bool>,
    /// Whether provider-side background Responses jobs are available.
    #[serde(default)]
    pub background_mode: Option<bool>,
    /// Approval policy modes available when provider-hosted tools execute.
    #[serde(default)]
    pub tool_approval_policy: Option<String>,
    /// Maximum tool count for this row (the "max tool count" column named in
    /// the module docs).
    #[serde(default)]
    pub max_tools: Option<u32>,
    /// Prompt caching capability flag (the "prompt caching" column named in
    /// the module docs).
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Request-side cache breakpoint strategy for routes that require
    /// `cache_control` to opt into provider prompt caching. Known values are
    /// `none`, `top_level`, and `last_block`.
    #[serde(default)]
    pub cache_breakpoint_style: Option<String>,
    /// Whether this provider/model route accepts image or other visual
    /// input blocks through Harn's LLM message path.
    #[serde(default)]
    pub vision: Option<bool>,
    /// Whether this provider/model route accepts audio input blocks
    /// through Harn's LLM message path.
    /// Also deserializes from the key `audio_supported`.
    #[serde(default, alias = "audio_supported")]
    pub audio: Option<bool>,
    /// Whether this provider/model route accepts PDF/document input blocks
    /// through Harn's LLM message path.
    /// Also deserializes from the key `pdf_supported`.
    #[serde(default, alias = "pdf_supported")]
    pub pdf: Option<bool>,
    /// Whether this provider/model route accepts video input blocks
    /// through Harn's LLM message path.
    /// Also deserializes from the key `video_supported`.
    #[serde(default, alias = "video_supported")]
    pub video: Option<bool>,
    /// Whether uploaded file references can be reused in message content.
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    /// File-upload transport used by `std/files.upload`. Known values
    /// are `anthropic` and `gemini`.
    #[serde(default)]
    pub file_upload_wire_format: Option<String>,
    /// Structured-output transport strategy. Known values are:
    /// `native`, `tool_use`, `format_kw`, and `none`.
    #[serde(default)]
    pub structured_output: Option<String>,
    /// Legacy name retained for project overrides written before
    /// `structured_output` became the canonical capability.
    #[serde(default)]
    pub json_schema: Option<String>,
    /// Whether prompt sections should prefer XML-style tags such as
    /// `<task>` / `<examples>` over Markdown headings.
    #[serde(default)]
    pub prefers_xml_scaffolding: Option<bool>,
    /// Whether this model's tokenizer reserves `<tool_call>` / `</tool_call>`
    /// as single special tokens (the native Hermes tool-call markers). When
    /// true, harn remaps those delimiters to a non-special bracket form on the
    /// wire to avoid degenerate opener repetition; see [`crate::llm::tool_delimiter`].
    #[serde(default)]
    pub reserved_tool_call_token: Option<bool>,
    /// Whether prompt sections should prefer Markdown headings such as
    /// `## Task` / `## Examples`.
    #[serde(default)]
    pub prefers_markdown_scaffolding: Option<bool>,
    /// Preferred logical structured-output prompt shape. This is separate
    /// from the transport-level `structured_output` strategy above.
    /// Known values are `native_json`, `delimited`, and `xml_tagged`.
    #[serde(default)]
    pub structured_output_mode: Option<String>,
    /// Whether the route accepts an assistant-role prefill message.
    #[serde(default)]
    pub supports_assistant_prefill: Option<bool>,
    /// Whether durable instructions should use OpenAI's `developer` role
    /// instead of `system`.
    #[serde(default)]
    pub prefers_role_developer: Option<bool>,
    /// Whether text-rendered tool specifications should use XML wrappers
    /// instead of JSON-schema prose.
    #[serde(default)]
    pub prefers_xml_tools: Option<bool>,
    /// Preferred representation for model thinking/reasoning blocks in
    /// transcript-like prompt context. Known values are `none`,
    /// `thinking_blocks`, `reasoning_summary`, and `inline`.
    #[serde(default)]
    pub thinking_block_style: Option<String>,
    /// Supported thinking/reasoning modes for this rule. Values are
    /// script-facing mode names: `enabled`, `adaptive`, and `effort`.
    #[serde(default)]
    pub thinking_modes: Option<Vec<String>>,
    /// Whether Anthropic interleaved thinking is supported for this
    /// provider/model route.
    #[serde(default)]
    pub interleaved_thinking_supported: Option<bool>,
    /// Anthropic beta features that should be requested for this route.
    #[serde(default)]
    pub anthropic_beta_features: Option<Vec<String>>,
    /// Legacy override compatibility. New built-in rules should use
    /// `thinking_modes` so the capability matrix preserves mode detail.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Whether the model accepts image inputs in chat content.
    // NOTE(review): this overlaps with the `vision` field above; which of the
    // two takes precedence during resolution is not visible in this part of
    // the file.
    #[serde(default)]
    pub vision_supported: Option<bool>,
    /// Whether image content blocks may reference remote URLs.
    #[serde(default)]
    pub image_url_input_supported: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
    /// Name of any server-side response parser that can transform model
    /// bytes before Harn sees them. `none` means the provider returns the
    /// model text/tool channel without an implicit parser.
    #[serde(default)]
    pub server_parser: Option<String>,
    /// Whether provider-specific chat-template options are honored. Most
    /// OpenAI-compatible servers call this `chat_template_kwargs`; Baseten's
    /// Model APIs spell the same concept `chat_template_args`.
    #[serde(default)]
    pub honors_chat_template_kwargs: Option<bool>,
    /// Request body field for provider-specific chat-template options when it
    /// differs from the default `chat_template_kwargs`.
    #[serde(default)]
    pub chat_template_options_field: Option<String>,
    /// Whether this route requires OpenAI's `max_completion_tokens`
    /// request field instead of legacy `max_tokens`.
    #[serde(default)]
    pub requires_completion_tokens: Option<bool>,
    /// Whether this route rejects non-streaming chat-completion requests.
    /// Harn forces streaming for such routes so callers can keep provider-
    /// neutral `stream` preferences.
    #[serde(default)]
    pub requires_streaming: Option<bool>,
    /// Whether this route accepts Harn's provider-neutral reasoning effort
    /// control. Providers project this to their native field (for example
    /// OpenAI `reasoning_effort` or Anthropic `output_config.effort`).
    #[serde(default)]
    pub reasoning_effort_supported: Option<bool>,
    /// Accepted effort values for routes that expose a narrower subset than
    /// Harn's provider-neutral enum. Empty means "unknown/all".
    #[serde(default)]
    pub reasoning_effort_levels: Option<Vec<String>>,
    /// Whether this route accepts effort "none" as a true reasoning-off
    /// setting. Older GPT-5 variants support effort but only floor at
    /// `minimal`.
    #[serde(default)]
    pub reasoning_none_supported: Option<bool>,
    /// Maximum thinking-budget tokens this model accepts for its high/xhigh/max
    /// reasoning levels, when the provider takes an explicit token budget rather
    /// than an effort enum. The canonical case is the native Gemini API
    /// `generationConfig.thinkingConfig.thinkingBudget` field, whose ceiling
    /// differs by model (Gemini 2.5 Flash caps at 24576, Pro at 32768).
    /// Declared alongside the model's other wire capabilities instead of a
    /// hard-coded `model.contains("flash")` branch in the provider.
    #[serde(default)]
    pub max_thinking_budget: Option<i64>,
    /// Whether this route accepts an explicit disabled/off reasoning switch.
    /// Some routes require reasoning and reject the provider's disabled shape.
    #[serde(default)]
    pub reasoning_disable_supported: Option<bool>,
    /// Whether this model performs *tool calls inside its reasoning channel*,
    /// so disabling reasoning silently breaks tool calling. The canonical case
    /// is the OpenAI gpt-oss (Harmony) family: with reasoning disabled it emits
    /// 0 tool_calls and a tiny billed-noncommittal completion; with reasoning
    /// enabled (even `low`) it emits clean native tool calls. This is the
    /// *opposite* of the Qwen3 quirk (Qwen narrates tool intent in the
    /// reasoning trace and emits zero `tool_calls`, so Qwen needs reasoning
    /// OFF for tools). When set, `reasoning_policy` refuses to downgrade the
    /// auto reasoning level to `off` for tool-bearing tasks (agent/code/verify)
    /// — flooring instead to the lowest supported effort — so no future
    /// auto-policy default or session pin can re-introduce the
    /// billed-noncommittal failure at the data layer.
    #[serde(default)]
    pub reasoning_required_for_tools: Option<bool>,
    /// Whether reasoning-only clean stops may be promoted into visible text.
    /// Disable this for providers whose `reasoning` field is always private
    /// trace, even when `content` is empty.
    #[serde(default)]
    pub reasoning_text_promotable: Option<bool>,
    /// Provider-specific reasoning request shape for OpenAI-compatible
    /// transports. Known values are `openrouter`, `enabled`, and `minimax`.
    #[serde(default)]
    pub reasoning_wire_format: Option<String>,
    // NOTE(review): the six `*_supported` flags below share their names with
    // fields on `ProviderDefaults`. They look like per-parameter gates for
    // the matching sampling request fields (`seed`, `top_k`, `temperature`,
    // `top_p`, `frequency_penalty`, `presence_penalty`), but that reading is
    // inferred from the names only — confirm against the request builders.
    #[serde(default)]
    pub seed_supported: Option<bool>,
    #[serde(default)]
    pub top_k_supported: Option<bool>,
    #[serde(default)]
    pub temperature_supported: Option<bool>,
    #[serde(default)]
    pub top_p_supported: Option<bool>,
    #[serde(default)]
    pub frequency_penalty_supported: Option<bool>,
    #[serde(default)]
    pub presence_penalty_supported: Option<bool>,
    /// Accepted provider-native `tool_choice` modes. Empty means unrestricted
    /// or unknown. Use this for routes whose native tools work, but whose API
    /// rejects forced/specified tool choices.
    #[serde(default)]
    pub allowed_tool_choice_modes: Option<Vec<String>>,
    /// Whether an assistant `tool_calls` message must be followed immediately
    /// by `role=tool` messages for every emitted `tool_call_id`.
    #[serde(default)]
    pub requires_tool_result_adjacency: Option<bool>,
    /// Whether a single assistant message may contain multiple tool calls.
    /// Some OpenAI-compatible providers reject replayed history with more than
    /// one `tool_calls[]` entry even when the calls were parsed from Harn's text
    /// tool protocol, so the request builder must serialize history as
    /// one-call assistant turns for those routes.
    #[serde(default)]
    pub supports_parallel_tool_calls: Option<bool>,
    /// Whether the route rejects `response_format` when native `tools` are
    /// present. Strict OpenAI-compatible servers such as Cerebras accept each
    /// feature alone but reject the pair together.
    #[serde(default)]
    pub tools_exclude_response_format: Option<bool>,
    /// Preferred endpoint family for this provider/model route. Values
    /// are descriptive labels consumed by providers, e.g.
    /// `/api/generate-raw` for Ollama raw prompt bypass.
    #[serde(default)]
    pub recommended_endpoint: Option<String>,
    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
    /// survive the provider route and return in the visible response body.
    #[serde(default)]
    pub text_tool_wire_format_supported: Option<bool>,
    /// Preferred tool-calling mode for this provider/model route when
    /// callers do not explicitly choose `tool_format`. This lets the
    /// capability matrix route around known provider-native regressions
    /// without making presets branch on model names.
    #[serde(default)]
    pub preferred_tool_format: Option<String>,
    /// Empirical native/text interchangeability status for this route.
    /// Known values are descriptive, not gates: `interchangeable`,
    /// `native_unreliable`, `text_unreliable`, `native_only`,
    /// `text_only`, and `unknown`.
    #[serde(default)]
    pub tool_mode_parity: Option<String>,
    /// Short human-readable note explaining `tool_mode_parity`.
    #[serde(default)]
    pub tool_mode_parity_notes: Option<String>,
    /// In-prompt directive that disables this model's "thinking" mode when
    /// the API doesn't expose a first-class field (or exposes it
    /// inconsistently across templates / quantizations). For Qwen3 family
    /// chat templates this is `/no_think`. When `thinking: false` is
    /// requested and this is set, Harn auto-prepends the directive to the
    /// system message so script authors don't need to know it exists.
    #[serde(default)]
    pub thinking_disable_directive: Option<String>,
    /// Per-task auto-policy reasoning-level overrides for this route.
    /// Keys are task labels (`agent`, `verify`, `chat`, `summarize`,
    /// `code`); values are reasoning levels (`off`, `minimal`, `low`,
    /// `medium`, `high`, `xhigh`, `max`). Consulted by `reasoning_policy` only
    /// when policy resolves to `auto` — explicit policies always win.
    ///
    /// Use this to declare known per-model regressions that should
    /// flip the auto-policy default, instead of hard-coding the model/
    /// provider pattern in resolver code. The canonical example is the
    /// Qwen3 tool-call regression — `{ agent = "off" }` disables
    /// reasoning whenever a script registers tools with that route,
    /// matching Qwen's own published guidance.
    #[serde(default)]
    pub auto_reasoning_overrides: Option<BTreeMap<String, String>>,
    /// OpenRouter upstream provider names that must be excluded from routing
    /// for this `(provider, model)` row. Materialized into the request body's
    /// `provider.ignore` array (see
    /// [`crate::llm::providers::openai_compat::apply_openrouter_route_denylist`]).
    /// This is a data-driven route-around for upstreams that serve a route
    /// incorrectly while still advertising the model — the canonical case is
    /// OpenRouter's `Ambient` upstream billing reasoning tokens for
    /// `qwen/qwen3.6-35b-a3b` and then finishing with empty `tool_calls`,
    /// while Parasail / AtlasCloud / AkashML serve the identical request
    /// natively. Only consulted for the `openrouter` provider.
    #[serde(default)]
    pub provider_route_denylist: Option<Vec<String>>,
    /// OpenRouter upstream provider names this `(provider, model)` row is
    /// PINNED to, in preference order. Materialized into the request body's
    /// `provider.order` array with `allow_fallbacks = false` (see
    /// [`crate::llm::providers::openai_compat::apply_openrouter_provider_order`]),
    /// so OpenRouter only ever routes the model to these known-clean upstreams
    /// and never silently falls back to a sketchier one. This is the
    /// *allowlist* counterpart to [`Self::provider_route_denylist`]: prefer it
    /// when the bad upstreams are intermittent / hard to enumerate but the
    /// clean ones are few and stable. The canonical case is OpenRouter's
    /// `openai/gpt-oss-*` route, which fans out across ~17 upstreams in a
    /// sub-provider lottery; some mis-serialize the Harmony tool call even with
    /// reasoning ON (billed-noncommittal: 0 tool_calls), while Cerebras and
    /// Groq serve it cleanly. Only consulted for the `openrouter` provider. An
    /// empty / unset list means "no pin" (free OpenRouter routing). When both a
    /// pin and a denylist are present the pin wins (a closed allowlist already
    /// excludes everything not on it). Validated by the footgun gate in
    /// [`crate::llm::capability_audit`].
    #[serde(default)]
    pub openrouter_provider_order: Option<Vec<String>>,
    /// Serving-quality / precision trust verdict for this `(provider, model)`
    /// route. A provider can be live and fast yet still serve a model at
    /// DEGRADED quality (e.g. an undocumented quantization) or reject otherwise
    /// valid requests, silently contaminating any eval/meter that trusts its
    /// numbers. This is the data-driven sibling of [`Self::provider_route_denylist`]
    /// / [`Self::openrouter_provider_order`]: instead of routing *around* a bad
    /// upstream, it labels the route's measured precision so tooling (the
    /// meter precision canary) can refuse to trust a `degraded` route and flag a
    /// `throttled` one. Known values are `trusted` (full-precision verified
    /// against a reference), `degraded` (proven to serve at reduced quality),
    /// `throttled` (full-precision but rate-limited to unusable timing), and
    /// `unverified` (no verdict — treated the same as unset). Unset means
    /// `unverified`.
    #[serde(default)]
    pub serving_precision: Option<String>,
}
532
533impl ProviderRule {
534    /// Fill every capability field that `self` (the accumulated `extends`
535    /// fall-through chain so far) has NOT explicitly set from `other`, a
536    /// later matching rule with lower precedence. "Explicitly set" is the
537    /// serde `Option` raw-deserialization state — never inferred from a
538    /// field's value equaling the default.
539    ///
540    /// The destructure of `other` is deliberately exhaustive (no `..`
541    /// catch-all): adding a new capability field to [`ProviderRule`] fails
542    /// to compile here until the merge handles it.
543    fn fill_missing_from(&mut self, other: &ProviderRule) {
544        let ProviderRule {
545            // Rule-matching metadata, not capability payload: the merged
546            // chain keeps the first (highest-precedence) rule's identity.
547            model_match: _,
548            version_min: _,
549            extends: _,
550            native_tools,
551            message_wire_format,
552            native_tool_wire_format,
553            defer_loading,
554            tool_search,
555            responses_api,
556            hosted_tools,
557            remote_mcp,
558            conversation_state,
559            compaction,
560            background_mode,
561            tool_approval_policy,
562            max_tools,
563            prompt_caching,
564            cache_breakpoint_style,
565            vision,
566            audio,
567            pdf,
568            video,
569            files_api_supported,
570            file_upload_wire_format,
571            structured_output,
572            json_schema,
573            prefers_xml_scaffolding,
574            reserved_tool_call_token,
575            prefers_markdown_scaffolding,
576            structured_output_mode,
577            supports_assistant_prefill,
578            prefers_role_developer,
579            prefers_xml_tools,
580            thinking_block_style,
581            thinking_modes,
582            interleaved_thinking_supported,
583            anthropic_beta_features,
584            thinking,
585            vision_supported,
586            image_url_input_supported,
587            preserve_thinking,
588            server_parser,
589            honors_chat_template_kwargs,
590            chat_template_options_field,
591            requires_completion_tokens,
592            requires_streaming,
593            reasoning_effort_supported,
594            reasoning_effort_levels,
595            reasoning_none_supported,
596            max_thinking_budget,
597            reasoning_disable_supported,
598            reasoning_required_for_tools,
599            reasoning_text_promotable,
600            reasoning_wire_format,
601            seed_supported,
602            top_k_supported,
603            temperature_supported,
604            top_p_supported,
605            frequency_penalty_supported,
606            presence_penalty_supported,
607            allowed_tool_choice_modes,
608            requires_tool_result_adjacency,
609            supports_parallel_tool_calls,
610            tools_exclude_response_format,
611            recommended_endpoint,
612            text_tool_wire_format_supported,
613            preferred_tool_format,
614            tool_mode_parity,
615            tool_mode_parity_notes,
616            thinking_disable_directive,
617            auto_reasoning_overrides,
618            provider_route_denylist,
619            openrouter_provider_order,
620            serving_precision,
621        } = other;
622        fill_opt(&mut self.native_tools, native_tools);
623        fill_opt(&mut self.message_wire_format, message_wire_format);
624        fill_opt(&mut self.native_tool_wire_format, native_tool_wire_format);
625        fill_opt(&mut self.defer_loading, defer_loading);
626        fill_opt(&mut self.tool_search, tool_search);
627        fill_opt(&mut self.responses_api, responses_api);
628        fill_opt(&mut self.hosted_tools, hosted_tools);
629        fill_opt(&mut self.remote_mcp, remote_mcp);
630        fill_opt(&mut self.conversation_state, conversation_state);
631        fill_opt(&mut self.compaction, compaction);
632        fill_opt(&mut self.background_mode, background_mode);
633        fill_opt(&mut self.tool_approval_policy, tool_approval_policy);
634        fill_opt(&mut self.max_tools, max_tools);
635        fill_opt(&mut self.prompt_caching, prompt_caching);
636        fill_opt(&mut self.cache_breakpoint_style, cache_breakpoint_style);
637        fill_opt(&mut self.audio, audio);
638        fill_opt(&mut self.pdf, pdf);
639        fill_opt(&mut self.video, video);
640        fill_opt(&mut self.files_api_supported, files_api_supported);
641        fill_opt(&mut self.file_upload_wire_format, file_upload_wire_format);
642        fill_opt(&mut self.prefers_xml_scaffolding, prefers_xml_scaffolding);
643        fill_opt(&mut self.reserved_tool_call_token, reserved_tool_call_token);
644        fill_opt(
645            &mut self.prefers_markdown_scaffolding,
646            prefers_markdown_scaffolding,
647        );
648        fill_opt(&mut self.structured_output_mode, structured_output_mode);
649        fill_opt(
650            &mut self.supports_assistant_prefill,
651            supports_assistant_prefill,
652        );
653        fill_opt(&mut self.prefers_role_developer, prefers_role_developer);
654        fill_opt(&mut self.prefers_xml_tools, prefers_xml_tools);
655        fill_opt(&mut self.thinking_block_style, thinking_block_style);
656        fill_opt(
657            &mut self.interleaved_thinking_supported,
658            interleaved_thinking_supported,
659        );
660        fill_opt(&mut self.anthropic_beta_features, anthropic_beta_features);
661        fill_opt(
662            &mut self.image_url_input_supported,
663            image_url_input_supported,
664        );
665        fill_opt(&mut self.preserve_thinking, preserve_thinking);
666        fill_opt(&mut self.server_parser, server_parser);
667        fill_opt(
668            &mut self.honors_chat_template_kwargs,
669            honors_chat_template_kwargs,
670        );
671        fill_opt(
672            &mut self.chat_template_options_field,
673            chat_template_options_field,
674        );
675        fill_opt(
676            &mut self.requires_completion_tokens,
677            requires_completion_tokens,
678        );
679        fill_opt(&mut self.requires_streaming, requires_streaming);
680        fill_opt(
681            &mut self.reasoning_effort_supported,
682            reasoning_effort_supported,
683        );
684        fill_opt(&mut self.reasoning_effort_levels, reasoning_effort_levels);
685        fill_opt(&mut self.reasoning_none_supported, reasoning_none_supported);
686        fill_opt(&mut self.max_thinking_budget, max_thinking_budget);
687        fill_opt(
688            &mut self.reasoning_disable_supported,
689            reasoning_disable_supported,
690        );
691        fill_opt(
692            &mut self.reasoning_required_for_tools,
693            reasoning_required_for_tools,
694        );
695        fill_opt(
696            &mut self.reasoning_text_promotable,
697            reasoning_text_promotable,
698        );
699        fill_opt(&mut self.reasoning_wire_format, reasoning_wire_format);
700        fill_opt(&mut self.seed_supported, seed_supported);
701        fill_opt(&mut self.top_k_supported, top_k_supported);
702        fill_opt(&mut self.temperature_supported, temperature_supported);
703        fill_opt(&mut self.top_p_supported, top_p_supported);
704        fill_opt(
705            &mut self.frequency_penalty_supported,
706            frequency_penalty_supported,
707        );
708        fill_opt(
709            &mut self.presence_penalty_supported,
710            presence_penalty_supported,
711        );
712        fill_opt(
713            &mut self.allowed_tool_choice_modes,
714            allowed_tool_choice_modes,
715        );
716        fill_opt(
717            &mut self.requires_tool_result_adjacency,
718            requires_tool_result_adjacency,
719        );
720        fill_opt(
721            &mut self.supports_parallel_tool_calls,
722            supports_parallel_tool_calls,
723        );
724        fill_opt(
725            &mut self.tools_exclude_response_format,
726            tools_exclude_response_format,
727        );
728        fill_opt(&mut self.recommended_endpoint, recommended_endpoint);
729        fill_opt(
730            &mut self.text_tool_wire_format_supported,
731            text_tool_wire_format_supported,
732        );
733        fill_opt(&mut self.preferred_tool_format, preferred_tool_format);
734        fill_opt(&mut self.tool_mode_parity, tool_mode_parity);
735        fill_opt(&mut self.tool_mode_parity_notes, tool_mode_parity_notes);
736        fill_opt(
737            &mut self.thinking_disable_directive,
738            thinking_disable_directive,
739        );
740        fill_opt(&mut self.auto_reasoning_overrides, auto_reasoning_overrides);
741        fill_opt(&mut self.provider_route_denylist, provider_route_denylist);
742        fill_opt(
743            &mut self.openrouter_provider_order,
744            openrouter_provider_order,
745        );
746        fill_opt(&mut self.serving_precision, serving_precision);
747        // Legacy alias pairs resolve as ONE logical capability
748        // (`rule_structured_output`, `rule_thinking_modes`, `rule_vision`),
749        // so they fill as a unit: when the accumulated chain has explicitly
750        // set either spelling, the later rule's pair must not leak through
751        // the other spelling and override that explicit choice.
752        if self.structured_output.is_none() && self.json_schema.is_none() {
753            self.structured_output.clone_from(structured_output);
754            self.json_schema.clone_from(json_schema);
755        }
756        if self.thinking_modes.is_none() && self.thinking.is_none() {
757            self.thinking_modes.clone_from(thinking_modes);
758            self.thinking.clone_from(thinking);
759        }
760        if self.vision.is_none() && self.vision_supported.is_none() {
761            self.vision.clone_from(vision);
762            self.vision_supported.clone_from(vision_supported);
763        }
764    }
765}
766
/// Wire dialect: the message/request/response shape a route speaks.
///
/// One typed value replaces two older, drift-prone encodings of the same
/// fact: the stringly `Capabilities.message_wire_format` field (compared
/// against `"anthropic"`/`"gemini"`/`"ollama"` literals at a dozen call sites)
/// and the `(is_anthropic_style, is_ollama)` boolean pair that was threaded
/// independently through the transport/response layers. Because the enum is
/// closed, an unhandled or mistyped dialect becomes a compile error, and two
/// `bool`s can no longer silently disagree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WireDialect {
    /// Anthropic native Messages API (`/v1/messages`); the only dialect that
    /// surfaces Claude's extended-thinking stream.
    /// `message_wire_format = "anthropic"`.
    Anthropic,
    /// OpenAI-compatible Chat Completions (`/v1/chat/completions`); the
    /// default for hosted/openai-shape routes.
    /// `message_wire_format = "openai"`.
    OpenAiCompat,
    /// Ollama native `/api/chat`. `message_wire_format = "ollama"`.
    Ollama,
    /// Google Gemini `generateContent`. `message_wire_format = "gemini"`.
    Gemini,
}

impl WireDialect {
    /// Map the catalog's `message_wire_format` string onto a dialect.
    ///
    /// Matching is exact and case-sensitive. Anything that is not one of the
    /// three native spellings — including the explicit `"openai"` — resolves
    /// to [`WireDialect::OpenAiCompat`], which mirrors the pre-cutover
    /// behavior where every `== "anthropic"/"gemini"/"ollama"` check fell
    /// through to the OpenAI-compatible path.
    pub fn from_message_wire_format(value: &str) -> WireDialect {
        if value == "anthropic" {
            Self::Anthropic
        } else if value == "ollama" {
            Self::Ollama
        } else if value == "gemini" {
            Self::Gemini
        } else {
            Self::OpenAiCompat
        }
    }

    /// Canonical `message_wire_format` spelling, for display and round-trip.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::OpenAiCompat => "openai",
            Self::Anthropic => "anthropic",
            Self::Gemini => "gemini",
            Self::Ollama => "ollama",
        }
    }

    /// `true` when the route speaks Anthropic's native Messages shape.
    pub fn is_anthropic(self) -> bool {
        self == Self::Anthropic
    }

    /// `true` when the route speaks Ollama's native `/api/chat` shape.
    pub fn is_ollama(self) -> bool {
        self == Self::Ollama
    }

    /// `true` when the route speaks Google Gemini's `generateContent` shape.
    pub fn is_gemini(self) -> bool {
        self == Self::Gemini
    }
}
831
/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to plain values — mostly `false` / empty / `None` — so
/// callers never have to unwrap an `Option<bool>` for what are really
/// boolean gates.
///
/// The baseline is not uniformly "off": the `Default` impl turns several
/// gates on (for example `image_url_input_supported`, the sampling-parameter
/// `*_supported` flags, `supports_parallel_tool_calls`, and
/// `text_tool_wire_format_supported`) and uses sentinel strings such as
/// `"none"`, `"openai"`, and `"unverified"` for some string fields. Consult
/// `Capabilities::default()` before assuming an unset field reads as `false`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    pub native_tools: bool,
    /// Typed wire dialect for the route; see [`WireDialect`]. Defaults to
    /// [`WireDialect::OpenAiCompat`].
    pub message_wire_format: WireDialect,
    /// Defaults to `"openai"`.
    pub native_tool_wire_format: String,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub responses_api: bool,
    pub hosted_tools: Vec<String>,
    pub remote_mcp: bool,
    pub conversation_state: bool,
    pub compaction: bool,
    pub background_mode: bool,
    pub tool_approval_policy: Option<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    /// Defaults to `"none"`.
    pub cache_breakpoint_style: String,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub video: bool,
    pub files_api_supported: bool,
    pub file_upload_wire_format: Option<String>,
    pub structured_output: Option<String>,
    /// Legacy mirror for CLI display and older callers.
    pub json_schema: Option<String>,
    pub prefers_xml_scaffolding: bool,
    /// See [`ProviderRule::reserved_tool_call_token`].
    pub reserved_tool_call_token: bool,
    pub prefers_markdown_scaffolding: bool,
    /// Defaults to `"none"`.
    pub structured_output_mode: String,
    pub supports_assistant_prefill: bool,
    pub prefers_role_developer: bool,
    pub prefers_xml_tools: bool,
    /// Defaults to `"none"`.
    pub thinking_block_style: String,
    pub thinking_modes: Vec<String>,
    pub interleaved_thinking_supported: bool,
    pub anthropic_beta_features: Vec<String>,
    // NOTE(review): presumably the legacy spelling of `vision` (the rule type
    // treats `vision` / `vision_supported` as one alias pair) — confirm that
    // resolution keeps the two in sync.
    pub vision_supported: bool,
    /// Defaults to `true`.
    pub image_url_input_supported: bool,
    pub preserve_thinking: bool,
    /// Defaults to `"none"`.
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub chat_template_options_field: Option<String>,
    pub requires_completion_tokens: bool,
    pub requires_streaming: bool,
    pub reasoning_effort_supported: bool,
    pub reasoning_effort_levels: Vec<String>,
    pub reasoning_none_supported: bool,
    /// See [`ProviderRule::max_thinking_budget`]. `None` means the model uses
    /// the provider's own default ceiling.
    pub max_thinking_budget: Option<i64>,
    /// Defaults to `true`.
    pub reasoning_disable_supported: bool,
    /// See [`ProviderRule::reasoning_required_for_tools`].
    pub reasoning_required_for_tools: bool,
    /// Defaults to `true`.
    pub reasoning_text_promotable: bool,
    pub reasoning_wire_format: Option<String>,
    // The six sampling-parameter gates below all default to `true`.
    pub seed_supported: bool,
    pub top_k_supported: bool,
    pub temperature_supported: bool,
    pub top_p_supported: bool,
    pub frequency_penalty_supported: bool,
    pub presence_penalty_supported: bool,
    pub allowed_tool_choice_modes: Vec<String>,
    pub requires_tool_result_adjacency: bool,
    /// Defaults to `true`.
    pub supports_parallel_tool_calls: bool,
    pub tools_exclude_response_format: bool,
    pub recommended_endpoint: Option<String>,
    /// Defaults to `true`, so a route with no declaration is never judged
    /// text-broken by the tool-format gate (`channel_forbidden`).
    pub text_tool_wire_format_supported: bool,
    /// The tool format the route declares as preferred; the tool-format gate
    /// steers to it when the requested channel is forbidden.
    pub preferred_tool_format: Option<String>,
    /// Parity verdict string consumed by the tool-format gate; `None` is
    /// treated there as `"unknown"`.
    pub tool_mode_parity: Option<String>,
    pub tool_mode_parity_notes: Option<String>,
    pub thinking_disable_directive: Option<String>,
    /// Per-task auto-policy reasoning-level overrides for this route.
    /// See [`ProviderRule::auto_reasoning_overrides`].
    pub auto_reasoning_overrides: BTreeMap<String, String>,
    /// OpenRouter upstream provider names to exclude from routing for this
    /// row. See [`ProviderRule::provider_route_denylist`]. Empty means "no
    /// route restriction".
    pub provider_route_denylist: Vec<String>,
    /// OpenRouter upstream provider names this row is PINNED to (allowlist), in
    /// preference order. See [`ProviderRule::openrouter_provider_order`]. Empty
    /// means "no pin" (free OpenRouter routing).
    pub openrouter_provider_order: Vec<String>,
    /// Serving-quality / precision trust verdict for this route. See
    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
    pub serving_precision: String,
}
923
/// Baseline [`Capabilities`] value.
///
/// Most fields start "off" (`false`, empty collection, `None`), but the
/// baseline is deliberately permissive for a handful of gates that start
/// `true`: `image_url_input_supported`, `reasoning_disable_supported`,
/// `reasoning_text_promotable`, the six sampling-parameter gates
/// (`seed`, `top_k`, `temperature`, `top_p`, `frequency_penalty`,
/// `presence_penalty`), `supports_parallel_tool_calls`, and
/// `text_tool_wire_format_supported`. String-typed fields use sentinel
/// spellings (`"openai"`, `"none"`, `"unverified"`) rather than empty strings.
impl Default for Capabilities {
    fn default() -> Self {
        Self {
            native_tools: false,
            // OpenAI-compatible is the fallback dialect for both the message
            // wire format and the native tool wire format.
            message_wire_format: WireDialect::OpenAiCompat,
            native_tool_wire_format: "openai".to_string(),
            defer_loading: false,
            tool_search: Vec::new(),
            responses_api: false,
            hosted_tools: Vec::new(),
            remote_mcp: false,
            conversation_state: false,
            compaction: false,
            background_mode: false,
            tool_approval_policy: None,
            max_tools: None,
            prompt_caching: false,
            cache_breakpoint_style: "none".to_string(),
            vision: false,
            audio: false,
            pdf: false,
            video: false,
            files_api_supported: false,
            file_upload_wire_format: None,
            structured_output: None,
            json_schema: None,
            prefers_xml_scaffolding: false,
            reserved_tool_call_token: false,
            prefers_markdown_scaffolding: false,
            structured_output_mode: "none".to_string(),
            supports_assistant_prefill: false,
            prefers_role_developer: false,
            prefers_xml_tools: false,
            thinking_block_style: "none".to_string(),
            thinking_modes: Vec::new(),
            interleaved_thinking_supported: false,
            anthropic_beta_features: Vec::new(),
            vision_supported: false,
            // On by default, unlike the other vision fields.
            image_url_input_supported: true,
            preserve_thinking: false,
            server_parser: "none".to_string(),
            honors_chat_template_kwargs: false,
            chat_template_options_field: None,
            requires_completion_tokens: false,
            requires_streaming: false,
            reasoning_effort_supported: false,
            reasoning_effort_levels: Vec::new(),
            reasoning_none_supported: false,
            max_thinking_budget: None,
            reasoning_disable_supported: true,
            reasoning_required_for_tools: false,
            reasoning_text_promotable: true,
            reasoning_wire_format: None,
            // Sampling parameters are assumed supported unless a rule says
            // otherwise.
            seed_supported: true,
            top_k_supported: true,
            temperature_supported: true,
            top_p_supported: true,
            frequency_penalty_supported: true,
            presence_penalty_supported: true,
            allowed_tool_choice_modes: Vec::new(),
            requires_tool_result_adjacency: false,
            supports_parallel_tool_calls: true,
            tools_exclude_response_format: false,
            recommended_endpoint: None,
            // Must stay `true`: the tool-format gate treats `false` as an
            // explicit declaration that the text channel is broken.
            text_tool_wire_format_supported: true,
            preferred_tool_format: None,
            tool_mode_parity: None,
            tool_mode_parity_notes: None,
            thinking_disable_directive: None,
            auto_reasoning_overrides: BTreeMap::new(),
            provider_route_denylist: Vec::new(),
            openrouter_provider_order: Vec::new(),
            serving_precision: "unverified".to_string(),
        }
    }
}
1000
/// Display-oriented row for `harn provider catalog matrix`, the legacy
/// `harn check --provider-matrix` surface, and the generated docs page. Rows
/// are intentionally rule-shaped: `model` is the rule's `model_match` pattern,
/// because the shipped capability source of truth is a first-match rule table
/// rather than an exhaustive remote model inventory.
///
/// The struct derives `Serialize`, so field names are part of the serialized
/// output shape.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ProviderCapabilityMatrixRow {
    /// Provider the rule belongs to.
    pub provider: String,
    /// The rule's `model_match` pattern, not a concrete model id.
    pub model: String,
    // NOTE(review): presumably the rule's `version_min` generation components
    // (module docs mention provider adapters parsing generations for
    // `version_min`) — confirm the component order.
    pub version_min: Option<Vec<u32>>,
    /// Whether this rule opts into field-wise fall-through
    /// ([`ProviderRule::extends`]). Rows in this matrix are rule-shaped, so
    /// an `extends` row honestly reports its OWN fields only — for a
    /// matching model, unset fields resolve from later matching rows and
    /// provider defaults rather than the printed per-rule values.
    pub extends: bool,
    pub thinking: Vec<String>,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub video: bool,
    pub streaming: bool,
    pub files_api_supported: bool,
    pub json_schema: Option<String>,
    pub prefers_xml_scaffolding: bool,
    pub reserved_tool_call_token: bool,
    pub prefers_markdown_scaffolding: bool,
    pub structured_output_mode: String,
    pub supports_assistant_prefill: bool,
    pub prefers_role_developer: bool,
    pub prefers_xml_tools: bool,
    pub thinking_block_style: String,
    pub native_tools: bool,
    pub text_tools: bool,
    // Unlike `Capabilities`, these two are plain `String`s here rather than
    // `Option<String>`; how an unset value is rendered is decided by the code
    // that builds the row (not visible in this block).
    pub preferred_tool_format: String,
    pub tool_mode_parity: String,
    pub tools: bool,
    pub cache: bool,
    /// Serving-quality / precision trust verdict for this route. See
    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
    pub serving_precision: String,
    // NOTE(review): presumably names the layer that supplied the rule
    // (built-in vs. user override) — confirm against the row builder.
    pub source: String,
}
1044
/// Result of auditing the catalog for tool-capability coverage.
///
/// Rendered for humans by `render_human`; the audit passed when `gaps` is
/// empty (see `ok`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ToolCapabilityAuditReport {
    /// Number of models that were audited (reported as "priced chat models"
    /// in the human-readable rendering).
    pub audited_models: usize,
    /// One entry per model whose matching rule is missing required fields.
    pub gaps: Vec<ToolCapabilityAuditGap>,
}
1050
1051impl ToolCapabilityAuditReport {
1052    pub fn ok(&self) -> bool {
1053        self.gaps.is_empty()
1054    }
1055
1056    pub fn render_human(&self) -> String {
1057        if self.gaps.is_empty() {
1058            return format!(
1059                "provider capability audit OK: {} priced chat models have explicit native_tools and preferred_tool_format rules",
1060                self.audited_models
1061            );
1062        }
1063
1064        let mut out = format!(
1065            "provider capability audit found {} catalog gaps among {} priced chat models:",
1066            self.gaps.len(),
1067            self.audited_models
1068        );
1069        for gap in &self.gaps {
1070            let matched = match (&gap.rule_provider, &gap.rule_model_match) {
1071                (Some(provider), Some(model_match)) => {
1072                    format!("provider.{provider} model_match=\"{model_match}\"")
1073                }
1074                _ => "no matching rule".to_string(),
1075            };
1076            out.push_str(&format!(
1077                "\n- {}:{} ({matched}) missing {}; suggest native_tools = {}, preferred_tool_format = \"{}\"",
1078                gap.provider,
1079                gap.model,
1080                gap.missing_fields.join(", "),
1081                gap.suggested_native_tools,
1082                gap.suggested_preferred_tool_format,
1083            ));
1084        }
1085        out
1086    }
1087}
1088
/// One catalog gap found by the tool-capability audit: a `(provider, model)`
/// whose resolved rule lacks required tool fields, plus a suggested fix.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ToolCapabilityAuditGap {
    /// Provider of the audited model.
    pub provider: String,
    /// Audited model id.
    pub model: String,
    /// Provider key of the rule that matched, if any. The human rendering
    /// only names a rule when this AND `rule_model_match` are both `Some`.
    pub rule_provider: Option<String>,
    /// `model_match` pattern of the rule that matched, if any.
    pub rule_model_match: Option<String>,
    /// Names of the fields the rule is missing (joined with `", "` when
    /// rendered).
    pub missing_fields: Vec<String>,
    /// Suggested value for `native_tools`.
    pub suggested_native_tools: bool,
    /// Suggested value for `preferred_tool_format`.
    pub suggested_preferred_tool_format: String,
}
1099
thread_local! {
    /// Per-thread user overrides installed by the CLI at startup. Kept
    /// thread-local (not process-static) to match the rest of the VM
    /// state model — the VM is !Send and each VM thread owns its own
    /// configuration.
    ///
    /// Starts as `None` (no overrides); written by `set_user_overrides` and
    /// read by `lookup`.
    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
}
1107
1108/// Lazily-parsed built-in rules. The `include_str!` content is a static
1109/// constant; parsing it once per process is safe and free of ordering
1110/// hazards.
1111static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
1112
1113fn builtin() -> &'static CapabilitiesFile {
1114    BUILTIN.get_or_init(|| {
1115        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
1116            .expect("capabilities.toml must parse at build time")
1117    })
1118}
1119
/// The shipped (built-in) capability matrix. Public so the footgun gate in
/// [`crate::llm::capability_audit`] can audit exactly what Harn ships.
///
/// Thin public wrapper over the private `builtin()` accessor: it returns the
/// same lazily-parsed, process-wide table and never includes user overrides.
pub fn builtin_file() -> &'static CapabilitiesFile {
    builtin()
}
1125
1126/// Install project-level overrides for the current thread. Usually
1127/// called once at CLI bootstrap after reading `harn.toml`. Passing
1128/// `None` clears any prior override.
1129pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
1130    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
1131}
1132
/// Clear any thread-local user overrides. Used between test runs.
///
/// Equivalent to `set_user_overrides(None)`; only the calling thread is
/// affected.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
1137
1138/// Parse a TOML string containing the capabilities section's own shape
1139/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
1140/// same layout used by the built-in `capabilities.toml`) and install as
1141/// the current thread's override.
1142pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
1143    set_user_overrides(Some(parse_capabilities_toml(src)?));
1144    Ok(())
1145}
1146
1147/// Parse a capabilities TOML document (the same layout used by the built-in
1148/// `capabilities.toml`) without installing it anywhere, for callers that
1149/// thread an explicit capability overlay instead of mutating thread state
1150/// (e.g. `harn provider catalog export --capabilities-overlay`).
1151pub fn parse_capabilities_toml(src: &str) -> Result<CapabilitiesFile, String> {
1152    toml::from_str(src).map_err(|e| e.to_string())
1153}
1154
1155/// Extract the `[capabilities]` section from a full `harn.toml` source
1156/// and install it as the current thread's override. The schema inside
1157/// that section mirrors `CapabilitiesFile` but with every key prefixed
1158/// by `capabilities.`:
1159///
1160/// ```toml
1161/// [[capabilities.provider.my-proxy]]
1162/// model_match = "*"
1163/// native_tools = true
1164/// tool_search = ["hosted"]
1165/// ```
1166pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
1167    #[derive(Deserialize)]
1168    struct Manifest {
1169        #[serde(default)]
1170        capabilities: Option<CapabilitiesFile>,
1171    }
1172    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
1173    set_user_overrides(parsed.capabilities);
1174    Ok(())
1175}
1176
1177/// Look up effective capabilities for a `(provider, model)` pair.
1178/// Walks the provider_family chain until it finds a rule list that
1179/// matches. Within any one provider's rule list, user overrides are
1180/// consulted before the built-in rules. The first matching rule wins —
1181/// later rules (and later layers in the family chain) are ignored —
1182/// unless it sets `extends = true`, in which case it contributes only the
1183/// fields it explicitly sets and resolution continues to later matching
1184/// rules (and ultimately provider / built-in defaults) to fill the rest.
1185pub fn lookup(provider: &str, model: &str) -> Capabilities {
1186    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1187    lookup_with_user_overrides(provider, model, user.as_ref())
1188}
1189
1190pub fn lookup_with_user_overrides(
1191    provider: &str,
1192    model: &str,
1193    user_overrides: Option<&CapabilitiesFile>,
1194) -> Capabilities {
1195    let mut caps = lookup_with(provider, model, builtin(), user_overrides);
1196    if provider != "openai" && provider != "mock" {
1197        caps.responses_api = false;
1198        caps.hosted_tools.clear();
1199        caps.remote_mcp = false;
1200        caps.conversation_state = false;
1201        caps.compaction = false;
1202        caps.background_mode = false;
1203        caps.tool_approval_policy = None;
1204    }
1205    caps
1206}
1207
/// Which wire channel a `tool_format` string travels over.
///
/// `native` is the provider's structured `tool_calls` JSON channel, while
/// `text` and `json` are text-channel grammars carried in assistant content.
/// This mirrors `llm_config::ToolFormatChannel` but is kept local so that the
/// capability registry (the single source of truth for tool-call dialect
/// validity) does not depend on the resolver.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatWire {
    /// Provider-native JSON tool calling (`tool_format = "native"`).
    Native,
    /// A text-channel grammar (`tool_format = "text"` or `"json"`).
    Text,
}

impl ToolFormatWire {
    /// Map a `tool_format` string to its channel.
    ///
    /// Yields `None` for any unrecognized value, so callers can reject typos
    /// loudly instead of guessing a channel. Matching is exact and
    /// case-sensitive.
    pub fn classify(tool_format: &str) -> Option<Self> {
        let wire = match tool_format {
            "native" => Self::Native,
            "json" | "text" => Self::Text,
            _ => return None,
        };
        Some(wire)
    }
}
1233
/// Result of checking a requested `(provider, model, tool_format)` combo
/// against the capability registry's tool-call dialect validity model.
///
/// This is the FOOTGUN-REMOVAL contract: a harness developer may request any
/// tool_format, and the registry guarantees that the resolved format is one
/// that actually yields parseable tool calls for that route. A known-broken
/// combo (e.g. a `native` pin on a `native_unreliable` route that silently
/// drops to unparsed DSML text) is auto-corrected, with an explanation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolFormatDecision {
    /// The tool_format to put on the wire. Same as the requested format when
    /// the combo was already valid; otherwise the registry's
    /// `preferred_tool_format` for the route.
    pub effective: String,
    /// Present only when the requested format was overridden. Human-readable;
    /// names the bad combo and the working alternative. Surface it to the
    /// harness developer so vanishing tool calls are never silent.
    pub correction: Option<String>,
}

impl ToolFormatDecision {
    /// Decision that keeps the requested format unchanged (no correction).
    fn accepted(effective: String) -> Self {
        Self {
            effective,
            correction: None,
        }
    }
}
1262
/// Whether a `tool_mode_parity` verdict marks the native (provider JSON)
/// channel as untrustworthy for parseable tool calls.
///
/// Only `native_unreliable` and `text_only` count. `unsupported` (no working
/// channel at all) is deliberately left out: there is no better format to
/// steer to, so the gate leaves such a route alone instead of rewriting to
/// another broken channel under a misleading "Using X instead" message.
fn parity_forbids_native(parity: &str) -> bool {
    ["native_unreliable", "text_only"].contains(&parity)
}
1271
/// Whether a `tool_mode_parity` verdict marks the text-channel grammars as
/// untrustworthy for parseable tool calls.
///
/// Only `text_unreliable` and `native_only` count; `unsupported` is excluded
/// for the same reason as in [`parity_forbids_native`].
fn parity_forbids_text(parity: &str) -> bool {
    parity == "text_unreliable" || parity == "native_only"
}
1278
1279/// True when the requested wire channel is known not to return parseable tool
1280/// calls for a route. The gate auto-corrects only on *positive* evidence of
1281/// breakage, never on a "we don't know" default:
1282///
1283/// - `tool_mode_parity` is an explicit verdict (`parity_forbids_*`).
1284/// - `text_tool_wire_format_supported = false` is an explicit declaration that
1285///   the text channel does not survive this route (e.g. native-only local
1286///   Ollama Qwen3 rows that omit a parity string). It defaults to `true`, so an
1287///   unknown route is never wrongly judged text-broken.
1288///
1289/// `native_tools` is deliberately NOT consulted here: it defaults to `false`
1290/// for unknown providers, so treating `!native_tools` as "native is broken"
1291/// would wrongly rewrite a custom proxy that does support native tools. The
1292/// hard `native` + `!native_tools` capability gate in `extract_llm_options`
1293/// already rejects a genuine native-on-non-native mismatch loudly.
1294fn channel_forbidden(wire: ToolFormatWire, caps: &Capabilities) -> bool {
1295    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1296    match wire {
1297        ToolFormatWire::Native => parity_forbids_native(parity),
1298        ToolFormatWire::Text => {
1299            parity_forbids_text(parity) || !caps.text_tool_wire_format_supported
1300        }
1301    }
1302}
1303
1304/// Validate (and, where the registry knows better, auto-correct) a requested
1305/// `tool_format` for a `(provider, model)` route.
1306///
1307/// This is the single enforcement seam for tool-call dialect validity. The
1308/// capability registry already declares, per route, which channel actually
1309/// returns parseable tool calls (`tool_mode_parity`) and which format to use
1310/// (`preferred_tool_format`). Before this function those fields were advisory
1311/// metadata that any alias pin or explicit `--tool-format` flag could silently
1312/// override — the footgun behind the DeepSeek V3.2 DSML "vanishing tool calls"
1313/// dead-abstain. Now any combo whose requested channel is forbidden — by the
1314/// route's `tool_mode_parity` verdict OR by an explicit
1315/// `text_tool_wire_format_supported = false` declaration — is rewritten to a
1316/// working channel (preferring the route's `preferred_tool_format`), with a
1317/// `correction` message naming both. Unknown formats, routes with no adverse
1318/// signal (`unknown`/`interchangeable`), and routes with no working channel at
1319/// all pass through unchanged.
1320pub fn validate_tool_format(provider: &str, model: &str, requested: &str) -> ToolFormatDecision {
1321    let caps = lookup(provider, model);
1322    validate_tool_format_with_caps(provider, model, requested, &caps)
1323}
1324
1325/// `validate_tool_format` against an already-resolved [`Capabilities`], so hot
1326/// callers that already hold one avoid a second matrix lookup.
1327pub fn validate_tool_format_with_caps(
1328    provider: &str,
1329    model: &str,
1330    requested: &str,
1331    caps: &Capabilities,
1332) -> ToolFormatDecision {
1333    // Unknown / unclassifiable formats are not ours to second-guess — the
1334    // exhaustive-match guard elsewhere already rejects typos loudly.
1335    let Some(wire) = ToolFormatWire::classify(requested) else {
1336        return ToolFormatDecision::accepted(requested.to_string());
1337    };
1338
1339    if !channel_forbidden(wire, caps) {
1340        return ToolFormatDecision::accepted(requested.to_string());
1341    }
1342
1343    // The requested channel is known-broken for this route. Pick the opposite
1344    // channel as the steer target, preferring the route's declared
1345    // `preferred_tool_format` when it lands on a channel that is itself not
1346    // forbidden. If BOTH channels are forbidden (a route with no working tool
1347    // surface), there is nothing better to offer — pass the request through
1348    // unchanged rather than rewrite to an equally-broken format under a
1349    // misleading correction message.
1350    let opposite = match wire {
1351        ToolFormatWire::Native => ToolFormatWire::Text,
1352        ToolFormatWire::Text => ToolFormatWire::Native,
1353    };
1354    if channel_forbidden(opposite, caps) {
1355        return ToolFormatDecision::accepted(requested.to_string());
1356    }
1357    let preferred = caps
1358        .preferred_tool_format
1359        .clone()
1360        .filter(|fmt| ToolFormatWire::classify(fmt) == Some(opposite))
1361        .unwrap_or_else(|| match opposite {
1362            ToolFormatWire::Native => "native".to_string(),
1363            ToolFormatWire::Text => "json".to_string(),
1364        });
1365
1366    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1367    let mut correction = format!(
1368        "tool_format `{requested}` is not safe for {provider}/{model} \
1369         (tool_mode_parity = `{parity}`): this route does not return parseable \
1370         tool calls on the {} channel, so calls would silently vanish. \
1371         Using `{preferred}` instead.",
1372        match wire {
1373            ToolFormatWire::Native => "provider-native",
1374            ToolFormatWire::Text => "text",
1375        }
1376    );
1377    if let Some(note) = caps.tool_mode_parity_notes.as_deref() {
1378        if !note.is_empty() {
1379            correction.push_str(" (");
1380            correction.push_str(note);
1381            correction.push(')');
1382        }
1383    }
1384
1385    ToolFormatDecision {
1386        effective: preferred,
1387        correction: Some(correction),
1388    }
1389}
1390
1391/// FOOTGUN-REMOVAL — fail fast when a `(provider, model)` route has NO viable
1392/// tool channel at all: the registry forbids both the provider-native channel
1393/// AND every text-channel grammar. `validate_tool_format` deliberately passes
1394/// such a route through unchanged (it has no *better* format to steer to and
1395/// must not rewrite to an equally-broken one under a misleading "Using X
1396/// instead" message); but a tool-bearing call dispatched on a route with no
1397/// working channel can only produce a silent empty tool stream. This guard lets
1398/// the call seam reject that combo BEFORE dispatch with an actionable message —
1399/// naming the bad `(provider, model)` and a suggested alternative provider for
1400/// the same model family — instead of billing a noncommittal completion.
1401///
1402/// Returns `Some(message)` only when both channels are forbidden (e.g. a route
1403/// flagged `native_unreliable` whose text channel is also declared unsupported,
1404/// or one explicitly pinned `tool_mode_parity = "unsupported"`). Returns `None`
1405/// for every route that still has at least one working channel, so it never
1406/// fires on the auto-correctable DeepInfra/SambaNova gpt-oss rows (those keep a
1407/// working text channel) or on any healthy route. Modeled on the same
1408/// `channel_forbidden` machinery `validate_tool_format` uses, so the two stay in
1409/// lock-step: the gate auto-corrects when one channel works and fails fast when
1410/// neither does.
1411pub fn no_viable_tool_channel(provider: &str, model: &str) -> Option<String> {
1412    let caps = lookup(provider, model);
1413    no_viable_tool_channel_with_caps(provider, model, &caps)
1414}
1415
1416/// `no_viable_tool_channel` against an already-resolved [`Capabilities`], so hot
1417/// callers that already hold one avoid a second matrix lookup.
1418pub fn no_viable_tool_channel_with_caps(
1419    provider: &str,
1420    model: &str,
1421    caps: &Capabilities,
1422) -> Option<String> {
1423    let native_forbidden = channel_forbidden(ToolFormatWire::Native, caps);
1424    let text_forbidden = channel_forbidden(ToolFormatWire::Text, caps);
1425    if !(native_forbidden && text_forbidden) {
1426        return None;
1427    }
1428    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1429    let mut message = format!(
1430        "no viable tool-calling channel for {provider}/{model} \
1431         (tool_mode_parity = `{parity}`): the registry trusts neither the \
1432         provider-native `tool_calls` channel nor a text-channel grammar to \
1433         return parseable tool calls on this route, so a tool-bearing call here \
1434         can only emit a silent empty tool stream. {}",
1435        suggested_alternative_provider_hint(model)
1436    );
1437    if let Some(note) = caps.tool_mode_parity_notes.as_deref() {
1438        if !note.is_empty() {
1439            message.push_str(" (");
1440            message.push_str(note);
1441            message.push(')');
1442        }
1443    }
1444    Some(message)
1445}
1446
/// Build the "try this provider instead" sentence shown when a model's current
/// route has no viable tool channel.
///
/// Models whose id contains `gpt-oss` (matched ASCII case-insensitively) get a
/// Harmony-specific hint pointing at the TEXT-channel routes
/// (Fireworks/DeepInfra/SambaNova) or a native-clean route, because the
/// provider-native Harmony channel is the known footgun for that family. Every
/// other model gets a generic pointer to the provider catalog matrix.
fn suggested_alternative_provider_hint(model: &str) -> String {
    let is_gpt_oss = model.to_ascii_lowercase().contains("gpt-oss");
    let hint = if is_gpt_oss {
        "For gpt-oss (Harmony), use a TEXT-channel route (e.g. \
         `fireworks`/`deepinfra`/`sambanova` gpt-oss, which Harn pins to \
         `tool_format = \"text\"`) or a native-clean route; the provider-native \
         Harmony channel drops tool calls into the reasoning channel."
    } else {
        "Pick a provider whose route for this model has a working native or \
         text tool channel (see `harn provider catalog matrix`)."
    };
    hint.to_string()
}
1465
1466/// Return the currently-effective provider capability rule matrix. User
1467/// override rows, when installed for the current thread, are emitted before
1468/// built-in rows so the display mirrors lookup precedence.
1469pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
1470    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1471    let mut rows = Vec::new();
1472    if let Some(user) = user.as_ref() {
1473        push_matrix_rows(&mut rows, user, "project");
1474    }
1475    push_matrix_rows(&mut rows, builtin(), "builtin");
1476    rows
1477}
1478
1479/// Audit the currently effective provider/model catalog against the currently
1480/// effective capability rules. This is the user-facing path used by the CLI
1481/// when authors are adding provider catalog or capability override rows.
1482pub fn audit_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
1483    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1484    audit_tool_capability_coverage(
1485        crate::llm_config::model_catalog_entries(),
1486        builtin(),
1487        user.as_ref(),
1488    )
1489}
1490
1491/// Audit the built-in catalog only. The CI test uses this path so external
1492/// provider config cannot hide a gap in the shipped TOML assets.
1493pub fn audit_builtin_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
1494    let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
1495        .expect("providers.toml must parse at build time");
1496    audit_tool_capability_coverage(catalog.models, builtin(), None)
1497}
1498
1499fn audit_tool_capability_coverage<I>(
1500    models: I,
1501    builtin: &CapabilitiesFile,
1502    user: Option<&CapabilitiesFile>,
1503) -> ToolCapabilityAuditReport
1504where
1505    I: IntoIterator<Item = (String, crate::llm_config::ModelDef)>,
1506{
1507    let mut gaps = Vec::new();
1508    let mut audited_models = 0;
1509
1510    for (model_id, model) in models {
1511        if model.pricing.is_none() {
1512            continue;
1513        }
1514        audited_models += 1;
1515        let matched = first_matching_rule(user, builtin, &model.provider, &model_id);
1516        let mut missing_fields = Vec::new();
1517        match matched.as_ref().map(|matched| &matched.rule) {
1518            Some(rule) => {
1519                if rule.native_tools.is_none() {
1520                    missing_fields.push("native_tools".to_string());
1521                }
1522                if rule.preferred_tool_format.is_none() {
1523                    missing_fields.push("preferred_tool_format".to_string());
1524                }
1525            }
1526            None => {
1527                missing_fields.push("native_tools".to_string());
1528                missing_fields.push("preferred_tool_format".to_string());
1529            }
1530        }
1531        if missing_fields.is_empty() {
1532            continue;
1533        }
1534
1535        let (suggested_native_tools, suggested_preferred_tool_format) =
1536            suggested_tool_capability_defaults(
1537                &model.provider,
1538                &model_id,
1539                &model,
1540                matched.as_ref(),
1541            );
1542        gaps.push(ToolCapabilityAuditGap {
1543            provider: model.provider,
1544            model: model_id,
1545            rule_provider: matched.as_ref().map(|matched| matched.provider.clone()),
1546            // Honest per-rule provenance: an `extends` fall-through chain
1547            // reports every absorbed rule pattern in precedence order, not a
1548            // fake single source row.
1549            rule_model_match: matched.map(|matched| matched.matched_patterns.join(" -> ")),
1550            missing_fields,
1551            suggested_native_tools,
1552            suggested_preferred_tool_format,
1553        });
1554    }
1555
1556    gaps.sort_by(|left, right| {
1557        left.provider
1558            .cmp(&right.provider)
1559            .then_with(|| left.model.cmp(&right.model))
1560    });
1561    ToolCapabilityAuditReport {
1562        audited_models,
1563        gaps,
1564    }
1565}
1566
/// Result of resolving the capability rules for one `(provider, model)` pair:
/// the merged effective rule together with the provenance of every rule that
/// contributed to it. Produced by `RuleResolution::into_matched`.
struct MatchedCapabilityRule {
    /// Provider layer of the first (highest-precedence) matched rule.
    provider: String,
    /// Effective rule: the first match, with fields it left unset filled from
    /// later matching rules while the chain opted into `extends` fall-through.
    rule: ProviderRule,
    /// `model_match` patterns of every absorbed rule, in precedence order.
    /// A single entry unless the first match set `extends = true`.
    matched_patterns: Vec<String>,
}
1577
1578/// Accumulates matching rules along the resolution walk (user rules before
1579/// built-in rules within a layer, then the `provider_family` chain). The
1580/// first matched rule has the highest precedence; later matches only fill
1581/// fields the accumulated chain left unset, and only while every absorbed
1582/// rule so far opted into `extends` fall-through.
1583#[derive(Default)]
1584struct RuleResolution {
1585    /// Provider layer of the first matched rule.
1586    provider: Option<String>,
1587    merged: Option<ProviderRule>,
1588    /// `model_match` provenance of every absorbed rule, in precedence order.
1589    matched_patterns: Vec<String>,
1590}
1591
1592impl RuleResolution {
1593    /// Merge `rule` into the accumulator. Returns `true` when the walk must
1594    /// terminate: the rule does not opt into `extends` fall-through, which is
1595    /// exactly the pre-`extends` first-match-wins behavior.
1596    fn absorb(&mut self, layer_provider: &str, rule: &ProviderRule) -> bool {
1597        if self.provider.is_none() {
1598            self.provider = Some(layer_provider.to_string());
1599        }
1600        self.matched_patterns.push(rule.model_match.clone());
1601        match &mut self.merged {
1602            None => self.merged = Some(rule.clone()),
1603            Some(merged) => merged.fill_missing_from(rule),
1604        }
1605        !rule.extends
1606    }
1607
1608    fn into_matched(self) -> Option<MatchedCapabilityRule> {
1609        Some(MatchedCapabilityRule {
1610            provider: self.provider?,
1611            rule: self.merged.expect("merged is set whenever provider is set"),
1612            matched_patterns: self.matched_patterns,
1613        })
1614    }
1615}
1616
1617/// Scan the ordered rule list for `layer_provider` (user rules first, then
1618/// built-in rules), absorbing every matching rule into `resolution` until a
1619/// terminating (non-`extends`) match. Returns `true` when resolution
1620/// terminated within this layer.
1621fn absorb_layer_matches(
1622    user: Option<&CapabilitiesFile>,
1623    builtin: &CapabilitiesFile,
1624    layer_provider: &str,
1625    model: &str,
1626    resolution: &mut RuleResolution,
1627) -> bool {
1628    for file in user.into_iter().chain(std::iter::once(builtin)) {
1629        if let Some(rules) = file.provider.get(layer_provider) {
1630            for rule in rules {
1631                if rule_matches(rule, model) && resolution.absorb(layer_provider, rule) {
1632                    return true;
1633                }
1634            }
1635        }
1636    }
1637    false
1638}
1639
1640/// Walk provider → family(provider) → … with a visited-guard, absorbing
1641/// matching rules into a [`RuleResolution`] and accumulating per-layer
1642/// provider defaults (earlier layers win) exactly as far as the walk gets.
1643/// Stops at the first non-`extends` match, so a terminating match at layer N
1644/// never consults defaults from layers past N — the pre-`extends` behavior.
1645/// An unterminated `extends` chain keeps walking so later layers can fill
1646/// its gaps.
1647fn resolve_rule_chain(
1648    user: Option<&CapabilitiesFile>,
1649    builtin: &CapabilitiesFile,
1650    provider: &str,
1651    model: &str,
1652) -> (RuleResolution, ProviderDefaults) {
1653    let mut resolution = RuleResolution::default();
1654    let mut effective_defaults = ProviderDefaults::default();
1655    let mut current = provider.to_string();
1656    let mut visited = HashSet::new();
1657    while visited.insert(current.clone()) {
1658        let layer_defaults = merged_provider_defaults(user, builtin, &current);
1659        if effective_defaults.has_any_field() {
1660            effective_defaults.fill_missing_from(&layer_defaults);
1661        } else {
1662            effective_defaults.overlay(&layer_defaults);
1663        }
1664        if absorb_layer_matches(user, builtin, &current, model, &mut resolution) {
1665            break;
1666        }
1667        let next = user
1668            .and_then(|file| file.provider_family.get(&current))
1669            .or_else(|| builtin.provider_family.get(&current))
1670            .cloned();
1671        match next {
1672            Some(parent) => current = parent,
1673            None => break,
1674        }
1675    }
1676    (resolution, effective_defaults)
1677}
1678
1679fn first_matching_rule(
1680    user: Option<&CapabilitiesFile>,
1681    builtin: &CapabilitiesFile,
1682    provider: &str,
1683    model: &str,
1684) -> Option<MatchedCapabilityRule> {
1685    resolve_rule_chain(user, builtin, provider, model)
1686        .0
1687        .into_matched()
1688}
1689
1690fn suggested_tool_capability_defaults(
1691    provider: &str,
1692    model_id: &str,
1693    model: &crate::llm_config::ModelDef,
1694    matched: Option<&MatchedCapabilityRule>,
1695) -> (bool, String) {
1696    if let Some(rule) = matched.map(|matched| &matched.rule) {
1697        let native_tools = rule.native_tools.unwrap_or_else(|| {
1698            // Resolve native_tools from the pinned tool_format via its channel
1699            // so `json` (a TEXT-channel format) correctly implies
1700            // native_tools = false, identically to `text`. Falling through to
1701            // the provider heuristic for `json` would wrongly mark a gemini /
1702            // cerebras row native. Unknown formats keep the heuristic.
1703            match rule
1704                .preferred_tool_format
1705                .as_deref()
1706                .and_then(crate::llm_config::tool_format_channel)
1707            {
1708                Some(crate::llm_config::ToolFormatChannel::Native) => true,
1709                Some(crate::llm_config::ToolFormatChannel::Text) => false,
1710                None => suggested_native_tools(provider, model_id, model),
1711            }
1712        });
1713        let preferred_tool_format = rule
1714            .preferred_tool_format
1715            .clone()
1716            .unwrap_or_else(|| tool_format_for_native(native_tools));
1717        return (native_tools, preferred_tool_format);
1718    }
1719
1720    let native_tools = suggested_native_tools(provider, model_id, model);
1721    (native_tools, tool_format_for_native(native_tools))
1722}
1723
1724fn suggested_native_tools(
1725    provider: &str,
1726    model_id: &str,
1727    model: &crate::llm_config::ModelDef,
1728) -> bool {
1729    if provider == "anthropic" || model_id.contains("claude") {
1730        return true;
1731    }
1732    if matches!(
1733        provider,
1734        "openai" | "gemini" | "cerebras" | "bedrock" | "azure_openai" | "vertex"
1735    ) {
1736        return true;
1737    }
1738    model
1739        .capabilities
1740        .iter()
1741        .any(|capability| capability == "tools")
1742}
1743
/// Derive the `preferred_tool_format` for a capability row (or an unmatched
/// model) that does not pin one itself.
///
/// Native-capable models get `native`. Text-channel models get `json`
/// (fenced JSON), the global text-channel default. Heredoc (`text`) is never
/// derived here: it is reachable only through an explicit
/// `preferred_tool_format = "text"` pin or an explicit request (the reverse
/// safety valve). This is the primary default site, used for every model that
/// matches a capability row without an explicit format pin.
fn tool_format_for_native(native_tools: bool) -> String {
    String::from(if native_tools { "native" } else { "json" })
}
1758
1759fn push_matrix_rows(
1760    rows: &mut Vec<ProviderCapabilityMatrixRow>,
1761    file: &CapabilitiesFile,
1762    source: &str,
1763) {
1764    for (provider, rules) in &file.provider {
1765        for rule in rules {
1766            rows.push(rule_to_matrix_row(provider, rule, source));
1767        }
1768    }
1769}
1770
1771fn rule_to_matrix_row(
1772    provider: &str,
1773    rule: &ProviderRule,
1774    source: &str,
1775) -> ProviderCapabilityMatrixRow {
1776    ProviderCapabilityMatrixRow {
1777        provider: provider.to_string(),
1778        model: rule.model_match.clone(),
1779        version_min: rule.version_min.clone(),
1780        extends: rule.extends,
1781        thinking: rule_thinking_modes(rule),
1782        vision: rule_vision(rule),
1783        audio: rule.audio.unwrap_or(false),
1784        pdf: rule.pdf.unwrap_or(false),
1785        video: rule.video.unwrap_or(false),
1786        streaming: true,
1787        files_api_supported: rule.files_api_supported.unwrap_or(false),
1788        json_schema: rule_structured_output(rule),
1789        prefers_xml_scaffolding: rule.prefers_xml_scaffolding.unwrap_or(false),
1790        reserved_tool_call_token: rule.reserved_tool_call_token.unwrap_or(false),
1791        prefers_markdown_scaffolding: rule.prefers_markdown_scaffolding.unwrap_or(false),
1792        structured_output_mode: rule_structured_output_mode(rule),
1793        supports_assistant_prefill: rule.supports_assistant_prefill.unwrap_or(false),
1794        prefers_role_developer: rule
1795            .prefers_role_developer
1796            .unwrap_or_else(|| rule.requires_completion_tokens.unwrap_or(false)),
1797        prefers_xml_tools: rule.prefers_xml_tools.unwrap_or(false),
1798        thinking_block_style: rule_thinking_block_style(rule),
1799        native_tools: rule.native_tools.unwrap_or(false),
1800        text_tools: rule.text_tool_wire_format_supported.unwrap_or(true),
1801        preferred_tool_format: rule_preferred_tool_format(rule),
1802        tool_mode_parity: rule_tool_mode_parity(rule),
1803        tools: rule.native_tools.unwrap_or(false)
1804            || rule.text_tool_wire_format_supported.unwrap_or(true),
1805        cache: rule.prompt_caching.unwrap_or(false),
1806        serving_precision: rule
1807            .serving_precision
1808            .clone()
1809            .unwrap_or_else(|| "unverified".to_string()),
1810        source: source.to_string(),
1811    }
1812}
1813
1814fn rule_thinking_modes(rule: &ProviderRule) -> Vec<String> {
1815    rule.thinking_modes.clone().unwrap_or_else(|| {
1816        if rule.thinking.unwrap_or(false) {
1817            vec!["enabled".to_string()]
1818        } else {
1819            Vec::new()
1820        }
1821    })
1822}
1823
1824fn rule_vision(rule: &ProviderRule) -> bool {
1825    rule.vision.or(rule.vision_supported).unwrap_or(false)
1826}
1827
1828fn lookup_with(
1829    provider: &str,
1830    model: &str,
1831    builtin: &CapabilitiesFile,
1832    user: Option<&CapabilitiesFile>,
1833) -> Capabilities {
1834    // Special case: mock spoofs either shape. Try anthropic first
1835    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
1836    // resolves to the Anthropic capability row — the same behaviour
1837    // the hardcoded dispatch gave before this refactor. The native
1838    // tool-definition wire shape is pinned to OpenAI so existing
1839    // mock-based tests keep observing `t.function.name` regardless of
1840    // which family's capability row matched; per-message wire format
1841    // still tracks the matched family so Anthropic-specific request
1842    // plumbing (beta headers, file-id passthrough) is exercised when
1843    // a Claude model is mocked.
1844    if provider == "mock" {
1845        for family in ["anthropic", "openai", "gemini"] {
1846            let defaults = merged_provider_defaults(user, builtin, family);
1847            let mut resolution = RuleResolution::default();
1848            absorb_layer_matches(user, builtin, family, model, &mut resolution);
1849            if let Some(rule) = resolution.merged.as_ref() {
1850                let mut caps = rule_to_caps(rule, &defaults);
1851                if family == "anthropic" {
1852                    caps.native_tool_wire_format = "openai".to_string();
1853                }
1854                return caps;
1855            }
1856        }
1857        return Capabilities::default();
1858    }
1859
1860    // Normal chain: walk provider → family(provider) → ... with a
1861    // visited-guard to avoid cycles in malformed user overrides.
1862    let (resolution, effective_defaults) = resolve_rule_chain(user, builtin, provider, model);
1863    if let Some(rule) = resolution.merged.as_ref() {
1864        return rule_to_caps(rule, &effective_defaults);
1865    }
1866    if effective_defaults.has_any_field() {
1867        return defaults_to_caps(&effective_defaults);
1868    }
1869    Capabilities::default()
1870}
1871
1872fn merged_provider_defaults(
1873    user: Option<&CapabilitiesFile>,
1874    builtin: &CapabilitiesFile,
1875    provider: &str,
1876) -> ProviderDefaults {
1877    let mut defaults = builtin
1878        .provider_defaults
1879        .get(provider)
1880        .cloned()
1881        .unwrap_or_default();
1882    if let Some(user_defaults) = user.and_then(|file| file.provider_defaults.get(provider)) {
1883        defaults.overlay(user_defaults);
1884    }
1885    defaults
1886}
1887
1888fn defaults_to_caps(defaults: &ProviderDefaults) -> Capabilities {
1889    let empty = ProviderRule {
1890        model_match: "*".to_string(),
1891        version_min: None,
1892        extends: false,
1893        native_tools: None,
1894        message_wire_format: None,
1895        native_tool_wire_format: None,
1896        defer_loading: None,
1897        tool_search: None,
1898        responses_api: None,
1899        hosted_tools: None,
1900        remote_mcp: None,
1901        conversation_state: None,
1902        compaction: None,
1903        background_mode: None,
1904        tool_approval_policy: None,
1905        max_tools: None,
1906        prompt_caching: None,
1907        cache_breakpoint_style: None,
1908        vision: None,
1909        audio: None,
1910        pdf: None,
1911        video: None,
1912        files_api_supported: None,
1913        file_upload_wire_format: None,
1914        structured_output: None,
1915        prefers_xml_scaffolding: None,
1916        reserved_tool_call_token: None,
1917        prefers_markdown_scaffolding: None,
1918        structured_output_mode: None,
1919        supports_assistant_prefill: None,
1920        prefers_role_developer: None,
1921        prefers_xml_tools: None,
1922        thinking_block_style: None,
1923        json_schema: None,
1924        thinking_modes: None,
1925        interleaved_thinking_supported: None,
1926        anthropic_beta_features: None,
1927        thinking: None,
1928        vision_supported: None,
1929        image_url_input_supported: None,
1930        preserve_thinking: None,
1931        server_parser: None,
1932        honors_chat_template_kwargs: None,
1933        chat_template_options_field: None,
1934        requires_completion_tokens: None,
1935        requires_streaming: None,
1936        reasoning_effort_supported: None,
1937        reasoning_effort_levels: None,
1938        reasoning_none_supported: None,
1939        max_thinking_budget: None,
1940        reasoning_disable_supported: None,
1941        reasoning_required_for_tools: None,
1942        reasoning_text_promotable: None,
1943        reasoning_wire_format: None,
1944        seed_supported: None,
1945        top_k_supported: None,
1946        temperature_supported: None,
1947        top_p_supported: None,
1948        frequency_penalty_supported: None,
1949        presence_penalty_supported: None,
1950        allowed_tool_choice_modes: None,
1951        requires_tool_result_adjacency: None,
1952        supports_parallel_tool_calls: None,
1953        tools_exclude_response_format: None,
1954        recommended_endpoint: None,
1955        text_tool_wire_format_supported: None,
1956        preferred_tool_format: None,
1957        tool_mode_parity: None,
1958        tool_mode_parity_notes: None,
1959        thinking_disable_directive: None,
1960        auto_reasoning_overrides: None,
1961        provider_route_denylist: None,
1962        openrouter_provider_order: None,
1963        serving_precision: None,
1964    };
1965    let mut caps = rule_to_caps(&empty, defaults);
1966    caps.preferred_tool_format = None;
1967    caps.tool_mode_parity = None;
1968    caps
1969}
1970
1971fn rule_to_caps(rule: &ProviderRule, defaults: &ProviderDefaults) -> Capabilities {
1972    let thinking_modes = rule_thinking_modes(rule);
1973    Capabilities {
1974        native_tools: rule.native_tools.unwrap_or(false),
1975        message_wire_format: WireDialect::from_message_wire_format(
1976            &rule
1977                .message_wire_format
1978                .clone()
1979                .or_else(|| defaults.message_wire_format.clone())
1980                .unwrap_or_else(|| "openai".to_string()),
1981        ),
1982        native_tool_wire_format: rule
1983            .native_tool_wire_format
1984            .clone()
1985            .or_else(|| defaults.native_tool_wire_format.clone())
1986            .unwrap_or_else(|| "openai".to_string()),
1987        defer_loading: rule.defer_loading.unwrap_or(false),
1988        tool_search: rule.tool_search.clone().unwrap_or_default(),
1989        responses_api: rule.responses_api.unwrap_or(false),
1990        hosted_tools: rule.hosted_tools.clone().unwrap_or_default(),
1991        remote_mcp: rule.remote_mcp.unwrap_or(false),
1992        conversation_state: rule.conversation_state.unwrap_or(false),
1993        compaction: rule.compaction.unwrap_or(false),
1994        background_mode: rule.background_mode.unwrap_or(false),
1995        tool_approval_policy: rule.tool_approval_policy.clone(),
1996        max_tools: rule.max_tools,
1997        prompt_caching: rule.prompt_caching.unwrap_or(false),
1998        cache_breakpoint_style: rule
1999            .cache_breakpoint_style
2000            .clone()
2001            .unwrap_or_else(|| "none".to_string()),
2002        vision: rule_vision(rule),
2003        audio: rule.audio.unwrap_or(false),
2004        pdf: rule.pdf.unwrap_or(false),
2005        video: rule.video.unwrap_or(false),
2006        files_api_supported: rule
2007            .files_api_supported
2008            .or(defaults.files_api_supported)
2009            .unwrap_or(false),
2010        file_upload_wire_format: rule
2011            .file_upload_wire_format
2012            .clone()
2013            .or_else(|| defaults.file_upload_wire_format.clone()),
2014        structured_output: rule_structured_output(rule),
2015        json_schema: rule_structured_output(rule),
2016        prefers_xml_scaffolding: rule.prefers_xml_scaffolding.unwrap_or(false),
2017        reserved_tool_call_token: rule.reserved_tool_call_token.unwrap_or(false),
2018        prefers_markdown_scaffolding: rule.prefers_markdown_scaffolding.unwrap_or(false),
2019        structured_output_mode: rule_structured_output_mode(rule),
2020        supports_assistant_prefill: rule.supports_assistant_prefill.unwrap_or(false),
2021        prefers_role_developer: rule.prefers_role_developer.unwrap_or(false),
2022        prefers_xml_tools: rule.prefers_xml_tools.unwrap_or(false),
2023        thinking_block_style: rule_thinking_block_style(rule),
2024        thinking_modes,
2025        interleaved_thinking_supported: rule.interleaved_thinking_supported.unwrap_or(false),
2026        anthropic_beta_features: rule.anthropic_beta_features.clone().unwrap_or_default(),
2027        vision_supported: rule.vision_supported.unwrap_or(false),
2028        image_url_input_supported: rule
2029            .image_url_input_supported
2030            .or(defaults.image_url_input_supported)
2031            .unwrap_or(true),
2032        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
2033        server_parser: rule
2034            .server_parser
2035            .clone()
2036            .unwrap_or_else(|| "none".to_string()),
2037        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
2038        chat_template_options_field: rule.chat_template_options_field.clone(),
2039        requires_completion_tokens: rule.requires_completion_tokens.unwrap_or(false),
2040        requires_streaming: rule.requires_streaming.unwrap_or(false),
2041        reasoning_effort_supported: rule.reasoning_effort_supported.unwrap_or(false),
2042        reasoning_effort_levels: rule.reasoning_effort_levels.clone().unwrap_or_default(),
2043        reasoning_none_supported: rule.reasoning_none_supported.unwrap_or(false),
2044        max_thinking_budget: rule.max_thinking_budget,
2045        reasoning_disable_supported: rule.reasoning_disable_supported.unwrap_or(true),
2046        reasoning_required_for_tools: rule.reasoning_required_for_tools.unwrap_or(false),
2047        reasoning_text_promotable: rule.reasoning_text_promotable.unwrap_or(true),
2048        reasoning_wire_format: rule
2049            .reasoning_wire_format
2050            .clone()
2051            .or_else(|| defaults.reasoning_wire_format.clone()),
2052        seed_supported: rule
2053            .seed_supported
2054            .or(defaults.seed_supported)
2055            .unwrap_or(true),
2056        top_k_supported: rule
2057            .top_k_supported
2058            .or(defaults.top_k_supported)
2059            .unwrap_or(true),
2060        temperature_supported: rule
2061            .temperature_supported
2062            .or(defaults.temperature_supported)
2063            .unwrap_or(true),
2064        top_p_supported: rule
2065            .top_p_supported
2066            .or(defaults.top_p_supported)
2067            .unwrap_or(true),
2068        frequency_penalty_supported: rule
2069            .frequency_penalty_supported
2070            .or(defaults.frequency_penalty_supported)
2071            .unwrap_or(true),
2072        presence_penalty_supported: rule
2073            .presence_penalty_supported
2074            .or(defaults.presence_penalty_supported)
2075            .unwrap_or(true),
2076        allowed_tool_choice_modes: rule.allowed_tool_choice_modes.clone().unwrap_or_default(),
2077        requires_tool_result_adjacency: rule.requires_tool_result_adjacency.unwrap_or(false),
2078        supports_parallel_tool_calls: rule.supports_parallel_tool_calls.unwrap_or(true),
2079        tools_exclude_response_format: rule.tools_exclude_response_format.unwrap_or(false),
2080        recommended_endpoint: rule.recommended_endpoint.clone(),
2081        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
2082        preferred_tool_format: Some(rule_preferred_tool_format(rule)),
2083        tool_mode_parity: Some(rule_tool_mode_parity(rule)),
2084        tool_mode_parity_notes: rule.tool_mode_parity_notes.clone(),
2085        thinking_disable_directive: rule.thinking_disable_directive.clone(),
2086        auto_reasoning_overrides: rule.auto_reasoning_overrides.clone().unwrap_or_default(),
2087        provider_route_denylist: rule.provider_route_denylist.clone().unwrap_or_default(),
2088        openrouter_provider_order: rule.openrouter_provider_order.clone().unwrap_or_default(),
2089        serving_precision: rule
2090            .serving_precision
2091            .clone()
2092            .unwrap_or_else(|| "unverified".to_string()),
2093    }
2094}
2095
2096fn rule_preferred_tool_format(rule: &ProviderRule) -> String {
2097    // This is the `caps.preferred_tool_format` the runtime `lookup` returns for
2098    // a matched capability row. When the row pins a format, honor it (including
2099    // an explicit `text` — the reverse safety valve). Otherwise derive: native
2100    // models get `native`, text-channel models get `json` (fenced-JSON), the
2101    // GLOBAL text-channel default. Heredoc `text` is never auto-derived.
2102    rule.preferred_tool_format.clone().unwrap_or_else(|| {
2103        if rule.native_tools.unwrap_or(false) {
2104            "native".to_string()
2105        } else {
2106            "json".to_string()
2107        }
2108    })
2109}
2110
2111fn rule_tool_mode_parity(rule: &ProviderRule) -> String {
2112    rule.tool_mode_parity.clone().unwrap_or_else(|| {
2113        match (
2114            rule.native_tools.unwrap_or(false),
2115            rule.text_tool_wire_format_supported.unwrap_or(true),
2116        ) {
2117            (true, true) => "unknown".to_string(),
2118            (true, false) => "native_only".to_string(),
2119            (false, true) => "text_only".to_string(),
2120            (false, false) => "unsupported".to_string(),
2121        }
2122    })
2123}
2124
2125fn rule_structured_output(rule: &ProviderRule) -> Option<String> {
2126    rule.structured_output
2127        .clone()
2128        .or_else(|| rule.json_schema.clone())
2129        .filter(|value| value != "none")
2130}
2131
2132fn rule_structured_output_mode(rule: &ProviderRule) -> String {
2133    if let Some(mode) = &rule.structured_output_mode {
2134        return mode.clone();
2135    }
2136    match rule_structured_output(rule).as_deref() {
2137        Some("native") | Some("format_kw") => "native_json".to_string(),
2138        Some("tool_use") => "xml_tagged".to_string(),
2139        _ => "none".to_string(),
2140    }
2141}
2142
2143fn rule_thinking_block_style(rule: &ProviderRule) -> String {
2144    rule.thinking_block_style.clone().unwrap_or_else(|| {
2145        if rule.reasoning_effort_supported.unwrap_or(false)
2146            || rule.requires_completion_tokens.unwrap_or(false)
2147        {
2148            "reasoning_summary".to_string()
2149        } else {
2150            "none".to_string()
2151        }
2152    })
2153}
2154
2155pub(crate) fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
2156    let lower = model.to_lowercase();
2157    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
2158        return false;
2159    }
2160    if let Some(version_min) = &rule.version_min {
2161        if version_min.len() != 2 {
2162            return false;
2163        }
2164        let want = (version_min[0], version_min[1]);
2165        let have = match extract_version(model) {
2166            Some(v) => v,
2167            // `version_min` was set but the model ID can't be parsed.
2168            // Fail closed: skip this rule so more permissive catch-all
2169            // rules below can still match.
2170            None => return false,
2171        };
2172        if have < want {
2173            return false;
2174        }
2175    }
2176    true
2177}
2178
2179/// Extract `(major, minor)` from a model ID by trying the Anthropic
2180/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
2181/// Both parsers return `None` for shapes they don't recognise so this
2182/// never mis-parses across families.
2183fn extract_version(model: &str) -> Option<(u32, u32)> {
2184    claude_generation(model).or_else(|| gpt_generation(model))
2185}
2186
2187// Model-pattern matching for capability rules. Shared workspace semantics live
2188// in `harn-glob`; keep capability and provider matching on that helper instead
2189// of mirroring glob behavior locally.
2190use harn_glob::match_name as glob_match;
2191
2192#[cfg(test)]
2193mod tests {
2194    use super::*;
2195
2196    fn reset() {
2197        clear_user_overrides();
2198    }
2199
2200    fn assert_cerebras_effort_reasoning(model: &str, thinking_block_style: &str) {
2201        let caps = lookup("cerebras", model);
2202        assert_eq!(caps.thinking_modes, vec!["effort"]);
2203        assert!(caps.reasoning_effort_supported);
2204        // tool_format is NOT asserted here: cerebras gpt-oss and zai-glm have
2205        // different defaults (gpt-oss harmonized to `json`, glm stays
2206        // `native`), and this shared helper is about reasoning-effort
2207        // behavior. Tool-format resolution is asserted in the dedicated
2208        // harmonization tests.
2209        assert_eq!(caps.structured_output.as_deref(), Some("native"));
2210        assert_eq!(caps.structured_output_mode, "native_json");
2211        assert_eq!(caps.thinking_block_style, thinking_block_style);
2212    }
2213
2214    fn assert_openrouter_anthropic_runtime_parity(model: &str) {
2215        let direct = lookup("anthropic", model);
2216        let routed = lookup("openrouter", model);
2217
2218        assert_eq!(
2219            routed.native_tools, direct.native_tools,
2220            "{model}: native tool support should match direct Anthropic"
2221        );
2222        assert_eq!(
2223            routed.preferred_tool_format, direct.preferred_tool_format,
2224            "{model}: preferred tool format should match direct Anthropic"
2225        );
2226        assert_eq!(
2227            routed.structured_output, direct.structured_output,
2228            "{model}: structured output transport should match direct Anthropic"
2229        );
2230        assert_eq!(
2231            routed.structured_output_mode, direct.structured_output_mode,
2232            "{model}: structured output mode should match direct Anthropic"
2233        );
2234        assert_eq!(
2235            routed.thinking_modes,
2236            Vec::<String>::new(),
2237            "{model}: OpenRouter Claude routes must not advertise direct Anthropic thinking controls"
2238        );
2239        assert!(
2240            !routed.reasoning_effort_supported,
2241            "{model}: OpenRouter Claude routes must not advertise direct Anthropic effort controls"
2242        );
2243        assert!(
2244            !routed.interleaved_thinking_supported,
2245            "{model}: OpenRouter Claude routes must not advertise interleaved thinking"
2246        );
2247        assert_eq!(
2248            routed.supports_assistant_prefill, direct.supports_assistant_prefill,
2249            "{model}: assistant prefill support should match direct Anthropic"
2250        );
2251        assert_eq!(
2252            routed.prompt_caching, direct.prompt_caching,
2253            "{model}: prompt cache support should match direct Anthropic"
2254        );
2255        assert_eq!(
2256            routed.prefers_xml_scaffolding, direct.prefers_xml_scaffolding,
2257            "{model}: XML scaffolding preference should match direct Anthropic"
2258        );
2259        assert_eq!(
2260            routed.prefers_markdown_scaffolding, direct.prefers_markdown_scaffolding,
2261            "{model}: Markdown scaffolding preference should match direct Anthropic"
2262        );
2263        assert_eq!(
2264            routed.prefers_role_developer, direct.prefers_role_developer,
2265            "{model}: developer role preference should match direct Anthropic"
2266        );
2267        assert_eq!(
2268            routed.prefers_xml_tools, direct.prefers_xml_tools,
2269            "{model}: XML tool preference should match direct Anthropic"
2270        );
2271        assert_eq!(
2272            routed.thinking_block_style, direct.thinking_block_style,
2273            "{model}: thinking block style should match direct Anthropic"
2274        );
2275        assert_eq!(
2276            routed.text_tool_wire_format_supported, direct.text_tool_wire_format_supported,
2277            "{model}: text-tool fallback support should match direct Anthropic"
2278        );
2279    }
2280
2281    #[test]
2282    fn every_catalogued_chat_model_has_explicit_tool_capabilities() {
2283        reset();
2284        let report = audit_builtin_catalogued_chat_model_tool_capabilities();
2285        assert!(report.ok(), "{}", report.render_human());
2286    }
2287
2288    #[test]
2289    fn every_catalogued_alias_has_explicit_tool_capabilities() {
2290        // The model-level audit only covers priced catalog `models`, so a
2291        // `[[provider.local]]` / Ollama alias (e.g. the local gemma-4 route in
2292        // Fix A) could omit native_tools/preferred_tool_format and silently
2293        // degrade to text tools without tripping a test. Walk every alias's
2294        // (provider, id) through the same matcher and require explicit fields.
2295        reset();
2296        let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
2297            .expect("providers.toml must parse at build time");
2298        let builtin = builtin();
2299        let mut gaps = Vec::new();
2300        for (alias, def) in &catalog.aliases {
2301            let matched = first_matching_rule(None, builtin, &def.provider, &def.id);
2302            let explicit = matched
2303                .as_ref()
2304                .map(|matched| {
2305                    matched.rule.native_tools.is_some()
2306                        && matched.rule.preferred_tool_format.is_some()
2307                })
2308                .unwrap_or(false);
2309            if !explicit {
2310                gaps.push(format!(
2311                    "{alias} -> {}:{} (rule={})",
2312                    def.provider,
2313                    def.id,
2314                    matched
2315                        .as_ref()
2316                        .map(|matched| matched.rule.model_match.as_str())
2317                        .unwrap_or("<none>")
2318                ));
2319            }
2320        }
2321        assert!(
2322            gaps.is_empty(),
2323            "aliases missing explicit native_tools/preferred_tool_format:\n- {}",
2324            gaps.join("\n- ")
2325        );
2326    }
2327
2328    #[test]
2329    fn every_catalogued_alias_tool_format_pin_is_safe_for_route() {
2330        // Alias pins are consumed directly by downstream catalogs and CLI
2331        // routing. They must not encode a known-broken channel that the
2332        // central runtime guard would have to correct later.
2333        reset();
2334        let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
2335            .expect("providers.toml must parse at build time");
2336        let mut unsafe_pins = Vec::new();
2337        for (alias, def) in &catalog.aliases {
2338            let Some(tool_format) = def.tool_format.as_deref() else {
2339                continue;
2340            };
2341            let decision = validate_tool_format(&def.provider, &def.id, tool_format);
2342            if let Some(correction) = decision.correction.as_deref() {
2343                unsafe_pins.push(format!(
2344                    "{alias} -> {}:{} pins {tool_format}, would be corrected to {} ({correction})",
2345                    def.provider, def.id, decision.effective
2346                ));
2347            }
2348        }
2349        assert!(
2350            unsafe_pins.is_empty(),
2351            "aliases pin unsafe tool_format values:\n- {}",
2352            unsafe_pins.join("\n- ")
2353        );
2354    }
2355
2356    #[test]
2357    fn tool_capability_audit_reports_suggested_defaults() {
2358        reset();
2359        let capabilities: CapabilitiesFile = toml::from_str(
2360            r#"
2361[[provider.acme]]
2362model_match = "acme-good-*"
2363preferred_tool_format = "native"
2364"#,
2365        )
2366        .unwrap();
2367        let report = audit_tool_capability_coverage(
2368            vec![(
2369                "acme-good-1".to_string(),
2370                crate::llm_config::ModelDef {
2371                    name: "Acme Good".to_string(),
2372                    provider: "acme".to_string(),
2373                    context_window: 128_000,
2374                    logical_model: None,
2375                    equivalence_group: None,
2376                    served_variant: None,
2377                    wire_model: None,
2378                    api_dialect: None,
2379                    rate_limits: None,
2380                    performance: None,
2381                    architecture: None,
2382                    local_memory: None,
2383                    runtime_context_window: None,
2384                    stream_timeout: None,
2385                    capabilities: Vec::new(),
2386                    pricing: Some(crate::llm_config::ModelPricing {
2387                        input_per_mtok: 1.0,
2388                        output_per_mtok: 2.0,
2389                        cache_read_per_mtok: None,
2390                        cache_write_per_mtok: None,
2391                    }),
2392                    deprecated: false,
2393                    deprecation_note: None,
2394                    superseded_by: None,
2395                    fast_mode: None,
2396                    quality_tags: Vec::new(),
2397                    availability: crate::llm_config::ModelAvailability::Serverless,
2398                    tier: None,
2399                    open_weight: None,
2400                    strengths: Vec::new(),
2401                    benchmarks: std::collections::BTreeMap::new(),
2402                    family: None,
2403                    lineage: None,
2404                    complementary_with: Vec::new(),
2405                    avoid_as_reviewer_for: Vec::new(),
2406                },
2407            )],
2408            &capabilities,
2409            None,
2410        );
2411
2412        assert!(!report.ok());
2413        assert_eq!(report.audited_models, 1);
2414        assert_eq!(report.gaps.len(), 1);
2415        assert_eq!(report.gaps[0].missing_fields, ["native_tools"]);
2416        assert!(report.gaps[0].suggested_native_tools);
2417        assert_eq!(report.gaps[0].suggested_preferred_tool_format, "native");
2418        assert!(report.render_human().contains(
2419            "acme:acme-good-1 (provider.acme model_match=\"acme-good-*\") missing native_tools; suggest native_tools = true, preferred_tool_format = \"native\""
2420        ));
2421    }
2422
2423    #[test]
2424    fn openrouter_qwen36_keeps_native_and_denies_ambient_upstream() {
2425        reset();
2426        for model in [
2427            "qwen/qwen3.6-flash",
2428            "qwen/qwen3.6-plus",
2429            "qwen/qwen3.6-35b-a3b",
2430        ] {
2431            let caps = lookup("openrouter", model);
2432            // The route-around must NOT downgrade the tool format: native stays on.
2433            assert!(caps.native_tools, "{model}: native tools");
2434            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2435            // The broken Ambient upstream is denied via the data-driven denylist.
2436            assert_eq!(
2437                caps.provider_route_denylist,
2438                vec!["Ambient".to_string()],
2439                "{model}: denylist",
2440            );
2441        }
2442    }
2443
2444    #[test]
2445    fn provider_route_denylist_defaults_empty_for_unmarked_rows() {
2446        reset();
2447        let caps = lookup("anthropic", "claude-opus-4-7");
2448        assert!(caps.provider_route_denylist.is_empty());
2449    }
2450
2451    #[test]
2452    fn strict_openai_compat_rows_require_tool_result_adjacency() {
2453        reset();
2454        assert!(lookup("moonshot", "moonshot/kimi-k2.6").requires_tool_result_adjacency);
2455        assert!(lookup("moonshot", "moonshot/kimi-k2.7-code").requires_tool_result_adjacency);
2456        assert!(lookup("minimax", "MiniMax-M2").requires_tool_result_adjacency);
2457        assert!(lookup("minimax", "MiniMax-M2.7").requires_tool_result_adjacency);
2458        assert!(!lookup("openai", "gpt-4o").requires_tool_result_adjacency);
2459    }
2460
2461    #[test]
2462    fn fireworks_gpt_oss_disables_parallel_tool_call_history() {
2463        reset();
2464        assert!(
2465            !lookup("fireworks", "accounts/fireworks/models/gpt-oss-120b")
2466                .supports_parallel_tool_calls
2467        );
2468        assert!(lookup("openai", "gpt-4o").supports_parallel_tool_calls);
2469    }
2470
2471    #[test]
2472    fn cerebras_tools_exclude_response_format() {
2473        reset();
2474        assert!(lookup("cerebras", "gpt-oss-120b").tools_exclude_response_format);
2475        assert!(lookup("cerebras", "zai-glm-4.7").tools_exclude_response_format);
2476        assert!(!lookup("openai", "gpt-4o").tools_exclude_response_format);
2477    }
2478
2479    #[test]
2480    fn serving_precision_seeds_known_gpt_oss_verdicts() {
2481        reset();
2482        // Full-precision routes verified during the 2026-06 meter effort.
2483        assert_eq!(
2484            lookup("fireworks", "accounts/fireworks/models/gpt-oss-120b").serving_precision,
2485            "trusted"
2486        );
2487        assert_eq!(
2488            lookup("openrouter", "openai/gpt-oss-120b").serving_precision,
2489            "trusted"
2490        );
2491        // SambaNova serves gpt-oss quantized (proven 0/5 vs reference 3/3).
2492        assert_eq!(
2493            lookup("sambanova", "gpt-oss-120b").serving_precision,
2494            "degraded"
2495        );
2496        // Cerebras is full precision but rate-throttled to unusable timing.
2497        assert_eq!(
2498            lookup("cerebras", "gpt-oss-120b").serving_precision,
2499            "throttled"
2500        );
2501    }
2502
2503    #[test]
2504    fn serving_precision_defaults_unverified_for_unmarked_rows() {
2505        reset();
2506        // A route with no serving_precision verdict resolves to "unverified",
2507        // never an empty string, so callers can branch on a stable enum.
2508        assert_eq!(
2509            lookup("anthropic", "claude-opus-4-7").serving_precision,
2510            "unverified"
2511        );
2512    }
2513
2514    #[test]
2515    fn anthropic_opus_47_gets_full_capabilities() {
2516        reset();
2517        let caps = lookup("anthropic", "claude-opus-4-7");
2518        assert!(caps.native_tools);
2519        assert!(caps.defer_loading);
2520        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2521        assert!(caps.prompt_caching);
2522        assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2523        assert!(caps.reasoning_effort_supported);
2524        assert_eq!(
2525            caps.reasoning_effort_levels,
2526            vec!["low", "medium", "high", "xhigh", "max"]
2527        );
2528        assert!(caps.interleaved_thinking_supported);
2529        assert!(caps.vision_supported);
2530        assert!(caps.audio);
2531        assert!(caps.pdf);
2532        assert!(caps.files_api_supported);
2533        assert_eq!(caps.max_tools, Some(10000));
2534        assert!(caps.prefers_xml_scaffolding);
2535        assert!(!caps.prefers_markdown_scaffolding);
2536        assert_eq!(caps.structured_output_mode, "xml_tagged");
2537        assert!(!caps.supports_assistant_prefill);
2538        assert!(!caps.prefers_role_developer);
2539        assert!(caps.prefers_xml_tools);
2540        assert_eq!(caps.thinking_block_style, "thinking_blocks");
2541    }
2542
2543    #[test]
2544    fn anthropic_sonnet_5_gets_adaptive_effort_capabilities() {
2545        reset();
2546        let caps = lookup("anthropic", "claude-sonnet-5");
2547        assert!(caps.native_tools);
2548        assert!(caps.defer_loading);
2549        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2550        assert!(caps.prompt_caching);
2551        assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2552        assert!(caps.reasoning_effort_supported);
2553        assert_eq!(
2554            caps.reasoning_effort_levels,
2555            vec!["low", "medium", "high", "xhigh", "max"]
2556        );
2557        assert!(caps.reasoning_disable_supported);
2558        assert!(!caps.reasoning_none_supported);
2559        assert!(caps.interleaved_thinking_supported);
2560        assert!(!caps.supports_assistant_prefill);
2561        assert_eq!(caps.thinking_block_style, "thinking_blocks");
2562    }
2563
2564    #[test]
2565    fn anthropic_fable_effort_cannot_be_disabled() {
2566        reset();
2567        for model in ["claude-fable-5", "anthropic/claude-fable-5"] {
2568            let caps = lookup("anthropic", model);
2569            assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2570            assert!(caps.reasoning_effort_supported);
2571            assert_eq!(
2572                caps.reasoning_effort_levels,
2573                vec!["low", "medium", "high", "xhigh", "max"]
2574            );
2575            assert!(!caps.reasoning_disable_supported);
2576            assert!(!caps.supports_assistant_prefill);
2577        }
2578    }
2579
2580    #[test]
2581    fn anthropic_opus_46_uses_budgeted_thinking() {
2582        reset();
2583        let caps = lookup("anthropic", "claude-opus-4-6");
2584        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2585        assert!(caps.interleaved_thinking_supported);
2586        assert!(!caps.supports_assistant_prefill);
2587    }
2588
2589    #[test]
2590    fn anthropic_opus_45_does_not_support_interleaved_thinking() {
2591        reset();
2592        let caps = lookup("anthropic", "claude-opus-4-5");
2593        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2594        assert!(!caps.interleaved_thinking_supported);
2595        assert!(caps.supports_assistant_prefill);
2596    }
2597
2598    #[test]
2599    fn openrouter_claude_rows_track_direct_anthropic_runtime_quirks() {
2600        reset();
2601        for model in [
2602            "anthropic/claude-fable-5-0",
2603            "anthropic/claude-mythos-5-0",
2604            "anthropic/claude-haiku-4-5",
2605            "anthropic/claude-haiku-4-7",
2606            "anthropic/claude-sonnet-4-6",
2607            "anthropic/claude-sonnet-4-7",
2608            "anthropic/claude-sonnet-5",
2609            "anthropic/claude-opus-4-6",
2610            "anthropic/claude-opus-4-7",
2611        ] {
2612            assert_openrouter_anthropic_runtime_parity(model);
2613        }
2614    }
2615
2616    #[test]
2617    fn override_can_supply_anthropic_beta_features() {
2618        reset();
2619        let toml_src = r#"
2620[[provider.anthropic]]
2621model_match = "claude-custom-*"
2622native_tools = true
2623anthropic_beta_features = ["fine-grained-tool-streaming-2025-05-14"]
2624"#;
2625        set_user_overrides_toml(toml_src).unwrap();
2626        let caps = lookup("anthropic", "claude-custom-1");
2627        assert_eq!(
2628            caps.anthropic_beta_features,
2629            vec!["fine-grained-tool-streaming-2025-05-14"]
2630        );
2631        reset();
2632    }
2633
2634    #[test]
2635    fn anthropic_haiku_44_has_no_tool_search() {
2636        reset();
2637        let caps = lookup("anthropic", "claude-haiku-4-4");
2638        // Haiku 4.4 falls through to the `claude-*` catch-all row.
2639        assert!(caps.native_tools);
2640        assert!(caps.prompt_caching);
2641        assert!(!caps.defer_loading);
2642        assert!(caps.tool_search.is_empty());
2643    }
2644
2645    #[test]
2646    fn anthropic_haiku_45_supports_tool_search() {
2647        reset();
2648        let caps = lookup("anthropic", "claude-haiku-4-5");
2649        assert!(caps.defer_loading);
2650        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2651    }
2652
2653    #[test]
2654    fn old_claude_gets_catchall() {
2655        reset();
2656        let caps = lookup("anthropic", "claude-opus-3-5");
2657        assert!(caps.native_tools);
2658        assert!(caps.prompt_caching);
2659        assert!(!caps.defer_loading);
2660        assert!(caps.tool_search.is_empty());
2661    }
2662
2663    #[test]
2664    fn openai_gpt_54_supports_tool_search() {
2665        reset();
2666        let caps = lookup("openai", "gpt-5.4");
2667        assert!(caps.defer_loading);
2668        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
2669        assert_eq!(caps.json_schema.as_deref(), Some("native"));
2670        assert_eq!(caps.thinking_modes, vec!["effort"]);
2671        assert!(caps.reasoning_effort_supported);
2672        assert!(caps.reasoning_none_supported);
2673        assert!(!caps.prefers_xml_scaffolding);
2674        assert!(caps.prefers_markdown_scaffolding);
2675        assert_eq!(caps.structured_output_mode, "native_json");
2676        assert!(!caps.supports_assistant_prefill);
2677        assert!(!caps.prefers_role_developer);
2678        assert!(!caps.prefers_xml_tools);
2679        assert_eq!(caps.thinking_block_style, "reasoning_summary");
2680    }
2681
2682    #[test]
2683    fn openai_gpt_53_has_reasoning_none_without_tool_search() {
2684        reset();
2685        let caps = lookup("openai", "gpt-5.3");
2686        assert!(caps.native_tools);
2687        assert!(!caps.defer_loading);
2688        assert!(caps.vision_supported);
2689        assert!(caps.tool_search.is_empty());
2690        assert_eq!(caps.thinking_modes, vec!["effort"]);
2691        assert!(caps.reasoning_effort_supported);
2692        assert!(caps.reasoning_none_supported);
2693    }
2694
2695    #[test]
2696    fn openai_original_gpt_5_has_reasoning_floor_without_none() {
2697        reset();
2698        let caps = lookup("openai", "gpt-5");
2699        assert!(caps.native_tools);
2700        assert!(!caps.defer_loading);
2701        assert_eq!(caps.thinking_modes, vec!["effort"]);
2702        assert!(caps.reasoning_effort_supported);
2703        assert!(!caps.reasoning_none_supported);
2704    }
2705
2706    #[test]
2707    fn gemini_thinking_budget_quirks_are_declared_in_matrix() {
2708        reset();
2709        // Flash: 24576 ceiling, can disable thinking.
2710        let flash = lookup("gemini", "gemini-2.5-flash");
2711        assert_eq!(flash.max_thinking_budget, Some(24_576));
2712        assert!(flash.reasoning_disable_supported);
2713        assert!(flash.thinking_modes.iter().any(|m| m == "effort"));
2714        // Pro: 32768 ceiling, cannot disable thinking.
2715        let pro = lookup("gemini", "gemini-2.5-pro");
2716        assert_eq!(pro.max_thinking_budget, Some(32_768));
2717        assert!(!pro.reasoning_disable_supported);
2718        assert!(pro.thinking_modes.iter().any(|m| m == "effort"));
2719        // The `models/` REST resource name resolves the same.
2720        let flash_resource = lookup("gemini", "models/gemini-2.5-flash");
2721        assert_eq!(flash_resource.max_thinking_budget, Some(24_576));
2722        assert!(flash_resource.reasoning_disable_supported);
2723        // Non-2.5 gemini has no effort thinking support -> provider sends no
2724        // thinkingConfig (unchanged behavior).
2725        let legacy = lookup("gemini", "gemini-1.5-pro");
2726        assert!(!legacy.thinking_modes.iter().any(|m| m == "effort"));
2727    }
2728
2729    #[test]
2730    fn openai_gpt_4o_matrix_fields_include_multimodal_support() {
2731        reset();
2732        let caps = lookup("openai", "gpt-4o");
2733        assert!(caps.native_tools);
2734        assert!(caps.vision);
2735        assert!(caps.audio);
2736        assert!(!caps.pdf);
2737        assert_eq!(caps.json_schema.as_deref(), Some("native"));
2738    }
2739
2740    #[test]
2741    fn openai_reasoning_models_support_effort() {
2742        reset();
2743        let caps = lookup("openai", "o3");
2744        assert_eq!(caps.thinking_modes, vec!["effort"]);
2745        assert!(caps.requires_completion_tokens);
2746        assert!(caps.reasoning_effort_supported);
2747        assert!(caps.prefers_role_developer);
2748        assert_eq!(caps.thinking_block_style, "reasoning_summary");
2749        let prefixed = lookup("openrouter", "openai/o4-mini");
2750        assert!(prefixed.requires_completion_tokens);
2751        assert!(prefixed.reasoning_effort_supported);
2752    }
2753
2754    #[test]
2755    fn vision_capability_gates_known_multimodal_models() {
2756        reset();
2757        let minimax_m3 = lookup("minimax", "MiniMax-M3");
2758        assert!(minimax_m3.vision_supported);
2759        assert!(minimax_m3.video);
2760        assert_eq!(minimax_m3.thinking_modes, vec!["adaptive"]);
2761        assert_eq!(minimax_m3.reasoning_wire_format.as_deref(), Some("minimax"));
2762        assert!(minimax_m3.requires_completion_tokens);
2763        let openrouter_m3 = lookup("openrouter", "minimax/minimax-m3");
2764        assert!(openrouter_m3.vision_supported);
2765        assert!(openrouter_m3.video);
2766        assert!(lookup("openai", "gpt-4o").vision_supported);
2767        assert!(lookup("openai", "gpt-5.4-preview").vision_supported);
2768        assert!(lookup("anthropic", "claude-sonnet-4-6").vision_supported);
2769        assert!(lookup("anthropic", "claude-sonnet-4-6").pdf);
2770        assert!(lookup("anthropic", "claude-sonnet-4-6").files_api_supported);
2771        assert!(lookup("openrouter", "google/gemini-2.5-flash").vision_supported);
2772        assert!(lookup("gemini", "gemini-2.5-flash").vision_supported);
2773        assert!(lookup("gemini", "gemini-2.5-flash").audio);
2774        assert!(lookup("gemini", "gemini-2.5-flash").pdf);
2775        assert_eq!(
2776            lookup("gemini", "gemini-2.5-flash").structured_output_mode,
2777            "native_json"
2778        );
2779        assert!(lookup("ollama", "llava:latest").vision_supported);
2780        assert!(lookup("ollama", "gemma4:26b").vision_supported);
2781        assert!(lookup("ollama", "gemma4-128k:latest").vision_supported);
2782        assert!(!lookup("openai", "gpt-3.5-turbo").vision_supported);
2783        assert!(!lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4").vision_supported);
2784    }
2785
2786    #[test]
2787    fn openrouter_gemini_explicit_cache_uses_block_breakpoints() {
2788        reset();
2789        let caps = lookup("openrouter", "google/gemini-2.5-flash");
2790        assert!(caps.prompt_caching);
2791        assert_eq!(caps.cache_breakpoint_style, "last_block");
2792    }
2793
2794    #[test]
2795    fn local_gemma4_exposes_native_tools_and_structured_output() {
2796        // Fix A: vLLM/SGLang serve Gemma 4 over the OpenAI-compatible surface,
2797        // so the local route must declare native tools + native structured
2798        // output like its hosted gemma-4 siblings — not silently fall back to
2799        // text tools.
2800        reset();
2801        let caps = lookup("local", "gemma-4-26b-a4b-it");
2802        assert!(caps.native_tools);
2803        assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2804        assert_eq!(caps.structured_output.as_deref(), Some("native"));
2805    }
2806
2807    #[test]
2808    fn local_gemma4_exposes_vision_like_hosted_siblings() {
2809        // harn#3585: Gemma 4 is multimodal on every served surface. The local
2810        // OpenAI-compat route must declare vision so the derived structured
2811        // caps and emitted `capability_tags` agree with the gemini/openrouter/
2812        // together siblings.
2813        reset();
2814        for model in ["gemma-4-e4b-it", "gemma-4-e2b-it", "gemma-4-26b-a4b-it"] {
2815            let caps = lookup("local", model);
2816            assert!(
2817                caps.vision_supported,
2818                "local {model} should expose vision_supported"
2819            );
2820            let tags = crate::llm_config::capability_tags_from_capabilities(&caps);
2821            assert!(
2822                tags.iter().any(|t| t == "vision"),
2823                "local {model} emitted capability_tags should include `vision`, got {tags:?}"
2824            );
2825        }
2826    }
2827
2828    #[test]
2829    fn ollama_vision_models_have_no_reasoning_scaffold() {
2830        // Fix B: bakllava / llama3.2-vision / gemma3 are caption/vision models
2831        // with no reasoning capability; they must resolve to the "none" thinking
2832        // block style (like the llava sibling) so the template does not emit a
2833        // spurious "## Reasoning" scaffold.
2834        reset();
2835        for model in ["bakllava:latest", "llama3.2-vision:11b", "gemma3:27b"] {
2836            assert_eq!(
2837                lookup("ollama", model).thinking_block_style,
2838                "none",
2839                "{model} should resolve to thinking_block_style=\"none\""
2840            );
2841        }
2842        // Sibling sanity check.
2843        assert_eq!(
2844            lookup("ollama", "llava:latest").thinking_block_style,
2845            "none"
2846        );
2847    }
2848
2849    #[test]
2850    fn ollama_gemma4_supports_structured_output_and_text_tools() {
2851        // Fix C: Ollama honors the `format` kwarg, so both gemma4 rules must
2852        // declare structured_output="format_kw" (otherwise JSON/schema output
2853        // was blocked) plus explicit text tools for parity with the qwen rules.
2854        reset();
2855        for model in ["gemma4:12b-mlx", "gemma4:26b"] {
2856            let caps = lookup("ollama", model);
2857            assert_eq!(
2858                caps.structured_output.as_deref(),
2859                Some("format_kw"),
2860                "{model} should resolve structured_output=\"format_kw\""
2861            );
2862            assert!(!caps.native_tools, "{model} should use text tools");
2863            assert_eq!(
2864                caps.preferred_tool_format.as_deref(),
2865                Some("text"),
2866                "{model} should prefer text tool format"
2867            );
2868            assert_eq!(
2869                caps.thinking_block_style, "none",
2870                "{model} ships thinking-off"
2871            );
2872        }
2873    }
2874
2875    #[test]
2876    fn openrouter_inherits_openai() {
2877        reset();
2878        let caps = lookup("openrouter", "gpt-5.4");
2879        assert!(caps.defer_loading);
2880        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
2881        assert_eq!(caps.reasoning_wire_format.as_deref(), Some("openrouter"));
2882        assert!(!caps.top_k_supported);
2883    }
2884
2885    #[test]
2886    fn openrouter_kimi27_code_records_tool_choice_and_sampling_limits() {
2887        reset();
2888        let caps = lookup("openrouter", "moonshotai/kimi-k2.7-code");
2889        assert!(caps.native_tools);
2890        assert!(caps.prompt_caching);
2891        assert!(caps.vision_supported);
2892        assert!(caps.video);
2893        // 2026-06-24 forced-format sweep flipped this route native -> text:
2894        // native double-escaped backslash bodies (1/5) and fenced-JSON produced
2895        // no parseable Harn call (0/5); heredoc text was 5/5 byte-clean.
2896        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
2897        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
2898        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2899        assert_eq!(caps.allowed_tool_choice_modes, vec!["auto", "none"]);
2900        assert!(!caps.temperature_supported);
2901        assert!(!caps.top_p_supported);
2902        assert!(!caps.frequency_penalty_supported);
2903        assert!(!caps.presence_penalty_supported);
2904
2905        let prior = lookup("openrouter", "moonshotai/kimi-k2.6");
2906        assert!(prior.prompt_caching);
2907        assert!(prior.vision_supported);
2908        assert!(!prior.video);
2909        assert!(prior.allowed_tool_choice_modes.is_empty());
2910        assert!(prior.temperature_supported);
2911    }
2912
2913    #[test]
2914    fn qwen37_routes_record_prompt_cache_vision_and_streaming_quirks() {
2915        reset();
2916        let plus = lookup("openrouter", "qwen/qwen3.7-plus");
2917        assert!(plus.native_tools);
2918        assert!(plus.prompt_caching);
2919        assert!(plus.vision_supported);
2920        assert_eq!(plus.preferred_tool_format.as_deref(), Some("native"));
2921        assert_eq!(plus.thinking_modes, vec!["enabled"]);
2922        assert_eq!(
2923            plus.auto_reasoning_overrides
2924                .get("agent")
2925                .map(String::as_str),
2926            Some("off"),
2927            "Qwen tool-bearing agent turns should disable reasoning automatically",
2928        );
2929
2930        let max = lookup("openrouter", "qwen/qwen3.7-max");
2931        assert!(max.native_tools);
2932        assert!(max.prompt_caching);
2933        assert!(!max.vision_supported);
2934        assert_eq!(max.thinking_modes, vec!["enabled"]);
2935
2936        let together = lookup("together", "Qwen/Qwen3.7-Max");
2937        assert!(together.native_tools);
2938        assert!(together.prompt_caching);
2939        assert!(together.requires_streaming);
2940        assert!(!together.honors_chat_template_kwargs);
2941
2942        let glm = lookup("together", "zai-org/GLM-5.1");
2943        assert!(glm.native_tools);
2944        assert!(glm.prompt_caching);
2945        assert_eq!(glm.preferred_tool_format.as_deref(), Some("text"));
2946        assert_eq!(glm.tool_mode_parity.as_deref(), Some("native_unreliable"));
2947        assert_eq!(
2948            glm.auto_reasoning_overrides
2949                .get("agent")
2950                .map(String::as_str),
2951            Some("off"),
2952        );
2953
2954        let openrouter_glm = lookup("openrouter", "z-ai/glm-5.2");
2955        assert!(openrouter_glm.reasoning_effort_supported);
2956        assert_eq!(
2957            openrouter_glm.reasoning_effort_levels,
2958            vec!["high", "xhigh", "max"]
2959        );
2960        assert_eq!(
2961            openrouter_glm.preferred_tool_format.as_deref(),
2962            Some("text")
2963        );
2964
2965        let minimax = lookup("together", "MiniMaxAI/MiniMax-M2.7");
2966        assert!(minimax.native_tools);
2967        assert!(minimax.prompt_caching);
2968        // 2026-06-24 forced-format sweep flipped this route json -> text: heredoc
2969        // beat fenced-JSON on both dispatch and backslash-body fidelity at N=5.
2970        assert_eq!(minimax.preferred_tool_format.as_deref(), Some("text"));
2971        assert_eq!(
2972            minimax.tool_mode_parity.as_deref(),
2973            Some("native_unreliable")
2974        );
2975        assert!(!minimax.reasoning_text_promotable);
2976
2977        let step = lookup("openrouter", "stepfun/step-3.7-flash");
2978        assert!(step.native_tools);
2979        assert!(step.prompt_caching);
2980        assert!(!step.reasoning_disable_supported);
2981        assert_eq!(step.thinking_modes, vec!["enabled"]);
2982    }
2983
2984    #[test]
2985    fn openrouter_structured_routes_cover_current_open_models() {
2986        reset();
2987        for model in [
2988            "deepseek/deepseek-v4-flash",
2989            "mistralai/devstral-small",
2990            "meta-llama/llama-4-scout",
2991            "kwaipilot/kat-coder-pro-v2",
2992        ] {
2993            let caps = lookup("openrouter", model);
2994            assert!(caps.native_tools, "{model} should expose native tools");
2995            assert_eq!(caps.structured_output.as_deref(), Some("native"));
2996            assert_eq!(caps.structured_output_mode, "native_json");
2997        }
2998        assert!(lookup("openrouter", "deepseek/deepseek-v4-flash").top_k_supported);
2999        assert!(lookup("openrouter", "meta-llama/llama-4-scout").top_k_supported);
3000        assert!(!lookup("openrouter", "mistralai/devstral-small").top_k_supported);
3001        assert!(lookup("openrouter", "google/gemma-4-26b-a4b-it").top_k_supported);
3002    }
3003
3004    #[test]
3005    fn openrouter_anthropic_claude_models_support_native_tools() {
3006        // Regression for #2319: OpenRouter Anthropic slugs must match the
3007        // Anthropic capability rules before the OpenRouter -> OpenAI family
3008        // chain, otherwise native-tool requests get rejected as unsupported.
3009        reset();
3010        for model in [
3011            "anthropic/claude-haiku-4-5",
3012            "anthropic/claude-haiku-4-5-20251001",
3013            "anthropic/claude-sonnet-4-6",
3014            "anthropic/claude-sonnet-4-7",
3015            "anthropic/claude-opus-4-7",
3016        ] {
3017            let caps = lookup("openrouter", model);
3018            assert!(
3019                caps.native_tools,
3020                "{model} via openrouter should report native_tools=true",
3021            );
3022            assert!(
3023                caps.prompt_caching,
3024                "{model} via openrouter should report prompt_caching=true",
3025            );
3026            assert_eq!(
3027                caps.cache_breakpoint_style, "top_level",
3028                "{model} via openrouter should use top-level cache_control",
3029            );
3030            assert_eq!(
3031                caps.structured_output.as_deref(),
3032                Some("tool_use"),
3033                "{model} via openrouter should structured_output=tool_use (matches direct anthropic)",
3034            );
3035        }
3036    }
3037
3038    #[test]
3039    fn openrouter_deepseek_v32_defaults_to_text_tools() {
3040        reset();
3041        let caps = lookup("openrouter", "deepseek/deepseek-v3.2");
3042        assert!(caps.native_tools);
3043        assert!(caps.text_tool_wire_format_supported);
3044        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
3045        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
3046        assert_eq!(caps.structured_output.as_deref(), Some("native"));
3047        assert!(caps.prompt_caching);
3048        assert_eq!(caps.cache_breakpoint_style, "last_block");
3049
3050        let automated = lookup("openrouter", "deepseek/deepseek-v3");
3051        assert!(automated.prompt_caching);
3052        assert_eq!(automated.cache_breakpoint_style, "none");
3053    }
3054
3055    #[test]
3056    fn openrouter_explicit_cache_routes_get_block_breakpoints() {
3057        reset();
3058        for model in [
3059            "qwen/qwen3.6-plus",
3060            "qwen/qwen3-coder-plus",
3061            "qwen/qwen3-coder-flash",
3062            "qwen/qwen3-max",
3063            "qwen/qwen-plus",
3064        ] {
3065            let caps = lookup("openrouter", model);
3066            assert!(caps.prompt_caching, "{model} should support prompt cache");
3067            assert_eq!(
3068                caps.cache_breakpoint_style, "last_block",
3069                "{model} should request explicit content-block cache breakpoints",
3070            );
3071        }
3072
3073        let open_weight = lookup("openrouter", "qwen/qwen3.6-35b-a3b");
3074        assert!(!open_weight.prompt_caching);
3075        assert_eq!(open_weight.cache_breakpoint_style, "none");
3076    }
3077
3078    #[test]
3079    fn openrouter_deepseek_alias_slugs_support_native_tools() {
3080        reset();
3081        for model in ["deepseek/deepseek-chat", "deepseek/deepseek-chat-v3-0324"] {
3082            let caps = lookup("openrouter", model);
3083            assert!(caps.native_tools, "{model} should expose native tools");
3084            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
3085            assert_eq!(caps.structured_output.as_deref(), Some("native"));
3086            assert!(
3087                caps.thinking_modes.is_empty(),
3088                "{model} is not a reasoning route"
3089            );
3090            assert_eq!(caps.thinking_block_style, "none");
3091            assert!(
3092                caps.top_k_supported,
3093                "{model} should accept top_k through OpenRouter"
3094            );
3095        }
3096
3097        for model in [
3098            "deepseek/deepseek-chat-v3.1",
3099            "deepseek/deepseek-r1",
3100            "deepseek/deepseek-r1-0528",
3101        ] {
3102            let caps = lookup("openrouter", model);
3103            assert!(caps.native_tools, "{model} should expose native tools");
3104            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
3105            assert_eq!(caps.structured_output.as_deref(), Some("native"));
3106            assert_eq!(caps.thinking_modes, vec!["enabled", "effort"]);
3107            assert_eq!(caps.thinking_block_style, "reasoning_summary");
3108            assert!(
3109                caps.top_k_supported,
3110                "{model} should accept top_k through OpenRouter"
3111            );
3112        }
3113
3114        assert!(!lookup("openrouter", "deepseek/deepseek-r1-distill-qwen-32b").native_tools);
3115    }
3116
3117    #[test]
3118    fn openrouter_qwen_coder_defaults_to_text_tools() {
3119        reset();
3120        let caps = lookup("openrouter", "qwen/qwen3-coder-flash");
3121        assert!(caps.native_tools);
3122        assert!(caps.text_tool_wire_format_supported);
3123        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
3124        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
3125    }
3126
3127    #[test]
3128    fn bedrock_claude_uses_anthropic_wire_capabilities() {
3129        reset();
3130        let caps = lookup("bedrock", "anthropic.claude-3-5-sonnet-20240620-v1:0");
3131        assert!(caps.native_tools);
3132        assert_eq!(caps.message_wire_format, WireDialect::Anthropic);
3133        assert_eq!(caps.native_tool_wire_format, "anthropic");
3134    }
3135
3136    #[test]
3137    fn groq_inherits_openai_family_only() {
3138        reset();
3139        let caps = lookup("groq", "gpt-5.5-preview");
3140        assert!(caps.defer_loading);
3141    }
3142
3143    #[test]
3144    fn cerebras_inherits_openai_family() {
3145        reset();
3146        let caps = lookup("cerebras", "gpt-oss-120b");
3147        assert_eq!(caps.message_wire_format, WireDialect::OpenAiCompat);
3148        assert_eq!(caps.native_tool_wire_format, "openai");
3149        // gpt-oss uses NATIVE tool calls across cerebras/groq/together. Under
3150        // json/text it emits a bare {"tool","arguments"} dialect the
3151        // fenced-JSON parser rejects (zero parsed calls), so native is the only
3152        // working channel.
3153        assert!(caps.native_tools);
3154        assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
3155    }
3156
3157    #[test]
3158    fn cerebras_gpt_oss_declares_supported_reasoning_efforts() {
3159        // Cerebras GPT-OSS accepts low/medium/high only. The policy resolver
3160        // uses this list to floor `reasoning_policy: "off"` to `low` instead
3161        // of sending unsupported `none` or `minimal` values.
3162        reset();
3163        let caps = lookup("cerebras", "gpt-oss-120b");
3164        assert_cerebras_effort_reasoning("gpt-oss-120b", "reasoning_summary");
3165        assert!(!caps.reasoning_none_supported);
3166        assert_eq!(caps.reasoning_effort_levels, vec!["low", "medium", "high"]);
3167    }
3168
3169    #[test]
3170    fn gpt_oss_requires_reasoning_for_tools_with_provider_specific_tool_wire() {
3171        // gpt-oss (Harmony) calls tools INSIDE the chain-of-thought channel, so
3172        // reasoning-off breaks tool calling. Provider catch-all rules carry no
3173        // reasoning fields, so without a dedicated `*gpt-oss*` row gpt-oss
3174        // would fall through to reasoning-OFF and the eval loop would bill a
3175        // noncommittal. Tool wire support is provider-specific: the pay-per-token
3176        // routes (OpenRouter, Fireworks, DeepInfra, SambaNova) ride Harn's TEXT
3177        // channel — their provider-native Harmony path drops tool calls into the
3178        // reasoning/commentary channel (empty `tool_calls` / billed-noncommittal,
3179        // see the DeepInfra/SambaNova rows + vLLM #22578/#44216, SGLang
3180        // #8976/#10738, openai/harmony #68). Within the text channel they use the
3181        // escape-free heredoc (`text`) grammar rather than fenced-JSON, because
3182        // gpt-oss double-escapes the backslashes a JSON string arg requires and
3183        // corrupts `\\`-heavy code bodies (empirical A/B 2026-06-21: text beats
3184        // json on both dispatch and byte-fidelity). Only the native-clean direct
3185        // routes (Cerebras, Groq) still use provider-native tools.
3186        reset();
3187        for (provider, model, native_tools, preferred_tool_format) in [
3188            ("openrouter", "openai/gpt-oss-120b", false, "text"),
3189            (
3190                "fireworks",
3191                "accounts/fireworks/models/gpt-oss-120b",
3192                false,
3193                "text",
3194            ),
3195            ("deepinfra", "openai/gpt-oss-120b", false, "text"),
3196            ("sambanova", "sambanova/gpt-oss-120b", false, "text"),
3197            ("cerebras", "gpt-oss-120b", true, "native"),
3198            ("groq", "openai/gpt-oss-120b", true, "native"),
3199        ] {
3200            let caps = lookup(provider, model);
3201            assert!(
3202                caps.reasoning_required_for_tools,
3203                "{provider}/{model}: reasoning_required_for_tools must be true"
3204            );
3205            assert!(
3206                caps.reasoning_effort_supported,
3207                "{provider}/{model}: reasoning_effort_supported must be true"
3208            );
3209            assert_eq!(
3210                caps.reasoning_effort_levels,
3211                vec!["low", "medium", "high"],
3212                "{provider}/{model}: effort levels"
3213            );
3214            assert_eq!(caps.thinking_modes, vec!["effort"], "{provider}/{model}");
3215            assert_eq!(
3216                caps.native_tools, native_tools,
3217                "{provider}/{model}: native_tools"
3218            );
3219            assert_eq!(
3220                caps.preferred_tool_format.as_deref(),
3221                Some(preferred_tool_format),
3222                "{provider}/{model}: preferred tool format"
3223            );
3224            assert_eq!(
3225                caps.thinking_block_style, "reasoning_summary",
3226                "{provider}/{model}"
3227            );
3228        }
3229    }
3230
3231    #[test]
3232    fn cerebras_glm_47_supports_reasoning_none() {
3233        // Cerebras documents GLM 4.7's no-reasoning value as
3234        // reasoning_effort="none"; the older disable_reasoning knob is
3235        // deprecated. Keep the route on the same policy path as GPT-OSS.
3236        reset();
3237        let caps = lookup("cerebras", "zai-glm-4.7");
3238        assert_cerebras_effort_reasoning("zai-glm-4.7", "inline");
3239        assert!(caps.reasoning_none_supported);
3240    }
3241
3242    #[test]
3243    fn mock_with_claude_model_routes_to_anthropic() {
3244        reset();
3245        let caps = lookup("mock", "claude-sonnet-4-7");
3246        assert!(caps.defer_loading);
3247        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
3248    }
3249
3250    #[test]
3251    fn mock_with_gpt_model_routes_to_openai() {
3252        reset();
3253        let caps = lookup("mock", "gpt-5.4-preview");
3254        assert!(caps.defer_loading);
3255        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
3256    }
3257
3258    #[test]
3259    fn mock_with_gemini_model_routes_to_gemini() {
3260        reset();
3261        let caps = lookup("mock", "gemini-2.5-flash");
3262        assert_eq!(caps.message_wire_format, WireDialect::Gemini);
3263        assert_eq!(caps.native_tool_wire_format, "openai");
3264        assert!(caps.prefers_xml_scaffolding);
3265    }
3266
3267    #[test]
3268    fn qwen36_ollama_preserves_thinking() {
3269        reset();
3270        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
3271        assert!(!caps.native_tools);
3272        assert_eq!(caps.json_schema.as_deref(), Some("format_kw"));
3273        assert!(!caps.thinking_modes.is_empty());
3274        assert!(
3275            caps.preserve_thinking,
3276            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
3277        );
3278        assert_eq!(caps.server_parser, "none");
3279        assert!(!caps.honors_chat_template_kwargs);
3280        assert_eq!(caps.recommended_endpoint.as_deref(), Some("/api/chat"));
3281        assert!(caps.text_tool_wire_format_supported);
3282        assert!(caps.prefers_markdown_scaffolding);
3283        assert_eq!(caps.structured_output_mode, "delimited");
3284        assert!(!caps.prefers_xml_tools);
3285        assert_eq!(caps.thinking_block_style, "inline");
3286    }
3287
3288    #[test]
3289    fn qwen35_ollama_does_not_preserve_thinking() {
3290        reset();
3291        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
3292        assert!(caps.native_tools);
3293        assert!(!caps.thinking_modes.is_empty());
3294        assert!(
3295            !caps.preserve_thinking,
3296            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
3297        );
3298        assert_eq!(caps.server_parser, "ollama_qwen3coder");
3299        assert!(!caps.text_tool_wire_format_supported);
3300    }
3301
3302    #[test]
3303    fn qwen36_routed_providers_all_preserve_thinking() {
3304        reset();
3305        for (provider, model) in [
3306            ("openrouter", "qwen/qwen3.6-plus"),
3307            ("together", "Qwen/Qwen3.6-Plus"),
3308            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
3309            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
3310            ("dashscope", "qwen3.6-plus"),
3311            ("local", "Qwen3.6-35B-A3B"),
3312            ("mlx", "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"),
3313            ("mlx", "Qwen/Qwen3.6-35B-A3B"),
3314        ] {
3315            let caps = lookup(provider, model);
3316            assert!(
3317                !caps.thinking_modes.is_empty(),
3318                "{provider}/{model}: thinking"
3319            );
3320            assert!(
3321                caps.preserve_thinking,
3322                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
3323            );
3324            assert!(caps.native_tools, "{provider}/{model}: native_tools");
3325            assert_ne!(
3326                caps.server_parser, "ollama_qwen3coder",
3327                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
3328            );
3329        }
3330
3331        let caps = lookup("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF");
3332        assert!(!caps.thinking_modes.is_empty());
3333        assert!(caps.preserve_thinking);
3334        assert!(!caps.native_tools);
3335        assert!(caps.text_tool_wire_format_supported);
3336        assert_eq!(caps.server_parser, "none");
3337    }
3338
3339    #[test]
3340    fn qwen_coder_models_do_not_claim_thinking_modes() {
3341        reset();
3342        for (provider, model) in [
3343            ("together", "Qwen/Qwen3-Coder-Next-FP8"),
3344            ("together", "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"),
3345            ("openrouter", "qwen/qwen3-coder-next"),
3346            ("huggingface", "Qwen/Qwen3-Coder-Next"),
3347        ] {
3348            let caps = lookup(provider, model);
3349            assert!(caps.native_tools, "{provider}/{model}: native_tools");
3350            assert!(
3351                caps.thinking_modes.is_empty(),
3352                "{provider}/{model}: coder models are non-thinking routes"
3353            );
3354            assert!(
3355                !caps.preserve_thinking,
3356                "{provider}/{model}: preserve_thinking must stay off"
3357            );
3358            assert!(
3359                caps.thinking_disable_directive.is_none(),
3360                "{provider}/{model}: no /no_think shim should be needed"
3361            );
3362        }
3363    }
3364
3365    #[test]
3366    fn llamacpp_qwen_keeps_text_tool_wire_format() {
3367        reset();
3368        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
3369        assert_eq!(caps.server_parser, "none");
3370        assert!(caps.honors_chat_template_kwargs);
3371        assert!(!caps.native_tools);
3372        assert!(caps.text_tool_wire_format_supported);
3373        assert_eq!(
3374            caps.recommended_endpoint.as_deref(),
3375            Some("/v1/chat/completions")
3376        );
3377    }
3378
3379    #[test]
3380    fn devstral_local_routes_default_to_json_tools() {
3381        reset();
3382        for provider in ["ollama", "llamacpp"] {
3383            let caps = lookup(provider, "devstral-small-2:24b");
3384            assert!(!caps.native_tools, "{provider}: native tools stay opt-in");
3385            assert!(
3386                caps.text_tool_wire_format_supported,
3387                "{provider}: text tools should remain available"
3388            );
3389            // devstral has no reserved-token constraint, so it uses the global
3390            // `json` (fenced-JSON) text-channel default. Heredoc stays
3391            // reachable via an explicit `preferred_tool_format = "text"` pin.
3392            assert_eq!(
3393                caps.preferred_tool_format.as_deref(),
3394                Some("json"),
3395                "{provider}: devstral inherits the global json default"
3396            );
3397        }
3398    }
3399
3400    #[test]
3401    fn openrouter_mistral_routes_use_native_tools() {
3402        reset();
3403        let caps = lookup("openrouter", "mistralai/mistral-small-2603");
3404        assert!(caps.native_tools);
3405        assert!(caps.text_tool_wire_format_supported);
3406        assert_eq!(caps.structured_output.as_deref(), Some("native"));
3407        assert_eq!(caps.structured_output_mode, "native_json");
3408    }
3409
3410    #[test]
3411    fn dashscope_and_llamacpp_resolve_capabilities() {
3412        reset();
3413        // New sibling providers should fall through to `openai` for
3414        // gpt-*  models even without dedicated rules.
3415        let caps = lookup("dashscope", "gpt-5.4-preview");
3416        assert!(caps.defer_loading);
3417        let caps = lookup("llamacpp", "gpt-5.4-preview");
3418        assert!(caps.defer_loading);
3419    }
3420
3421    #[test]
3422    fn unknown_provider_has_no_capabilities() {
3423        reset();
3424        let caps = lookup("my-custom-proxy", "foo-bar-1");
3425        assert!(!caps.native_tools);
3426        assert!(!caps.defer_loading);
3427        assert!(caps.tool_search.is_empty());
3428    }
3429
3430    #[test]
3431    fn openrouter_specific_rules_win_and_family_inheritance_is_preserved() {
3432        // Capability resolution is first-match-wins over fragment order
3433        // (`first_matching_rule_in_file` -> `Iterator::find`), and when no
3434        // `provider.openrouter` rule matches it walks the `[provider_family]`
3435        // chain (openrouter -> openai). Both contracts must hold so that:
3436        //   1. a specific OpenRouter carve-out beats a broader OpenRouter rule,
3437        //   2. gpt-/o-family slugs routed through OpenRouter still inherit the
3438        //      rich openai-family capability set (a blanket `*` openrouter row
3439        //      would shadow this — see the catalog-or-defaults report).
3440        reset();
3441
3442        // 1. Specific carve-out wins: deepseek/deepseek-v3.2 is pinned to the
3443        // Harn text-tool channel even though the broader deepseek/deepseek-v3*
3444        // rule below it would otherwise resolve `native`.
3445        let deepseek = lookup("openrouter", "deepseek/deepseek-v3.2");
3446        assert_eq!(
3447            deepseek.preferred_tool_format.as_deref(),
3448            Some("text"),
3449            "deepseek-v3.2 text carve-out must win over the broader deepseek-v3* rule"
3450        );
3451        assert_eq!(
3452            deepseek.tool_mode_parity.as_deref(),
3453            Some("native_unreliable")
3454        );
3455        // The broader sibling still resolves native for non-3.2 v3 slugs.
3456        assert_eq!(
3457            lookup("openrouter", "deepseek/deepseek-v3-base")
3458                .preferred_tool_format
3459                .as_deref(),
3460            Some("native")
3461        );
3462
3463        // 2. Family inheritance preserved: an openai-prefixed slug routed via
3464        // OpenRouter still picks up openai-family reasoning fields.
3465        let prefixed = lookup("openrouter", "openai/o4-mini");
3466        assert!(prefixed.requires_completion_tokens);
3467        assert!(prefixed.reasoning_effort_supported);
3468
3469        // The newly added MiniMax M2.5 OR mirror resolves native via the
3470        // existing `minimax/minimax-m2*` rule.
3471        let m25 = lookup("openrouter", "minimax/minimax-m2.5");
3472        assert!(m25.native_tools);
3473        assert_eq!(m25.preferred_tool_format.as_deref(), Some("native"));
3474    }
3475
3476    #[test]
3477    fn enterprise_routes_expose_format_preferences() {
3478        reset();
3479        let bedrock_claude = lookup("bedrock", "anthropic.claude-opus-4-7-v1:0");
3480        assert!(bedrock_claude.prefers_xml_scaffolding);
3481        assert_eq!(bedrock_claude.structured_output_mode, "xml_tagged");
3482        assert!(!bedrock_claude.supports_assistant_prefill);
3483        assert!(bedrock_claude.prefers_xml_tools);
3484
3485        let azure_o = lookup("azure_openai", "o3-prod");
3486        assert!(azure_o.prefers_markdown_scaffolding);
3487        assert_eq!(azure_o.structured_output_mode, "native_json");
3488        assert!(azure_o.prefers_role_developer);
3489        assert_eq!(azure_o.thinking_block_style, "reasoning_summary");
3490    }
3491
3492    #[test]
3493    fn user_override_adds_new_provider() {
3494        reset();
3495        let toml_src = concat!(
3496            "[[provider.my-proxy]]\n",
3497            "model_match = \"*\"\n",
3498            "native_tools = true\n",
3499            "tool_search = [\"hosted\"]\n",
3500            "prefers_xml_scaffolding = true\n",
3501            "structured_output_mode = \"xml_tagged\"\n",
3502            "supports_assistant_prefill = true\n",
3503            "prefers_xml_tools = true\n",
3504            "thinking_block_style = \"thinking_blocks\"\n",
3505        );
3506        set_user_overrides_toml(toml_src).unwrap();
3507        let caps = lookup("my-proxy", "anything");
3508        assert!(caps.native_tools);
3509        assert_eq!(caps.tool_search, vec!["hosted"]);
3510        assert!(caps.prefers_xml_scaffolding);
3511        assert_eq!(caps.structured_output_mode, "xml_tagged");
3512        assert!(caps.supports_assistant_prefill);
3513        assert!(caps.prefers_xml_tools);
3514        assert_eq!(caps.thinking_block_style, "thinking_blocks");
3515        clear_user_overrides();
3516    }
3517
3518    #[test]
3519    fn user_override_takes_precedence_over_builtin() {
3520        reset();
3521        let toml_src = r#"
3522[[provider.anthropic]]
3523model_match = "claude-opus-*"
3524native_tools = true
3525defer_loading = false
3526tool_search = []
3527"#;
3528        set_user_overrides_toml(toml_src).unwrap();
3529        let caps = lookup("anthropic", "claude-opus-4-7");
3530        assert!(caps.native_tools);
3531        assert!(!caps.defer_loading);
3532        assert!(caps.tool_search.is_empty());
3533        clear_user_overrides();
3534    }
3535
3536    #[test]
3537    fn user_override_from_manifest_toml() {
3538        reset();
3539        let manifest = concat!(
3540            "[package]\n",
3541            "name = \"demo\"\n\n",
3542            "[[capabilities.provider.my-proxy]]\n",
3543            "model_match = \"*\"\n",
3544            "native_tools = true\n",
3545            "tool_search = [\"hosted\"]\n",
3546            "prefers_markdown_scaffolding = true\n",
3547            "structured_output_mode = \"native_json\"\n",
3548            "prefers_role_developer = true\n",
3549            "thinking_block_style = \"reasoning_summary\"\n",
3550        );
3551        set_user_overrides_from_manifest_toml(manifest).unwrap();
3552        let caps = lookup("my-proxy", "foo");
3553        assert!(caps.native_tools);
3554        assert_eq!(caps.tool_search, vec!["hosted"]);
3555        assert!(caps.prefers_markdown_scaffolding);
3556        assert_eq!(caps.structured_output_mode, "native_json");
3557        assert!(caps.prefers_role_developer);
3558        assert_eq!(caps.thinking_block_style, "reasoning_summary");
3559        clear_user_overrides();
3560    }
3561
3562    #[test]
3563    fn version_min_requires_parseable_model() {
3564        reset();
3565        let toml_src = r#"
3566[[provider.custom]]
3567model_match = "*"
3568version_min = [5, 4]
3569native_tools = true
3570"#;
3571        set_user_overrides_toml(toml_src).unwrap();
3572        // Unparseable model ID + version_min → rule doesn't match.
3573        let caps = lookup("custom", "mystery-model");
3574        assert!(!caps.native_tools);
3575        clear_user_overrides();
3576    }
3577
3578    #[test]
3579    fn glob_match_substring() {
3580        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
3581        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
3582        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
3583    }
3584
3585    #[test]
3586    fn openrouter_namespaced_anthropic_model() {
3587        reset();
3588        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
3589        assert!(caps.defer_loading);
3590    }
3591
3592    #[test]
3593    fn matrix_rows_include_provider_patterns_and_sources() {
3594        reset();
3595        let rows = matrix_rows();
3596        assert!(rows.iter().any(|row| {
3597            row.provider == "openai"
3598                && row.model == "gpt-4o*"
3599                && row.vision
3600                && row.audio
3601                && row.json_schema.as_deref() == Some("native")
3602                && row.source == "builtin"
3603        }));
3604    }
3605
3606    #[test]
3607    fn validate_tool_format_autocorrects_native_pin_on_native_unreliable_route() {
3608        reset();
3609        // DeepSeek V3.2 on OpenRouter: tool_mode_parity = native_unreliable,
3610        // preferred_tool_format = text. A `native` request is the footgun — it
3611        // drops to unparsed DSML text and gets rejected. The gate must steer it
3612        // to the route's preferred text-channel format and explain why.
3613        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "native");
3614        assert_eq!(
3615            decision.effective, "text",
3616            "native must be auto-corrected to the route's preferred text format"
3617        );
3618        let reason = decision.correction.expect("a correction must be reported");
3619        assert!(reason.contains("native"), "names the rejected format");
3620        assert!(reason.contains("native_unreliable"), "names the parity");
3621        assert!(reason.contains("text"), "names the working alternative");
3622    }
3623
3624    #[test]
3625    fn validate_tool_format_passes_through_safe_combos() {
3626        reset();
3627        // A native-capable route with no adverse parity keeps the requested
3628        // native format untouched (no spurious correction).
3629        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3-base", "native");
3630        assert_eq!(decision.effective, "native");
3631        assert!(decision.correction.is_none());
3632
3633        // The same native_unreliable route is fine when text is requested.
3634        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "text");
3635        assert_eq!(decision.effective, "text");
3636        assert!(decision.correction.is_none());
3637
3638        // json is also a text-channel grammar and is accepted on a text route.
3639        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "json");
3640        assert_eq!(decision.effective, "json");
3641        assert!(decision.correction.is_none());
3642    }
3643
3644    #[test]
3645    fn validate_tool_format_leaves_unknown_routes_and_formats_alone() {
3646        reset();
3647        // Unknown provider/model has parity = unknown -> no opinion, pass through.
3648        let decision = validate_tool_format("my-proxy", "mystery-1", "native");
3649        assert_eq!(decision.effective, "native");
3650        assert!(decision.correction.is_none());
3651
3652        // An unclassifiable tool_format string is not ours to rewrite.
3653        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "frobnicate");
3654        assert_eq!(decision.effective, "frobnicate");
3655        assert!(decision.correction.is_none());
3656    }
3657
3658    #[test]
3659    fn validate_tool_format_steers_off_text_on_native_only_route() {
3660        reset();
3661        // Synthesize a native_only route via a project override and confirm a
3662        // text request is steered to native (the symmetric direction).
3663        let overrides: CapabilitiesFile = toml::from_str(
3664            "[[provider.acme]]\n\
3665             model_match = \"native-only-*\"\n\
3666             native_tools = true\n\
3667             text_tool_wire_format_supported = false\n\
3668             tool_mode_parity = \"native_only\"\n\
3669             preferred_tool_format = \"native\"\n",
3670        )
3671        .expect("override parses");
3672        let caps = lookup_with_user_overrides("acme", "native-only-1", Some(&overrides));
3673        let decision = validate_tool_format_with_caps("acme", "native-only-1", "text", &caps);
3674        assert_eq!(decision.effective, "native");
3675        let reason = decision
3676            .correction
3677            .expect("text on native_only is corrected");
3678        assert!(reason.contains("native_only"));
3679    }
3680
3681    #[test]
3682    fn validate_tool_format_honors_structural_text_unsupported_bit() {
3683        reset();
3684        // Real shipping route: ollama/qwen3* declares native_tools = true and
3685        // text_tool_wire_format_supported = false with NO tool_mode_parity
3686        // string. The gate's contract ("always yields parseable tool calls")
3687        // must hold from the structural bit alone — a text/json request is
3688        // steered to native, not passed through onto an unsupported channel.
3689        let caps = lookup("ollama", "qwen3-coder:30b");
3690        assert!(!caps.text_tool_wire_format_supported);
3691        for requested in ["text", "json"] {
3692            let decision =
3693                validate_tool_format_with_caps("ollama", "qwen3-coder:30b", requested, &caps);
3694            assert_eq!(
3695                decision.effective, "native",
3696                "{requested} must be steered to native on a text-unsupported route"
3697            );
3698            assert!(decision.correction.is_some());
3699        }
3700        // native is the route's working channel — untouched.
3701        let native = validate_tool_format_with_caps("ollama", "qwen3-coder:30b", "native", &caps);
3702        assert_eq!(native.effective, "native");
3703        assert!(native.correction.is_none());
3704    }
3705
3706    #[test]
3707    fn tool_format_resolution_is_serving_stack_aware_for_same_weights() {
3708        // The (model x serving-stack) insight: the SAME Qwen3.6 weights resolve
3709        // to DIFFERENT working tool-call channels depending on who serves them.
3710        // This divergence lives in the capability matrix as data (provider rows),
3711        // NOT in alias pins — so an alias refactor must not be able to regress
3712        // it. Locking the three live serving stacks here makes that explicit.
3713        reset();
3714
3715        // llama.cpp (:8001) — native is probe-validated and trusted.
3716        let llamacpp = validate_tool_format("llamacpp", "qwen3.6-35b-a3b-ud-q4-k-xl", "native");
3717        assert_eq!(
3718            llamacpp.effective, "native",
3719            "llama.cpp serves qwen3.6 native"
3720        );
3721        assert!(llamacpp.correction.is_none());
3722
3723        // Ollama (/v1) — the embedded qwen tool-call parser 500s on text-mode
3724        // output, so this route is served on the text/json channel: a native
3725        // request must be auto-corrected to json (never silently dropped).
3726        let ollama = validate_tool_format("ollama", "qwen3.6-35b-a3b", "native");
3727        assert_eq!(
3728            ollama.effective, "json",
3729            "ollama qwen3.6 must steer native -> json (server-side parser 500 leak)"
3730        );
3731        assert!(
3732            ollama.correction.is_some(),
3733            "the native->json steer must be explained, not silent"
3734        );
3735
3736        // A native_unreliable cloud route (deepinfra GLM-5) carries the same
3737        // serving-stack verdict via tool_mode_parity + empirical notes, and is
3738        // likewise steered off native.
3739        let glm = validate_tool_format("deepinfra", "deepinfra/glm-5.2", "native");
3740        assert_eq!(glm.effective, "json");
3741        assert!(glm.correction.is_some());
3742    }
3743
3744    #[test]
3745    fn validate_tool_format_passes_through_when_no_channel_works() {
3746        reset();
3747        // A route with no working tool surface — text_only parity forbids the
3748        // native channel, and text_tool_wire_format_supported = false forbids
3749        // the text channel — so BOTH channels are forbidden. The gate has
3750        // nothing better to steer to; it must NOT rewrite to an equally broken
3751        // format under a misleading correction. Pass through unchanged.
3752        let overrides: CapabilitiesFile = toml::from_str(
3753            "[[provider.acme]]\n\
3754             model_match = \"no-tools-*\"\n\
3755             native_tools = false\n\
3756             tool_mode_parity = \"text_only\"\n\
3757             text_tool_wire_format_supported = false\n",
3758        )
3759        .expect("override parses");
3760        let caps = lookup_with_user_overrides("acme", "no-tools-1", Some(&overrides));
3761        for requested in ["native", "text", "json"] {
3762            let decision = validate_tool_format_with_caps("acme", "no-tools-1", requested, &caps);
3763            assert_eq!(
3764                decision.effective, requested,
3765                "{requested} passes through unchanged"
3766            );
3767            assert!(decision.correction.is_none());
3768        }
3769    }
3770
3771    /// FOOTGUN-REMOVAL — gpt-oss (Harmony) on the pay-per-token DeepInfra and
3772    /// SambaNova routes drops tool calls into the reasoning channel on native, so
3773    /// a `native` pin must auto-correct to the route's `text` channel with an
3774    /// explanatory correction. The known-good native routes (cerebras gpt-oss,
3775    /// sambanova minimax) must stay untouched.
3776    #[test]
3777    fn validate_tool_format_autocorrects_gpt_oss_native_pin_to_text() {
3778        reset();
3779        for (provider, model) in [
3780            ("deepinfra", "deepinfra/openai/gpt-oss-120b"),
3781            ("sambanova", "sambanova/gpt-oss-120b"),
3782        ] {
3783            let decision = validate_tool_format(provider, model, "native");
3784            assert_eq!(
3785                decision.effective, "text",
3786                "{provider}/{model}: native must auto-correct to text"
3787            );
3788            let reason = decision
3789                .correction
3790                .unwrap_or_else(|| panic!("{provider}/{model}: a correction must be reported"));
3791            assert!(
3792                reason.contains("native_unreliable"),
3793                "{provider}/{model}: names the parity"
3794            );
3795            assert!(
3796                reason.contains("text"),
3797                "{provider}/{model}: names the working alternative"
3798            );
3799            // text is already safe and passes through unchanged.
3800            let text = validate_tool_format(provider, model, "text");
3801            assert_eq!(text.effective, "text");
3802            assert!(text.correction.is_none());
3803        }
3804    }
3805
3806    /// FOOTGUN-REMOVAL — the GLM-5.x native channel emits `<tool_call>` markup
3807    /// instead of provider-native `tool_calls`, so the zai-direct GLM rows pin
3808    /// text and a `native` pin must auto-correct, matching the Fireworks/
3809    /// DeepInfra/Baseten precedents.
3810    #[test]
3811    fn validate_tool_format_autocorrects_zai_glm_native_pin_to_text() {
3812        reset();
3813        for model in ["glm-5.2", "glm-5.1", "glm-5"] {
3814            let decision = validate_tool_format("zai", model, "native");
3815            assert_eq!(
3816                decision.effective, "text",
3817                "zai/{model}: native must auto-correct to text"
3818            );
3819            let reason = decision
3820                .correction
3821                .unwrap_or_else(|| panic!("zai/{model}: a correction must be reported"));
3822            assert!(
3823                reason.contains("native_unreliable"),
3824                "zai/{model}: names the parity"
3825            );
3826        }
3827    }
3828
3829    /// The known-good native routes must NOT be touched by the gpt-oss/GLM
3830    /// pins above — a native pin stays native with no spurious correction.
3831    #[test]
3832    fn validate_tool_format_leaves_known_good_native_routes_unchanged() {
3833        reset();
3834        for (provider, model) in [
3835            // cerebras gpt-oss is native-clean (only throttled).
3836            ("cerebras", "gpt-oss-120b"),
3837            // sambanova deepseek-v3.2 is native and interchangeable; minimax is
3838            // native_unreliable upstream and is not a known-good native
3839            // exemplar.
3840            ("sambanova", "DeepSeek-V3.2"),
3841        ] {
3842            let decision = validate_tool_format(provider, model, "native");
3843            assert_eq!(
3844                decision.effective, "native",
3845                "{provider}/{model}: known-good native route must stay native"
3846            );
3847            assert!(
3848                decision.correction.is_none(),
3849                "{provider}/{model}: no spurious correction"
3850            );
3851        }
3852    }
3853
3854    /// FOOTGUN-REMOVAL — the first-class no-viable-channel guard fires when BOTH
3855    /// channels are forbidden (a route the registry trusts on neither native nor
3856    /// text), naming the bad combo and a suggested alternative — never a silent
3857    /// empty tool stream.
3858    #[test]
3859    fn no_viable_tool_channel_guard_fires_only_when_both_channels_forbidden() {
3860        reset();
3861        // Construct a gpt-oss route with NO working channel: native_unreliable
3862        // forbids native, and text_tool_wire_format_supported = false forbids the
3863        // text channel too.
3864        let overrides: CapabilitiesFile = toml::from_str(
3865            "[[provider.acme]]\n\
3866             model_match = \"acme/gpt-oss-stub\"\n\
3867             native_tools = false\n\
3868             tool_mode_parity = \"native_unreliable\"\n\
3869             text_tool_wire_format_supported = false\n",
3870        )
3871        .expect("override parses");
3872        let caps = lookup_with_user_overrides("acme", "acme/gpt-oss-stub", Some(&overrides));
3873        let message = no_viable_tool_channel_with_caps("acme", "acme/gpt-oss-stub", &caps)
3874            .expect("the guard must fire when neither channel works");
3875        assert!(
3876            message.contains("no viable tool-calling channel"),
3877            "names the failure: {message}"
3878        );
3879        assert!(
3880            message.contains("acme/gpt-oss-stub"),
3881            "names the bad combo: {message}"
3882        );
3883        // gpt-oss models get the Harmony-specific text-channel hint.
3884        assert!(
3885            message.contains("gpt-oss") && message.contains("text"),
3886            "suggests an alternative: {message}"
3887        );
3888
3889        // The DeepInfra/SambaNova gpt-oss rows keep a working text channel, so
3890        // the guard must NOT fire on them (they auto-correct instead).
3891        assert!(
3892            no_viable_tool_channel("deepinfra", "deepinfra/openai/gpt-oss-120b").is_none(),
3893            "auto-correctable route must not trip the fail-fast guard"
3894        );
3895        assert!(
3896            no_viable_tool_channel("sambanova", "sambanova/gpt-oss-120b").is_none(),
3897            "auto-correctable route must not trip the fail-fast guard"
3898        );
3899        // A healthy native-clean route never trips it.
3900        assert!(
3901            no_viable_tool_channel("cerebras", "gpt-oss-120b").is_none(),
3902            "healthy native route must not trip the guard"
3903        );
3904        // The generic (non-gpt-oss) no-channel case still fires with a generic
3905        // hint.
3906        let generic: CapabilitiesFile = toml::from_str(
3907            "[[provider.acme]]\n\
3908             model_match = \"mystery-1\"\n\
3909             native_tools = false\n\
3910             tool_mode_parity = \"text_only\"\n\
3911             text_tool_wire_format_supported = false\n",
3912        )
3913        .expect("override parses");
3914        let caps = lookup_with_user_overrides("acme", "mystery-1", Some(&generic));
3915        let message = no_viable_tool_channel_with_caps("acme", "mystery-1", &caps)
3916            .expect("guard fires on the generic no-channel route too");
3917        assert!(
3918            message.contains("harn provider catalog matrix"),
3919            "{message}"
3920        );
3921    }
3922
3923    // --- `extends = true` field-wise fall-through ---
3924
3925    /// Resolve capabilities for a synthetic provider whose rules come entirely
3926    /// from `src`: the parsed file is passed as the builtin base with no user
3927    /// layer, so no shipped rule interferes with the `extends` assertions.
3928    fn extends_caps(src: &str) -> Capabilities {
3929        let file = parse_capabilities_toml(src).expect("test capabilities toml parses");
3930        lookup_with("testprov", "test-model", &file, None)
3931    }
3932
3933    #[test]
3934    fn extends_rule_fills_unset_fields_from_later_matching_rule() {
3935        // Rule 1 opts into `extends` and sets only native_tools; rule 2 (lower
3936        // precedence, same match) supplies the fields the chain left unset.
3937        let caps = extends_caps(
3938            r#"
3939[[provider.testprov]]
3940model_match = "test-*"
3941extends = true
3942native_tools = true
3943
3944[[provider.testprov]]
3945model_match = "test-*"
3946vision = true
3947message_wire_format = "anthropic"
3948"#,
3949        );
3950        assert!(caps.native_tools, "field from the extends rule applies");
3951        assert!(
3952            caps.vision,
3953            "unset field filled from the later matching rule"
3954        );
3955        assert_eq!(caps.message_wire_format, WireDialect::Anthropic);
3956    }
3957
3958    #[test]
3959    fn non_extends_rule_terminates_resolution_unchanged() {
3960        // Without `extends`, the first match wins outright and the later
3961        // rule's vision never applies — the pre-`extends` first-match-wins
3962        // behavior is preserved.
3963        let caps = extends_caps(
3964            r#"
3965[[provider.testprov]]
3966model_match = "test-*"
3967native_tools = true
3968
3969[[provider.testprov]]
3970model_match = "test-*"
3971vision = true
3972"#,
3973        );
3974        assert!(caps.native_tools);
3975        assert!(
3976            !caps.vision,
3977            "a non-extends first match must not absorb later rules"
3978        );
3979    }
3980
3981    #[test]
3982    fn extends_rule_does_not_override_explicitly_set_field() {
3983        // The higher-precedence extends rule's explicit native_tools = true
3984        // wins; the later rule only fills fields the chain left unset, so its
3985        // native_tools = false is ignored while its vision still applies.
3986        let caps = extends_caps(
3987            r#"
3988[[provider.testprov]]
3989model_match = "test-*"
3990extends = true
3991native_tools = true
3992
3993[[provider.testprov]]
3994model_match = "test-*"
3995native_tools = false
3996vision = true
3997"#,
3998        );
3999        assert!(
4000            caps.native_tools,
4001            "the extends rule's explicit value is not overridden by a lower rule"
4002        );
4003        assert!(caps.vision, "still fills the field the chain left unset");
4004    }
4005
4006    #[test]
4007    fn extends_chain_falls_through_to_provider_defaults() {
4008        // An unterminated extends chain (no later matching rule) fills its
4009        // remaining gaps from provider defaults.
4010        let caps = extends_caps(
4011            r#"
4012[provider_defaults.testprov]
4013seed_supported = true
4014
4015[[provider.testprov]]
4016model_match = "test-*"
4017extends = true
4018native_tools = true
4019"#,
4020        );
4021        assert!(caps.native_tools, "field from the extends rule applies");
4022        assert!(
4023            caps.seed_supported,
4024            "unset field filled from provider defaults"
4025        );
4026    }
4027}