harn_vm/llm/
capabilities.rs

1//! Data-driven provider capabilities.
2//!
3//! The per-(provider, model) capability matrix (native tools, deferred
4//! tool loading, tool-search variants, prompt caching, extended thinking,
5//! max tool count) lives in `capability_sources/**/*.toml`, which generates
6//! the shipped `capabilities.toml` snapshot, and is
7//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
8//! in `harn.toml`. This module owns:
9//!
10//! - loading the built-in TOML (compiled in via `include_str!`);
11//! - merging user overrides on top;
12//! - matching a `(provider, model)` pair against the rule list with
13//!   glob + semver semantics;
14//! - exposing a stable `Capabilities` struct that the `LlmProvider`
15//!   trait delegates to as the single source of truth.
16//!
17//! Provider adapters still supply generation parsers for `version_min`, but
18//! feature gates live in this data table instead of adapter-specific boolean
19//! branches.
20
21use std::cell::RefCell;
22use std::collections::{BTreeMap, HashSet};
23use std::sync::OnceLock;
24
25use serde::{Deserialize, Serialize};
26
27use super::providers::anthropic::claude_generation;
28use super::providers::openai_compat::gpt_generation;
29
/// Generated shipped default rules. Compiled into the binary at build time
/// via `include_str!`, so the path is resolved relative to this source file
/// and no capabilities file has to exist on disk at run time.
const BUILTIN_TOML: &str = include_str!("capabilities.toml");
/// Generated provider/model snapshot built from `catalog_sources/**/*.toml`.
/// Embedded in the binary the same way as [`BUILTIN_TOML`].
const BUILTIN_PROVIDERS_TOML: &str = include_str!("providers.toml");
34
/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
///
/// Every table is `#[serde(default)]`, so a TOML document that omits a
/// table deserializes to an empty map for it rather than failing.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists, keyed by provider name. The first
    /// matching rule wins; a matching rule with `extends = true` contributes
    /// only the fields it sets and lets resolution continue to later
    /// matching rules (see [`ProviderRule::extends`]).
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Per-provider defaults applied to every matching row and to
    /// provider/model pairs that have no model-specific row. This keeps
    /// transport-shape facts in data without repeating them on every
    /// generation-specific capability row.
    #[serde(default)]
    pub provider_defaults: BTreeMap<String, ProviderDefaults>,
    /// Sibling → canonical family mapping (key: sibling provider name,
    /// value: family it falls through to). Providers with no rule of
    /// their own fall through to the named family (recursively).
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
}
56
/// Provider-wide default fields merged into matching rules.
///
/// Every field is an `Option` so that "not specified" (`None`) stays
/// distinguishable from an explicit value; the merge helpers on this type
/// (`overlay` / `fill_missing_from`) act on that set/unset state.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProviderDefaults {
    /// Message/request/response wire format used by shared helpers.
    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
    #[serde(default)]
    pub message_wire_format: Option<String>,
    /// Native tool definition wire shape. Known values are `openai`
    /// and `anthropic`.
    #[serde(default)]
    pub native_tool_wire_format: Option<String>,
    /// Whether image content blocks may reference remote URLs.
    #[serde(default)]
    pub image_url_input_supported: Option<bool>,
    /// File-upload transport used by `std/files.upload`. Known values
    /// are `anthropic` and `gemini`.
    #[serde(default)]
    pub file_upload_wire_format: Option<String>,
    /// Provider-specific reasoning request shape for OpenAI-compatible
    /// transports. Known values are `openrouter` and `enabled`.
    /// NOTE(review): the same-named field on `ProviderRule` also lists
    /// `minimax` — confirm whether it is valid here and sync the two lists.
    #[serde(default)]
    pub reasoning_wire_format: Option<String>,
    /// Provider-wide default for `ProviderRule::files_api_supported`
    /// (whether uploaded file references can be reused in message content).
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    /// Provider-wide default for the per-rule `seed_supported` flag.
    /// Presumably: whether the `seed` sampling parameter is accepted — TODO
    /// confirm against the request builders.
    #[serde(default)]
    pub seed_supported: Option<bool>,
    /// Provider-wide default for the per-rule `top_k_supported` flag.
    /// Presumably: whether the `top_k` sampling parameter is accepted — TODO
    /// confirm.
    #[serde(default)]
    pub top_k_supported: Option<bool>,
    /// Provider-wide default for the per-rule `temperature_supported` flag.
    /// Presumably: whether `temperature` is accepted — TODO confirm.
    #[serde(default)]
    pub temperature_supported: Option<bool>,
    /// Provider-wide default for the per-rule `top_p_supported` flag.
    /// Presumably: whether `top_p` is accepted — TODO confirm.
    #[serde(default)]
    pub top_p_supported: Option<bool>,
    /// Provider-wide default for the per-rule `frequency_penalty_supported`
    /// flag. Presumably: whether `frequency_penalty` is accepted — TODO
    /// confirm.
    #[serde(default)]
    pub frequency_penalty_supported: Option<bool>,
    /// Provider-wide default for the per-rule `presence_penalty_supported`
    /// flag. Presumably: whether `presence_penalty` is accepted — TODO
    /// confirm.
    #[serde(default)]
    pub presence_penalty_supported: Option<bool>,
}
94
/// Last-writer-wins overlay for a single optional field.
///
/// When `src` holds a value it replaces whatever `dst` held; when `src` is
/// `None`, `dst` is left exactly as it was.
fn overlay_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    // Nothing to overlay: an unset source never clears the destination.
    if src.is_none() {
        return;
    }
    dst.clone_from(src);
}
101
/// Fill-the-gaps merge for a single optional field.
///
/// A value already present in `dst` always wins; `src` is copied in only
/// when `dst` is still `None` (and copying a `None` source leaves it `None`).
fn fill_opt<T: Clone>(dst: &mut Option<T>, src: &Option<T>) {
    // The destination was set by a higher-precedence source: keep it.
    if dst.is_some() {
        return;
    }
    dst.clone_from(src);
}
108
/// Applies the merge rule `$op` (`overlay_opt` or `fill_opt`) to every
/// `ProviderDefaults` field of `$dst` / `$src`, one `(dst, src)` pair per
/// field, in declaration order.
///
/// The field roster is spelled out exactly once, in the public arm below;
/// `overlay` and `fill_missing_from` differ solely in the `$op` they pass.
macro_rules! merge_provider_defaults {
    // Internal rule: expand one `$op(&mut dst.f, &src.f);` call per field.
    (@fields $dst:expr, $src:expr, $op:path; $($field:ident),+ $(,)?) => {{
        $(
            $op(&mut $dst.$field, &$src.$field);
        )+
    }};
    // Public entry point: forwards to the internal rule with the roster.
    ($dst:expr, $src:expr, $op:path) => {
        merge_provider_defaults!(
            @fields $dst, $src, $op;
            message_wire_format,
            native_tool_wire_format,
            image_url_input_supported,
            file_upload_wire_format,
            reasoning_wire_format,
            files_api_supported,
            seed_supported,
            top_k_supported,
            temperature_supported,
            top_p_supported,
            frequency_penalty_supported,
            presence_penalty_supported,
        )
    };
}
143
144impl ProviderDefaults {
145    fn overlay(&mut self, other: &ProviderDefaults) {
146        merge_provider_defaults!(self, other, overlay_opt);
147    }
148
149    fn fill_missing_from(&mut self, other: &ProviderDefaults) {
150        merge_provider_defaults!(self, other, fill_opt);
151    }
152
153    fn has_any_field(&self) -> bool {
154        self.message_wire_format.is_some()
155            || self.native_tool_wire_format.is_some()
156            || self.image_url_input_supported.is_some()
157            || self.file_upload_wire_format.is_some()
158            || self.reasoning_wire_format.is_some()
159            || self.files_api_supported.is_some()
160            || self.seed_supported.is_some()
161            || self.top_k_supported.is_some()
162            || self.temperature_supported.is_some()
163            || self.top_p_supported.is_some()
164            || self.frequency_penalty_supported.is_some()
165            || self.presence_penalty_supported.is_some()
166    }
167}
168
/// One row of the capability matrix.
///
/// Apart from the rule-matching metadata (`model_match`, `version_min`,
/// `extends`), every field is an `Option` with `#[serde(default)]`, so a
/// field omitted from the TOML row deserializes to `None`. That raw
/// set/unset state is what `extends` fall-through merging keys on.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules with an unparseable `version_min` for the
    /// given model are skipped, not merged.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    /// Per-rule fall-through. A matching rule with `extends = true`
    /// contributes ONLY the fields it explicitly sets; resolution then
    /// continues to later matching rules (user rules before built-in rules,
    /// then the `provider_family` chain) and ultimately to provider /
    /// built-in defaults to fill the rest. A matching rule without
    /// `extends` (or with `extends = false`) terminates resolution exactly
    /// as before this flag existed. This lets an overlay tweak one field of
    /// a shipped row without copying the whole row verbatim (which drifts).
    #[serde(default)]
    pub extends: bool,
    /// Native tool support for this route (the "native tools" column of the
    /// capability matrix described in the module docs).
    /// NOTE(review): exact semantics are not visible here — confirm against
    /// the resolver.
    #[serde(default)]
    pub native_tools: Option<bool>,
    /// Message/request/response wire format used by shared helpers.
    /// Known values are `openai`, `anthropic`, `gemini`, and `ollama`.
    #[serde(default)]
    pub message_wire_format: Option<String>,
    /// Native tool definition wire shape. Known values are `openai`
    /// and `anthropic`.
    #[serde(default)]
    pub native_tool_wire_format: Option<String>,
    /// Deferred tool loading support (the "deferred tool loading" column of
    /// the capability matrix described in the module docs).
    #[serde(default)]
    pub defer_loading: Option<bool>,
    /// Tool-search variants available on this route (the "tool-search
    /// variants" column of the capability matrix). The accepted variant
    /// names are not visible in this struct — TODO document them here.
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    /// Whether Harn supports this route through the provider's native
    /// Responses-style API instead of generic chat completions.
    #[serde(default)]
    pub responses_api: Option<bool>,
    /// Provider-hosted tools Harn can pass through without local execution.
    #[serde(default)]
    pub hosted_tools: Option<Vec<String>>,
    /// Whether provider-hosted remote MCP connectors can be mediated by the
    /// provider for this route.
    #[serde(default)]
    pub remote_mcp: Option<bool>,
    /// Whether provider-managed previous-response conversation state is
    /// available.
    #[serde(default)]
    pub conversation_state: Option<bool>,
    /// Whether provider-side truncation/compaction controls are available.
    #[serde(default)]
    pub compaction: Option<bool>,
    /// Whether provider-side background Responses jobs are available.
    #[serde(default)]
    pub background_mode: Option<bool>,
    /// Approval policy modes available when provider-hosted tools execute.
    #[serde(default)]
    pub tool_approval_policy: Option<String>,
    /// Maximum tool count for this route (the "max tool count" column of
    /// the capability matrix described in the module docs).
    #[serde(default)]
    pub max_tools: Option<u32>,
    /// Prompt caching support (the "prompt caching" column of the
    /// capability matrix described in the module docs).
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Request-side cache breakpoint strategy for routes that require
    /// `cache_control` to opt into provider prompt caching. Known values are
    /// `none`, `top_level`, and `last_block`.
    #[serde(default)]
    pub cache_breakpoint_style: Option<String>,
    /// Whether this provider/model route accepts image or other visual
    /// input blocks through Harn's LLM message path.
    #[serde(default)]
    pub vision: Option<bool>,
    /// Whether this provider/model route accepts audio input blocks
    /// through Harn's LLM message path. Also accepted under the TOML key
    /// `audio_supported`.
    #[serde(default, alias = "audio_supported")]
    pub audio: Option<bool>,
    /// Whether this provider/model route accepts PDF/document input blocks
    /// through Harn's LLM message path. Also accepted under the TOML key
    /// `pdf_supported`.
    #[serde(default, alias = "pdf_supported")]
    pub pdf: Option<bool>,
    /// Whether this provider/model route accepts video input blocks
    /// through Harn's LLM message path. Also accepted under the TOML key
    /// `video_supported`.
    #[serde(default, alias = "video_supported")]
    pub video: Option<bool>,
    /// Whether uploaded file references can be reused in message content.
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    /// File-upload transport used by `std/files.upload`. Known values
    /// are `anthropic` and `gemini`.
    #[serde(default)]
    pub file_upload_wire_format: Option<String>,
    /// Structured-output transport strategy. Known values are:
    /// `native`, `tool_use`, `format_kw`, and `none`.
    #[serde(default)]
    pub structured_output: Option<String>,
    /// Legacy name retained for project overrides written before
    /// `structured_output` became the canonical capability.
    #[serde(default)]
    pub json_schema: Option<String>,
    /// Whether prompt sections should prefer XML-style tags such as
    /// `<task>` / `<examples>` over Markdown headings.
    #[serde(default)]
    pub prefers_xml_scaffolding: Option<bool>,
    /// Whether this model's tokenizer reserves `<tool_call>` / `</tool_call>`
    /// as single special tokens (the native Hermes tool-call markers). When
    /// true, harn remaps those delimiters to a non-special bracket form on the
    /// wire to avoid degenerate opener repetition; see [`crate::llm::tool_delimiter`].
    #[serde(default)]
    pub reserved_tool_call_token: Option<bool>,
    /// Whether prompt sections should prefer Markdown headings such as
    /// `## Task` / `## Examples`.
    #[serde(default)]
    pub prefers_markdown_scaffolding: Option<bool>,
    /// Preferred logical structured-output prompt shape. This is separate
    /// from the transport-level `structured_output` strategy above.
    /// Known values are `native_json`, `delimited`, and `xml_tagged`.
    #[serde(default)]
    pub structured_output_mode: Option<String>,
    /// Whether the route accepts an assistant-role prefill message.
    #[serde(default)]
    pub supports_assistant_prefill: Option<bool>,
    /// Whether durable instructions should use OpenAI's `developer` role
    /// instead of `system`.
    #[serde(default)]
    pub prefers_role_developer: Option<bool>,
    /// Whether text-rendered tool specifications should use XML wrappers
    /// instead of JSON-schema prose.
    #[serde(default)]
    pub prefers_xml_tools: Option<bool>,
    /// Preferred representation for model thinking/reasoning blocks in
    /// transcript-like prompt context. Known values are `none`,
    /// `thinking_blocks`, `reasoning_summary`, and `inline`.
    #[serde(default)]
    pub thinking_block_style: Option<String>,
    /// Supported thinking/reasoning modes for this rule. Values are
    /// script-facing mode names: `enabled`, `adaptive`, and `effort`.
    #[serde(default)]
    pub thinking_modes: Option<Vec<String>>,
    /// Whether Anthropic interleaved thinking is supported for this
    /// provider/model route.
    #[serde(default)]
    pub interleaved_thinking_supported: Option<bool>,
    /// Anthropic beta features that should be requested for this route.
    #[serde(default)]
    pub anthropic_beta_features: Option<Vec<String>>,
    /// Legacy override compatibility. New built-in rules should use
    /// `thinking_modes` so the capability matrix preserves mode detail.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Whether the model accepts image inputs in chat content.
    /// NOTE(review): overlaps with `vision` above; how the two are
    /// reconciled is not visible in this struct — confirm in the resolver.
    #[serde(default)]
    pub vision_supported: Option<bool>,
    /// Whether image content blocks may reference remote URLs.
    #[serde(default)]
    pub image_url_input_supported: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
    /// Name of any server-side response parser that can transform model
    /// bytes before Harn sees them. `none` means the provider returns the
    /// model text/tool channel without an implicit parser.
    #[serde(default)]
    pub server_parser: Option<String>,
    /// Whether provider-specific chat-template options are honored. Most
    /// OpenAI-compatible servers call this `chat_template_kwargs`; Baseten's
    /// Model APIs spell the same concept `chat_template_args`.
    #[serde(default)]
    pub honors_chat_template_kwargs: Option<bool>,
    /// Request body field for provider-specific chat-template options when it
    /// differs from the default `chat_template_kwargs`.
    #[serde(default)]
    pub chat_template_options_field: Option<String>,
    /// Whether this route requires OpenAI's `max_completion_tokens`
    /// request field instead of legacy `max_tokens`.
    #[serde(default)]
    pub requires_completion_tokens: Option<bool>,
    /// Whether this route rejects non-streaming chat-completion requests.
    /// Harn forces streaming for such routes so callers can keep provider-
    /// neutral `stream` preferences.
    #[serde(default)]
    pub requires_streaming: Option<bool>,
    /// Whether this route accepts Harn's provider-neutral reasoning effort
    /// control. Providers project this to their native field (for example
    /// OpenAI `reasoning_effort` or Anthropic `output_config.effort`).
    #[serde(default)]
    pub reasoning_effort_supported: Option<bool>,
    /// Accepted effort values for routes that expose a narrower subset than
    /// Harn's provider-neutral enum. Empty means "unknown/all".
    #[serde(default)]
    pub reasoning_effort_levels: Option<Vec<String>>,
    /// Whether this route accepts effort "none" as a true reasoning-off
    /// setting. Older GPT-5 variants support effort but only floor at
    /// `minimal`.
    #[serde(default)]
    pub reasoning_none_supported: Option<bool>,
    /// Maximum thinking-budget tokens this model accepts for its high/xhigh/max
    /// reasoning levels, when the provider takes an explicit token budget rather
    /// than an effort enum. The canonical case is the native Gemini API
    /// `generationConfig.thinkingConfig.thinkingBudget` field, whose ceiling
    /// differs by model (Gemini 2.5 Flash caps at 24576, Pro at 32768).
    /// Declared alongside the model's other wire capabilities instead of a
    /// hard-coded `model.contains("flash")` branch in the provider.
    #[serde(default)]
    pub max_thinking_budget: Option<i64>,
    /// Whether this route accepts an explicit disabled/off reasoning switch.
    /// Some routes require reasoning and reject the provider's disabled shape.
    #[serde(default)]
    pub reasoning_disable_supported: Option<bool>,
    /// Whether this model performs *tool calls inside its reasoning channel*,
    /// so disabling reasoning silently breaks tool calling. The canonical case
    /// is the OpenAI gpt-oss (Harmony) family: with reasoning disabled it emits
    /// 0 tool_calls and a tiny billed-noncommittal completion; with reasoning
    /// enabled (even `low`) it emits clean native tool calls. This is the
    /// *opposite* of the Qwen3 quirk (Qwen narrates tool intent in the
    /// reasoning trace and emits zero `tool_calls`, so Qwen needs reasoning
    /// OFF for tools). When set, `reasoning_policy` refuses to downgrade the
    /// auto reasoning level to `off` for tool-bearing tasks (agent/code/verify)
    /// — flooring instead to the lowest supported effort — so no future
    /// auto-policy default or session pin can re-introduce the
    /// billed-noncommittal failure at the data layer.
    #[serde(default)]
    pub reasoning_required_for_tools: Option<bool>,
    /// Whether reasoning-only clean stops may be promoted into visible text.
    /// Disable this for providers whose `reasoning` field is always private
    /// trace, even when `content` is empty.
    #[serde(default)]
    pub reasoning_text_promotable: Option<bool>,
    /// Provider-specific reasoning request shape for OpenAI-compatible
    /// transports. Known values are `openrouter`, `enabled`, and `minimax`.
    #[serde(default)]
    pub reasoning_wire_format: Option<String>,
    /// Presumably: whether the route accepts the `seed` sampling parameter —
    /// TODO confirm against the request builders.
    #[serde(default)]
    pub seed_supported: Option<bool>,
    /// Presumably: whether the route accepts the `top_k` sampling parameter —
    /// TODO confirm.
    #[serde(default)]
    pub top_k_supported: Option<bool>,
    /// Presumably: whether the route accepts `temperature` — TODO confirm.
    #[serde(default)]
    pub temperature_supported: Option<bool>,
    /// Presumably: whether the route accepts `top_p` — TODO confirm.
    #[serde(default)]
    pub top_p_supported: Option<bool>,
    /// Presumably: whether the route accepts `frequency_penalty` — TODO
    /// confirm.
    #[serde(default)]
    pub frequency_penalty_supported: Option<bool>,
    /// Presumably: whether the route accepts `presence_penalty` — TODO
    /// confirm.
    #[serde(default)]
    pub presence_penalty_supported: Option<bool>,
    /// Accepted provider-native `tool_choice` modes. Empty means unrestricted
    /// or unknown. Use this for routes whose native tools work, but whose API
    /// rejects forced/specified tool choices.
    #[serde(default)]
    pub allowed_tool_choice_modes: Option<Vec<String>>,
    /// Whether an assistant `tool_calls` message must be followed immediately
    /// by `role=tool` messages for every emitted `tool_call_id`.
    #[serde(default)]
    pub requires_tool_result_adjacency: Option<bool>,
    /// Whether a single assistant message may contain multiple tool calls.
    /// Some OpenAI-compatible providers reject replayed history with more than
    /// one `tool_calls[]` entry even when the calls were parsed from Harn's text
    /// tool protocol, so the request builder must serialize history as
    /// one-call assistant turns for those routes.
    #[serde(default)]
    pub supports_parallel_tool_calls: Option<bool>,
    /// Whether the route rejects `response_format` when native `tools` are
    /// present. Strict OpenAI-compatible servers such as Cerebras accept each
    /// feature alone but reject the pair together.
    #[serde(default)]
    pub tools_exclude_response_format: Option<bool>,
    /// Preferred endpoint family for this provider/model route. Values
    /// are descriptive labels consumed by providers, e.g.
    /// `/api/generate-raw` for Ollama raw prompt bypass.
    #[serde(default)]
    pub recommended_endpoint: Option<String>,
    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
    /// survive the provider route and return in the visible response body.
    #[serde(default)]
    pub text_tool_wire_format_supported: Option<bool>,
    /// Preferred tool-calling mode for this provider/model route when
    /// callers do not explicitly choose `tool_format`. This lets the
    /// capability matrix route around known provider-native regressions
    /// without making presets branch on model names.
    #[serde(default)]
    pub preferred_tool_format: Option<String>,
    /// Empirical native/text interchangeability status for this route.
    /// Known values are descriptive, not gates: `interchangeable`,
    /// `native_unreliable`, `text_unreliable`, `native_only`,
    /// `text_only`, and `unknown`.
    #[serde(default)]
    pub tool_mode_parity: Option<String>,
    /// Short human-readable note explaining `tool_mode_parity`.
    #[serde(default)]
    pub tool_mode_parity_notes: Option<String>,
    /// In-prompt directive that disables this model's "thinking" mode when
    /// the API doesn't expose a first-class field (or exposes it
    /// inconsistently across templates / quantizations). For Qwen3 family
    /// chat templates this is `/no_think`. When `thinking: false` is
    /// requested and this is set, Harn auto-prepends the directive to the
    /// system message so script authors don't need to know it exists.
    #[serde(default)]
    pub thinking_disable_directive: Option<String>,
    /// Per-task auto-policy reasoning-level overrides for this route.
    /// Keys are task labels (`agent`, `verify`, `chat`, `summarize`,
    /// `code`); values are reasoning levels (`off`, `minimal`, `low`,
    /// `medium`, `high`, `xhigh`, `max`). Consulted by `reasoning_policy` only
    /// when policy resolves to `auto` — explicit policies always win.
    ///
    /// Use this to declare known per-model regressions that should
    /// flip the auto-policy default, instead of hard-coding the model/
    /// provider pattern in resolver code. The canonical example is the
    /// Qwen3 tool-call regression — `{ agent = "off" }` disables
    /// reasoning whenever a script registers tools with that route,
    /// matching Qwen's own published guidance.
    #[serde(default)]
    pub auto_reasoning_overrides: Option<BTreeMap<String, String>>,
    /// OpenRouter upstream provider names that must be excluded from routing
    /// for this `(provider, model)` row. Materialized into the request body's
    /// `provider.ignore` array (see
    /// [`crate::llm::providers::openai_compat::apply_openrouter_route_denylist`]).
    /// This is a data-driven route-around for upstreams that serve a route
    /// incorrectly while still advertising the model — the canonical case is
    /// OpenRouter's `Ambient` upstream billing reasoning tokens for
    /// `qwen/qwen3.6-35b-a3b` and then finishing with empty `tool_calls`,
    /// while Parasail / AtlasCloud / AkashML serve the identical request
    /// natively. Only consulted for the `openrouter` provider.
    #[serde(default)]
    pub provider_route_denylist: Option<Vec<String>>,
    /// OpenRouter upstream provider names this `(provider, model)` row is
    /// PINNED to, in preference order. Materialized into the request body's
    /// `provider.order` array with `allow_fallbacks = false` (see
    /// [`crate::llm::providers::openai_compat::apply_openrouter_provider_order`]),
    /// so OpenRouter only ever routes the model to these known-clean upstreams
    /// and never silently falls back to a sketchier one. This is the
    /// *allowlist* counterpart to [`Self::provider_route_denylist`]: prefer it
    /// when the bad upstreams are intermittent / hard to enumerate but the
    /// clean ones are few and stable. The canonical case is OpenRouter's
    /// `openai/gpt-oss-*` route, which fans out across ~17 upstreams in a
    /// sub-provider lottery; some mis-serialize the Harmony tool call even with
    /// reasoning ON (billed-noncommittal: 0 tool_calls), while Cerebras and
    /// Groq serve it cleanly. Only consulted for the `openrouter` provider. An
    /// empty / unset list means "no pin" (free OpenRouter routing). When both a
    /// pin and a denylist are present the pin wins (a closed allowlist already
    /// excludes everything not on it). Validated by the footgun gate in
    /// [`crate::llm::capability_audit`].
    #[serde(default)]
    pub openrouter_provider_order: Option<Vec<String>>,
    /// Serving-quality / precision trust verdict for this `(provider, model)`
    /// route. A provider can be live and fast yet still serve a model at
    /// DEGRADED quality (e.g. an undocumented quantization) or reject otherwise
    /// valid requests, silently contaminating any eval/meter that trusts its
    /// numbers. This is the data-driven sibling of [`Self::provider_route_denylist`]
    /// / [`Self::openrouter_provider_order`]: instead of routing *around* a bad
    /// upstream, it labels the route's measured precision so tooling (the
    /// meter precision canary) can refuse to trust a `degraded` route and flag a
    /// `throttled` one. Known values are `trusted` (full-precision verified
    /// against a reference), `degraded` (proven to serve at reduced quality),
    /// `throttled` (full-precision but rate-limited to unusable timing), and
    /// `unverified` (no verdict — treated the same as unset). Unset means
    /// `unverified`.
    #[serde(default)]
    pub serving_precision: Option<String>,
}
532
533impl ProviderRule {
534    /// Fill every capability field that `self` (the accumulated `extends`
535    /// fall-through chain so far) has NOT explicitly set from `other`, a
536    /// later matching rule with lower precedence. "Explicitly set" is the
537    /// serde `Option` raw-deserialization state — never inferred from a
538    /// field's value equaling the default.
539    ///
540    /// The destructure of `other` is deliberately exhaustive (no `..`
541    /// catch-all): adding a new capability field to [`ProviderRule`] fails
542    /// to compile here until the merge handles it.
543    fn fill_missing_from(&mut self, other: &ProviderRule) {
544        let ProviderRule {
545            // Rule-matching metadata, not capability payload: the merged
546            // chain keeps the first (highest-precedence) rule's identity.
547            model_match: _,
548            version_min: _,
549            extends: _,
550            native_tools,
551            message_wire_format,
552            native_tool_wire_format,
553            defer_loading,
554            tool_search,
555            responses_api,
556            hosted_tools,
557            remote_mcp,
558            conversation_state,
559            compaction,
560            background_mode,
561            tool_approval_policy,
562            max_tools,
563            prompt_caching,
564            cache_breakpoint_style,
565            vision,
566            audio,
567            pdf,
568            video,
569            files_api_supported,
570            file_upload_wire_format,
571            structured_output,
572            json_schema,
573            prefers_xml_scaffolding,
574            reserved_tool_call_token,
575            prefers_markdown_scaffolding,
576            structured_output_mode,
577            supports_assistant_prefill,
578            prefers_role_developer,
579            prefers_xml_tools,
580            thinking_block_style,
581            thinking_modes,
582            interleaved_thinking_supported,
583            anthropic_beta_features,
584            thinking,
585            vision_supported,
586            image_url_input_supported,
587            preserve_thinking,
588            server_parser,
589            honors_chat_template_kwargs,
590            chat_template_options_field,
591            requires_completion_tokens,
592            requires_streaming,
593            reasoning_effort_supported,
594            reasoning_effort_levels,
595            reasoning_none_supported,
596            max_thinking_budget,
597            reasoning_disable_supported,
598            reasoning_required_for_tools,
599            reasoning_text_promotable,
600            reasoning_wire_format,
601            seed_supported,
602            top_k_supported,
603            temperature_supported,
604            top_p_supported,
605            frequency_penalty_supported,
606            presence_penalty_supported,
607            allowed_tool_choice_modes,
608            requires_tool_result_adjacency,
609            supports_parallel_tool_calls,
610            tools_exclude_response_format,
611            recommended_endpoint,
612            text_tool_wire_format_supported,
613            preferred_tool_format,
614            tool_mode_parity,
615            tool_mode_parity_notes,
616            thinking_disable_directive,
617            auto_reasoning_overrides,
618            provider_route_denylist,
619            openrouter_provider_order,
620            serving_precision,
621        } = other;
622        fill_opt(&mut self.native_tools, native_tools);
623        fill_opt(&mut self.message_wire_format, message_wire_format);
624        fill_opt(&mut self.native_tool_wire_format, native_tool_wire_format);
625        fill_opt(&mut self.defer_loading, defer_loading);
626        fill_opt(&mut self.tool_search, tool_search);
627        fill_opt(&mut self.responses_api, responses_api);
628        fill_opt(&mut self.hosted_tools, hosted_tools);
629        fill_opt(&mut self.remote_mcp, remote_mcp);
630        fill_opt(&mut self.conversation_state, conversation_state);
631        fill_opt(&mut self.compaction, compaction);
632        fill_opt(&mut self.background_mode, background_mode);
633        fill_opt(&mut self.tool_approval_policy, tool_approval_policy);
634        fill_opt(&mut self.max_tools, max_tools);
635        fill_opt(&mut self.prompt_caching, prompt_caching);
636        fill_opt(&mut self.cache_breakpoint_style, cache_breakpoint_style);
637        fill_opt(&mut self.audio, audio);
638        fill_opt(&mut self.pdf, pdf);
639        fill_opt(&mut self.video, video);
640        fill_opt(&mut self.files_api_supported, files_api_supported);
641        fill_opt(&mut self.file_upload_wire_format, file_upload_wire_format);
642        fill_opt(&mut self.prefers_xml_scaffolding, prefers_xml_scaffolding);
643        fill_opt(&mut self.reserved_tool_call_token, reserved_tool_call_token);
644        fill_opt(
645            &mut self.prefers_markdown_scaffolding,
646            prefers_markdown_scaffolding,
647        );
648        fill_opt(&mut self.structured_output_mode, structured_output_mode);
649        fill_opt(
650            &mut self.supports_assistant_prefill,
651            supports_assistant_prefill,
652        );
653        fill_opt(&mut self.prefers_role_developer, prefers_role_developer);
654        fill_opt(&mut self.prefers_xml_tools, prefers_xml_tools);
655        fill_opt(&mut self.thinking_block_style, thinking_block_style);
656        fill_opt(
657            &mut self.interleaved_thinking_supported,
658            interleaved_thinking_supported,
659        );
660        fill_opt(&mut self.anthropic_beta_features, anthropic_beta_features);
661        fill_opt(
662            &mut self.image_url_input_supported,
663            image_url_input_supported,
664        );
665        fill_opt(&mut self.preserve_thinking, preserve_thinking);
666        fill_opt(&mut self.server_parser, server_parser);
667        fill_opt(
668            &mut self.honors_chat_template_kwargs,
669            honors_chat_template_kwargs,
670        );
671        fill_opt(
672            &mut self.chat_template_options_field,
673            chat_template_options_field,
674        );
675        fill_opt(
676            &mut self.requires_completion_tokens,
677            requires_completion_tokens,
678        );
679        fill_opt(&mut self.requires_streaming, requires_streaming);
680        fill_opt(
681            &mut self.reasoning_effort_supported,
682            reasoning_effort_supported,
683        );
684        fill_opt(&mut self.reasoning_effort_levels, reasoning_effort_levels);
685        fill_opt(&mut self.reasoning_none_supported, reasoning_none_supported);
686        fill_opt(&mut self.max_thinking_budget, max_thinking_budget);
687        fill_opt(
688            &mut self.reasoning_disable_supported,
689            reasoning_disable_supported,
690        );
691        fill_opt(
692            &mut self.reasoning_required_for_tools,
693            reasoning_required_for_tools,
694        );
695        fill_opt(
696            &mut self.reasoning_text_promotable,
697            reasoning_text_promotable,
698        );
699        fill_opt(&mut self.reasoning_wire_format, reasoning_wire_format);
700        fill_opt(&mut self.seed_supported, seed_supported);
701        fill_opt(&mut self.top_k_supported, top_k_supported);
702        fill_opt(&mut self.temperature_supported, temperature_supported);
703        fill_opt(&mut self.top_p_supported, top_p_supported);
704        fill_opt(
705            &mut self.frequency_penalty_supported,
706            frequency_penalty_supported,
707        );
708        fill_opt(
709            &mut self.presence_penalty_supported,
710            presence_penalty_supported,
711        );
712        fill_opt(
713            &mut self.allowed_tool_choice_modes,
714            allowed_tool_choice_modes,
715        );
716        fill_opt(
717            &mut self.requires_tool_result_adjacency,
718            requires_tool_result_adjacency,
719        );
720        fill_opt(
721            &mut self.supports_parallel_tool_calls,
722            supports_parallel_tool_calls,
723        );
724        fill_opt(
725            &mut self.tools_exclude_response_format,
726            tools_exclude_response_format,
727        );
728        fill_opt(&mut self.recommended_endpoint, recommended_endpoint);
729        fill_opt(
730            &mut self.text_tool_wire_format_supported,
731            text_tool_wire_format_supported,
732        );
733        fill_opt(&mut self.preferred_tool_format, preferred_tool_format);
734        fill_opt(&mut self.tool_mode_parity, tool_mode_parity);
735        fill_opt(&mut self.tool_mode_parity_notes, tool_mode_parity_notes);
736        fill_opt(
737            &mut self.thinking_disable_directive,
738            thinking_disable_directive,
739        );
740        fill_opt(&mut self.auto_reasoning_overrides, auto_reasoning_overrides);
741        fill_opt(&mut self.provider_route_denylist, provider_route_denylist);
742        fill_opt(
743            &mut self.openrouter_provider_order,
744            openrouter_provider_order,
745        );
746        fill_opt(&mut self.serving_precision, serving_precision);
747        // Legacy alias pairs resolve as ONE logical capability
748        // (`rule_structured_output`, `rule_thinking_modes`, `rule_vision`),
749        // so they fill as a unit: when the accumulated chain has explicitly
750        // set either spelling, the later rule's pair must not leak through
751        // the other spelling and override that explicit choice.
752        if self.structured_output.is_none() && self.json_schema.is_none() {
753            self.structured_output.clone_from(structured_output);
754            self.json_schema.clone_from(json_schema);
755        }
756        if self.thinking_modes.is_none() && self.thinking.is_none() {
757            self.thinking_modes.clone_from(thinking_modes);
758            self.thinking.clone_from(thinking);
759        }
760        if self.vision.is_none() && self.vision_supported.is_none() {
761            self.vision.clone_from(vision);
762            self.vision_supported.clone_from(vision_supported);
763        }
764    }
765}
766
/// Fully resolved capability set for one `(provider, model)` route.
///
/// Every gate is stored as a concrete value (`bool`, `String`, `Vec`, ...)
/// instead of an `Option<bool>`, so callers can branch on a field without
/// unwrapping it first. Fields a rule leaves unset resolve to `false` /
/// empty / `None`. `Capabilities::default()` (below) spells out the value
/// each field takes in a default-constructed instance; note that a handful
/// of gates are `true` there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    pub native_tools: bool,
    pub message_wire_format: String,
    pub native_tool_wire_format: String,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    /// Forced to `false` by `lookup_with_user_overrides` unless the provider
    /// is `openai` or `mock` (as are the six fields that follow, each reset
    /// to its off / empty / `None` value).
    pub responses_api: bool,
    pub hosted_tools: Vec<String>,
    pub remote_mcp: bool,
    pub conversation_state: bool,
    pub compaction: bool,
    pub background_mode: bool,
    pub tool_approval_policy: Option<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    pub cache_breakpoint_style: String,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub video: bool,
    pub files_api_supported: bool,
    pub file_upload_wire_format: Option<String>,
    pub structured_output: Option<String>,
    /// Legacy mirror kept for CLI display and older callers.
    pub json_schema: Option<String>,
    pub prefers_xml_scaffolding: bool,
    /// Documented on [`ProviderRule::reserved_tool_call_token`].
    pub reserved_tool_call_token: bool,
    pub prefers_markdown_scaffolding: bool,
    pub structured_output_mode: String,
    pub supports_assistant_prefill: bool,
    pub prefers_role_developer: bool,
    pub prefers_xml_tools: bool,
    pub thinking_block_style: String,
    pub thinking_modes: Vec<String>,
    pub interleaved_thinking_supported: bool,
    pub anthropic_beta_features: Vec<String>,
    pub vision_supported: bool,
    pub image_url_input_supported: bool,
    pub preserve_thinking: bool,
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub chat_template_options_field: Option<String>,
    pub requires_completion_tokens: bool,
    pub requires_streaming: bool,
    pub reasoning_effort_supported: bool,
    pub reasoning_effort_levels: Vec<String>,
    pub reasoning_none_supported: bool,
    /// Documented on [`ProviderRule::max_thinking_budget`]. `None` means the
    /// model falls back to the provider's own default ceiling.
    pub max_thinking_budget: Option<i64>,
    pub reasoning_disable_supported: bool,
    /// Documented on [`ProviderRule::reasoning_required_for_tools`].
    pub reasoning_required_for_tools: bool,
    pub reasoning_text_promotable: bool,
    pub reasoning_wire_format: Option<String>,
    pub seed_supported: bool,
    pub top_k_supported: bool,
    pub temperature_supported: bool,
    pub top_p_supported: bool,
    pub frequency_penalty_supported: bool,
    pub presence_penalty_supported: bool,
    pub allowed_tool_choice_modes: Vec<String>,
    pub requires_tool_result_adjacency: bool,
    pub supports_parallel_tool_calls: bool,
    pub tools_exclude_response_format: bool,
    pub recommended_endpoint: Option<String>,
    /// When `false`, `channel_forbidden` treats the text tool channel as
    /// known-broken for this route.
    pub text_tool_wire_format_supported: bool,
    /// Format the tool-format gate prefers to steer to when the requested
    /// channel is forbidden (see `validate_tool_format_with_caps`).
    pub preferred_tool_format: Option<String>,
    /// Per-route verdict string read by `channel_forbidden`; `None` is
    /// treated there as `"unknown"`.
    pub tool_mode_parity: Option<String>,
    /// Free-form note appended in parentheses to tool-format correction
    /// messages when non-empty.
    pub tool_mode_parity_notes: Option<String>,
    pub thinking_disable_directive: Option<String>,
    /// Auto-policy reasoning-level overrides for this route, keyed per
    /// task. Documented on [`ProviderRule::auto_reasoning_overrides`].
    pub auto_reasoning_overrides: BTreeMap<String, String>,
    /// Names of OpenRouter upstream providers excluded from routing for this
    /// row; an empty list means "no route restriction". Documented on
    /// [`ProviderRule::provider_route_denylist`].
    pub provider_route_denylist: Vec<String>,
    /// Names of OpenRouter upstream providers this row is PINNED to
    /// (allowlist), in preference order; an empty list means "no pin" (free
    /// OpenRouter routing). Documented on
    /// [`ProviderRule::openrouter_provider_order`].
    pub openrouter_provider_order: Vec<String>,
    /// Trust verdict on serving quality / precision for this route;
    /// `"unverified"` when unset. Documented on
    /// [`ProviderRule::serving_precision`].
    pub serving_precision: String,
}

impl Default for Capabilities {
    /// Build the default capability set.
    ///
    /// The literal is grouped by value class rather than by declaration
    /// order so the non-obvious defaults (the gates that are `true`) are
    /// easy to spot.
    fn default() -> Self {
        Self {
            // Wire formats and style selectors.
            message_wire_format: String::from("openai"),
            native_tool_wire_format: String::from("openai"),
            cache_breakpoint_style: String::from("none"),
            structured_output_mode: String::from("none"),
            thinking_block_style: String::from("none"),
            server_parser: String::from("none"),
            serving_precision: String::from("unverified"),

            // Gates that default to enabled.
            image_url_input_supported: true,
            reasoning_disable_supported: true,
            reasoning_text_promotable: true,
            seed_supported: true,
            top_k_supported: true,
            temperature_supported: true,
            top_p_supported: true,
            frequency_penalty_supported: true,
            presence_penalty_supported: true,
            supports_parallel_tool_calls: true,
            text_tool_wire_format_supported: true,

            // Gates that default to disabled.
            native_tools: false,
            defer_loading: false,
            responses_api: false,
            remote_mcp: false,
            conversation_state: false,
            compaction: false,
            background_mode: false,
            prompt_caching: false,
            vision: false,
            audio: false,
            pdf: false,
            video: false,
            files_api_supported: false,
            prefers_xml_scaffolding: false,
            reserved_tool_call_token: false,
            prefers_markdown_scaffolding: false,
            supports_assistant_prefill: false,
            prefers_role_developer: false,
            prefers_xml_tools: false,
            interleaved_thinking_supported: false,
            vision_supported: false,
            preserve_thinking: false,
            honors_chat_template_kwargs: false,
            requires_completion_tokens: false,
            requires_streaming: false,
            reasoning_effort_supported: false,
            reasoning_none_supported: false,
            reasoning_required_for_tools: false,
            requires_tool_result_adjacency: false,
            tools_exclude_response_format: false,

            // Optional values with no default.
            tool_approval_policy: None,
            max_tools: None,
            file_upload_wire_format: None,
            structured_output: None,
            json_schema: None,
            chat_template_options_field: None,
            max_thinking_budget: None,
            reasoning_wire_format: None,
            recommended_endpoint: None,
            preferred_tool_format: None,
            tool_mode_parity: None,
            tool_mode_parity_notes: None,
            thinking_disable_directive: None,

            // Collections that default to empty.
            tool_search: Vec::new(),
            hosted_tools: Vec::new(),
            thinking_modes: Vec::new(),
            anthropic_beta_features: Vec::new(),
            reasoning_effort_levels: Vec::new(),
            allowed_tool_choice_modes: Vec::new(),
            auto_reasoning_overrides: BTreeMap::new(),
            provider_route_denylist: Vec::new(),
            openrouter_provider_order: Vec::new(),
        }
    }
}
935
936/// Display-oriented row for `harn provider catalog matrix`, the legacy
937/// `harn check --provider-matrix` surface, and the generated docs page. Rows
938/// are intentionally rule-shaped: `model` is the rule's `model_match` pattern,
939/// because the shipped capability source of truth is a first-match rule table
940/// rather than an exhaustive remote model inventory.
941#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
942pub struct ProviderCapabilityMatrixRow {
943    pub provider: String,
944    pub model: String,
945    pub version_min: Option<Vec<u32>>,
946    /// Whether this rule opts into field-wise fall-through
947    /// ([`ProviderRule::extends`]). Rows in this matrix are rule-shaped, so
948    /// an `extends` row honestly reports its OWN fields only — for a
949    /// matching model, unset fields resolve from later matching rows and
950    /// provider defaults rather than the printed per-rule values.
951    pub extends: bool,
952    pub thinking: Vec<String>,
953    pub vision: bool,
954    pub audio: bool,
955    pub pdf: bool,
956    pub video: bool,
957    pub streaming: bool,
958    pub files_api_supported: bool,
959    pub json_schema: Option<String>,
960    pub prefers_xml_scaffolding: bool,
961    pub reserved_tool_call_token: bool,
962    pub prefers_markdown_scaffolding: bool,
963    pub structured_output_mode: String,
964    pub supports_assistant_prefill: bool,
965    pub prefers_role_developer: bool,
966    pub prefers_xml_tools: bool,
967    pub thinking_block_style: String,
968    pub native_tools: bool,
969    pub text_tools: bool,
970    pub preferred_tool_format: String,
971    pub tool_mode_parity: String,
972    pub tools: bool,
973    pub cache: bool,
974    /// Serving-quality / precision trust verdict for this route. See
975    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
976    pub serving_precision: String,
977    pub source: String,
978}
979
980#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
981pub struct ToolCapabilityAuditReport {
982    pub audited_models: usize,
983    pub gaps: Vec<ToolCapabilityAuditGap>,
984}
985
986impl ToolCapabilityAuditReport {
987    pub fn ok(&self) -> bool {
988        self.gaps.is_empty()
989    }
990
991    pub fn render_human(&self) -> String {
992        if self.gaps.is_empty() {
993            return format!(
994                "provider capability audit OK: {} priced chat models have explicit native_tools and preferred_tool_format rules",
995                self.audited_models
996            );
997        }
998
999        let mut out = format!(
1000            "provider capability audit found {} catalog gaps among {} priced chat models:",
1001            self.gaps.len(),
1002            self.audited_models
1003        );
1004        for gap in &self.gaps {
1005            let matched = match (&gap.rule_provider, &gap.rule_model_match) {
1006                (Some(provider), Some(model_match)) => {
1007                    format!("provider.{provider} model_match=\"{model_match}\"")
1008                }
1009                _ => "no matching rule".to_string(),
1010            };
1011            out.push_str(&format!(
1012                "\n- {}:{} ({matched}) missing {}; suggest native_tools = {}, preferred_tool_format = \"{}\"",
1013                gap.provider,
1014                gap.model,
1015                gap.missing_fields.join(", "),
1016                gap.suggested_native_tools,
1017                gap.suggested_preferred_tool_format,
1018            ));
1019        }
1020        out
1021    }
1022}
1023
1024#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
1025pub struct ToolCapabilityAuditGap {
1026    pub provider: String,
1027    pub model: String,
1028    pub rule_provider: Option<String>,
1029    pub rule_model_match: Option<String>,
1030    pub missing_fields: Vec<String>,
1031    pub suggested_native_tools: bool,
1032    pub suggested_preferred_tool_format: String,
1033}
1034
1035thread_local! {
1036    /// Per-thread user overrides installed by the CLI at startup. Kept
1037    /// thread-local (not process-static) to match the rest of the VM
1038    /// state model — the VM is !Send and each VM thread owns its own
1039    /// configuration.
1040    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
1041}
1042
1043/// Lazily-parsed built-in rules. The `include_str!` content is a static
1044/// constant; parsing it once per process is safe and free of ordering
1045/// hazards.
1046static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
1047
1048fn builtin() -> &'static CapabilitiesFile {
1049    BUILTIN.get_or_init(|| {
1050        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
1051            .expect("capabilities.toml must parse at build time")
1052    })
1053}
1054
1055/// The shipped (built-in) capability matrix. Public so the footgun gate in
1056/// [`crate::llm::capability_audit`] can audit exactly what Harn ships.
1057pub fn builtin_file() -> &'static CapabilitiesFile {
1058    builtin()
1059}
1060
1061/// Install project-level overrides for the current thread. Usually
1062/// called once at CLI bootstrap after reading `harn.toml`. Passing
1063/// `None` clears any prior override.
1064pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
1065    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
1066}
1067
1068/// Clear any thread-local user overrides. Used between test runs.
1069pub fn clear_user_overrides() {
1070    set_user_overrides(None);
1071}
1072
1073/// Parse a TOML string containing the capabilities section's own shape
1074/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
1075/// same layout used by the built-in `capabilities.toml`) and install as
1076/// the current thread's override.
1077pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
1078    set_user_overrides(Some(parse_capabilities_toml(src)?));
1079    Ok(())
1080}
1081
1082/// Parse a capabilities TOML document (the same layout used by the built-in
1083/// `capabilities.toml`) without installing it anywhere, for callers that
1084/// thread an explicit capability overlay instead of mutating thread state
1085/// (e.g. `harn provider catalog export --capabilities-overlay`).
1086pub fn parse_capabilities_toml(src: &str) -> Result<CapabilitiesFile, String> {
1087    toml::from_str(src).map_err(|e| e.to_string())
1088}
1089
1090/// Extract the `[capabilities]` section from a full `harn.toml` source
1091/// and install it as the current thread's override. The schema inside
1092/// that section mirrors `CapabilitiesFile` but with every key prefixed
1093/// by `capabilities.`:
1094///
1095/// ```toml
1096/// [[capabilities.provider.my-proxy]]
1097/// model_match = "*"
1098/// native_tools = true
1099/// tool_search = ["hosted"]
1100/// ```
1101pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
1102    #[derive(Deserialize)]
1103    struct Manifest {
1104        #[serde(default)]
1105        capabilities: Option<CapabilitiesFile>,
1106    }
1107    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
1108    set_user_overrides(parsed.capabilities);
1109    Ok(())
1110}
1111
1112/// Look up effective capabilities for a `(provider, model)` pair.
1113/// Walks the provider_family chain until it finds a rule list that
1114/// matches. Within any one provider's rule list, user overrides are
1115/// consulted before the built-in rules. The first matching rule wins —
1116/// later rules (and later layers in the family chain) are ignored —
1117/// unless it sets `extends = true`, in which case it contributes only the
1118/// fields it explicitly sets and resolution continues to later matching
1119/// rules (and ultimately provider / built-in defaults) to fill the rest.
1120pub fn lookup(provider: &str, model: &str) -> Capabilities {
1121    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1122    lookup_with_user_overrides(provider, model, user.as_ref())
1123}
1124
1125pub fn lookup_with_user_overrides(
1126    provider: &str,
1127    model: &str,
1128    user_overrides: Option<&CapabilitiesFile>,
1129) -> Capabilities {
1130    let mut caps = lookup_with(provider, model, builtin(), user_overrides);
1131    if provider != "openai" && provider != "mock" {
1132        caps.responses_api = false;
1133        caps.hosted_tools.clear();
1134        caps.remote_mcp = false;
1135        caps.conversation_state = false;
1136        caps.compaction = false;
1137        caps.background_mode = false;
1138        caps.tool_approval_policy = None;
1139    }
1140    caps
1141}
1142
/// Wire channel that a `tool_format` string travels over.
///
/// `native` means the provider's structured `tool_calls` JSON channel,
/// whereas `text` and `json` are text-channel grammars embedded in
/// assistant content. This mirrors `llm_config::ToolFormatChannel`; it is
/// duplicated here so the capability registry (the single source of truth
/// for tool-call dialect validity) does not depend on the resolver.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatWire {
    /// Provider-native JSON tool calling, i.e. `tool_format = "native"`.
    Native,
    /// Any text-channel grammar, i.e. `tool_format = "text"` or `"json"`.
    Text,
}

impl ToolFormatWire {
    /// Map a `tool_format` string onto its wire channel.
    ///
    /// Unknown values yield `None`, letting callers reject typos loudly
    /// instead of guessing a channel. Matching is exact (case-sensitive, no
    /// trimming).
    pub fn classify(tool_format: &str) -> Option<Self> {
        if tool_format == "native" {
            Some(Self::Native)
        } else if tool_format == "text" || tool_format == "json" {
            Some(Self::Text)
        } else {
            None
        }
    }
}
1168
/// Result of checking a requested `(provider, model, tool_format)` combo
/// against the capability registry's model of tool-call dialect validity.
///
/// This is the FOOTGUN-REMOVAL contract. A harness developer may request
/// any tool_format; the registry guarantees that the resolved format is one
/// that really yields parseable tool calls on that route. A known-broken
/// combo (for instance a `native` pin on a `native_unreliable` route, which
/// silently degrades to unparsed DSML text) is auto-corrected, and the
/// reason is reported.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolFormatDecision {
    /// The tool_format to actually put on the wire: the requested format
    /// if the combo was already valid, otherwise the format the registry
    /// steers to for the route.
    pub effective: String,
    /// Present only when the requested format was overridden. A
    /// human-readable message naming the bad combo and the working
    /// alternative; show it to the harness developer so that vanishing tool
    /// calls are never silent.
    pub correction: Option<String>,
}

impl ToolFormatDecision {
    /// Decision for a request that is used as-is: `format` becomes the
    /// effective format and no correction is attached.
    fn accepted(format: String) -> Self {
        Self {
            correction: None,
            effective: format,
        }
    }
}
1197
/// Whether a `tool_mode_parity` verdict marks the native (provider JSON)
/// channel as untrustworthy for parseable tool calls.
///
/// `unsupported` (no working channel at all) is deliberately not counted:
/// there is no better format to steer to, so the gate leaves such a route
/// alone instead of rewriting it to another broken channel under a
/// misleading "Using X instead" message.
fn parity_forbids_native(parity: &str) -> bool {
    parity == "native_unreliable" || parity == "text_only"
}
1206
/// Whether a `tool_mode_parity` verdict marks text-channel grammars as
/// untrustworthy for parseable tool calls. `unsupported` is left out for
/// the reason given on [`parity_forbids_native`].
fn parity_forbids_text(parity: &str) -> bool {
    parity == "text_unreliable" || parity == "native_only"
}
1213
1214/// True when the requested wire channel is known not to return parseable tool
1215/// calls for a route. The gate auto-corrects only on *positive* evidence of
1216/// breakage, never on a "we don't know" default:
1217///
1218/// - `tool_mode_parity` is an explicit verdict (`parity_forbids_*`).
1219/// - `text_tool_wire_format_supported = false` is an explicit declaration that
1220///   the text channel does not survive this route (e.g. native-only local
1221///   Ollama Qwen3 rows that omit a parity string). It defaults to `true`, so an
1222///   unknown route is never wrongly judged text-broken.
1223///
1224/// `native_tools` is deliberately NOT consulted here: it defaults to `false`
1225/// for unknown providers, so treating `!native_tools` as "native is broken"
1226/// would wrongly rewrite a custom proxy that does support native tools. The
1227/// hard `native` + `!native_tools` capability gate in `extract_llm_options`
1228/// already rejects a genuine native-on-non-native mismatch loudly.
1229fn channel_forbidden(wire: ToolFormatWire, caps: &Capabilities) -> bool {
1230    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1231    match wire {
1232        ToolFormatWire::Native => parity_forbids_native(parity),
1233        ToolFormatWire::Text => {
1234            parity_forbids_text(parity) || !caps.text_tool_wire_format_supported
1235        }
1236    }
1237}
1238
1239/// Validate (and, where the registry knows better, auto-correct) a requested
1240/// `tool_format` for a `(provider, model)` route.
1241///
1242/// This is the single enforcement seam for tool-call dialect validity. The
1243/// capability registry already declares, per route, which channel actually
1244/// returns parseable tool calls (`tool_mode_parity`) and which format to use
1245/// (`preferred_tool_format`). Before this function those fields were advisory
1246/// metadata that any alias pin or explicit `--tool-format` flag could silently
1247/// override — the footgun behind the DeepSeek V3.2 DSML "vanishing tool calls"
1248/// dead-abstain. Now any combo whose requested channel is forbidden — by the
1249/// route's `tool_mode_parity` verdict OR by an explicit
1250/// `text_tool_wire_format_supported = false` declaration — is rewritten to a
1251/// working channel (preferring the route's `preferred_tool_format`), with a
1252/// `correction` message naming both. Unknown formats, routes with no adverse
1253/// signal (`unknown`/`interchangeable`), and routes with no working channel at
1254/// all pass through unchanged.
1255pub fn validate_tool_format(provider: &str, model: &str, requested: &str) -> ToolFormatDecision {
1256    let caps = lookup(provider, model);
1257    validate_tool_format_with_caps(provider, model, requested, &caps)
1258}
1259
1260/// `validate_tool_format` against an already-resolved [`Capabilities`], so hot
1261/// callers that already hold one avoid a second matrix lookup.
1262pub fn validate_tool_format_with_caps(
1263    provider: &str,
1264    model: &str,
1265    requested: &str,
1266    caps: &Capabilities,
1267) -> ToolFormatDecision {
1268    // Unknown / unclassifiable formats are not ours to second-guess — the
1269    // exhaustive-match guard elsewhere already rejects typos loudly.
1270    let Some(wire) = ToolFormatWire::classify(requested) else {
1271        return ToolFormatDecision::accepted(requested.to_string());
1272    };
1273
1274    if !channel_forbidden(wire, caps) {
1275        return ToolFormatDecision::accepted(requested.to_string());
1276    }
1277
1278    // The requested channel is known-broken for this route. Pick the opposite
1279    // channel as the steer target, preferring the route's declared
1280    // `preferred_tool_format` when it lands on a channel that is itself not
1281    // forbidden. If BOTH channels are forbidden (a route with no working tool
1282    // surface), there is nothing better to offer — pass the request through
1283    // unchanged rather than rewrite to an equally-broken format under a
1284    // misleading correction message.
1285    let opposite = match wire {
1286        ToolFormatWire::Native => ToolFormatWire::Text,
1287        ToolFormatWire::Text => ToolFormatWire::Native,
1288    };
1289    if channel_forbidden(opposite, caps) {
1290        return ToolFormatDecision::accepted(requested.to_string());
1291    }
1292    let preferred = caps
1293        .preferred_tool_format
1294        .clone()
1295        .filter(|fmt| ToolFormatWire::classify(fmt) == Some(opposite))
1296        .unwrap_or_else(|| match opposite {
1297            ToolFormatWire::Native => "native".to_string(),
1298            ToolFormatWire::Text => "json".to_string(),
1299        });
1300
1301    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1302    let mut correction = format!(
1303        "tool_format `{requested}` is not safe for {provider}/{model} \
1304         (tool_mode_parity = `{parity}`): this route does not return parseable \
1305         tool calls on the {} channel, so calls would silently vanish. \
1306         Using `{preferred}` instead.",
1307        match wire {
1308            ToolFormatWire::Native => "provider-native",
1309            ToolFormatWire::Text => "text",
1310        }
1311    );
1312    if let Some(note) = caps.tool_mode_parity_notes.as_deref() {
1313        if !note.is_empty() {
1314            correction.push_str(" (");
1315            correction.push_str(note);
1316            correction.push(')');
1317        }
1318    }
1319
1320    ToolFormatDecision {
1321        effective: preferred,
1322        correction: Some(correction),
1323    }
1324}
1325
1326/// FOOTGUN-REMOVAL — fail fast when a `(provider, model)` route has NO viable
1327/// tool channel at all: the registry forbids both the provider-native channel
1328/// AND every text-channel grammar. `validate_tool_format` deliberately passes
1329/// such a route through unchanged (it has no *better* format to steer to and
1330/// must not rewrite to an equally-broken one under a misleading "Using X
1331/// instead" message); but a tool-bearing call dispatched on a route with no
1332/// working channel can only produce a silent empty tool stream. This guard lets
1333/// the call seam reject that combo BEFORE dispatch with an actionable message —
1334/// naming the bad `(provider, model)` and a suggested alternative provider for
1335/// the same model family — instead of billing a noncommittal completion.
1336///
1337/// Returns `Some(message)` only when both channels are forbidden (e.g. a route
1338/// flagged `native_unreliable` whose text channel is also declared unsupported,
1339/// or one explicitly pinned `tool_mode_parity = "unsupported"`). Returns `None`
1340/// for every route that still has at least one working channel, so it never
1341/// fires on the auto-correctable DeepInfra/SambaNova gpt-oss rows (those keep a
1342/// working text channel) or on any healthy route. Modeled on the same
1343/// `channel_forbidden` machinery `validate_tool_format` uses, so the two stay in
1344/// lock-step: the gate auto-corrects when one channel works and fails fast when
1345/// neither does.
1346pub fn no_viable_tool_channel(provider: &str, model: &str) -> Option<String> {
1347    let caps = lookup(provider, model);
1348    no_viable_tool_channel_with_caps(provider, model, &caps)
1349}
1350
1351/// `no_viable_tool_channel` against an already-resolved [`Capabilities`], so hot
1352/// callers that already hold one avoid a second matrix lookup.
1353pub fn no_viable_tool_channel_with_caps(
1354    provider: &str,
1355    model: &str,
1356    caps: &Capabilities,
1357) -> Option<String> {
1358    let native_forbidden = channel_forbidden(ToolFormatWire::Native, caps);
1359    let text_forbidden = channel_forbidden(ToolFormatWire::Text, caps);
1360    if !(native_forbidden && text_forbidden) {
1361        return None;
1362    }
1363    let parity = caps.tool_mode_parity.as_deref().unwrap_or("unknown");
1364    let mut message = format!(
1365        "no viable tool-calling channel for {provider}/{model} \
1366         (tool_mode_parity = `{parity}`): the registry trusts neither the \
1367         provider-native `tool_calls` channel nor a text-channel grammar to \
1368         return parseable tool calls on this route, so a tool-bearing call here \
1369         can only emit a silent empty tool stream. {}",
1370        suggested_alternative_provider_hint(model)
1371    );
1372    if let Some(note) = caps.tool_mode_parity_notes.as_deref() {
1373        if !note.is_empty() {
1374            message.push_str(" (");
1375            message.push_str(note);
1376            message.push(')');
1377        }
1378    }
1379    Some(message)
1380}
1381
/// Build the short "try this provider instead" sentence shown when a model's
/// current route has no viable tool channel.
///
/// gpt-oss (Harmony) is the canonical case: its native channel is a footgun
/// on several pay-per-token routes, so callers are pointed at the channels
/// Harn has proven clean (Fireworks/DeepInfra/SambaNova on TEXT, or a
/// native-clean route). The model id is matched case-insensitively on the
/// `gpt-oss` substring; every other model gets a generic hint.
fn suggested_alternative_provider_hint(model: &str) -> String {
    let is_gpt_oss = model.to_ascii_lowercase().contains("gpt-oss");
    let hint = if is_gpt_oss {
        "For gpt-oss (Harmony), use a TEXT-channel route (e.g. \
         `fireworks`/`deepinfra`/`sambanova` gpt-oss, which Harn pins to \
         `tool_format = \"text\"`) or a native-clean route; the provider-native \
         Harmony channel drops tool calls into the reasoning channel."
    } else {
        "Pick a provider whose route for this model has a working native or \
         text tool channel (see `harn provider catalog matrix`)."
    };
    hint.to_string()
}
1400
1401/// Return the currently-effective provider capability rule matrix. User
1402/// override rows, when installed for the current thread, are emitted before
1403/// built-in rows so the display mirrors lookup precedence.
1404pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
1405    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1406    let mut rows = Vec::new();
1407    if let Some(user) = user.as_ref() {
1408        push_matrix_rows(&mut rows, user, "project");
1409    }
1410    push_matrix_rows(&mut rows, builtin(), "builtin");
1411    rows
1412}
1413
1414/// Audit the currently effective provider/model catalog against the currently
1415/// effective capability rules. This is the user-facing path used by the CLI
1416/// when authors are adding provider catalog or capability override rows.
1417pub fn audit_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
1418    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1419    audit_tool_capability_coverage(
1420        crate::llm_config::model_catalog_entries(),
1421        builtin(),
1422        user.as_ref(),
1423    )
1424}
1425
1426/// Audit the built-in catalog only. The CI test uses this path so external
1427/// provider config cannot hide a gap in the shipped TOML assets.
1428pub fn audit_builtin_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
1429    let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
1430        .expect("providers.toml must parse at build time");
1431    audit_tool_capability_coverage(catalog.models, builtin(), None)
1432}
1433
1434fn audit_tool_capability_coverage<I>(
1435    models: I,
1436    builtin: &CapabilitiesFile,
1437    user: Option<&CapabilitiesFile>,
1438) -> ToolCapabilityAuditReport
1439where
1440    I: IntoIterator<Item = (String, crate::llm_config::ModelDef)>,
1441{
1442    let mut gaps = Vec::new();
1443    let mut audited_models = 0;
1444
1445    for (model_id, model) in models {
1446        if model.pricing.is_none() {
1447            continue;
1448        }
1449        audited_models += 1;
1450        let matched = first_matching_rule(user, builtin, &model.provider, &model_id);
1451        let mut missing_fields = Vec::new();
1452        match matched.as_ref().map(|matched| &matched.rule) {
1453            Some(rule) => {
1454                if rule.native_tools.is_none() {
1455                    missing_fields.push("native_tools".to_string());
1456                }
1457                if rule.preferred_tool_format.is_none() {
1458                    missing_fields.push("preferred_tool_format".to_string());
1459                }
1460            }
1461            None => {
1462                missing_fields.push("native_tools".to_string());
1463                missing_fields.push("preferred_tool_format".to_string());
1464            }
1465        }
1466        if missing_fields.is_empty() {
1467            continue;
1468        }
1469
1470        let (suggested_native_tools, suggested_preferred_tool_format) =
1471            suggested_tool_capability_defaults(
1472                &model.provider,
1473                &model_id,
1474                &model,
1475                matched.as_ref(),
1476            );
1477        gaps.push(ToolCapabilityAuditGap {
1478            provider: model.provider,
1479            model: model_id,
1480            rule_provider: matched.as_ref().map(|matched| matched.provider.clone()),
1481            // Honest per-rule provenance: an `extends` fall-through chain
1482            // reports every absorbed rule pattern in precedence order, not a
1483            // fake single source row.
1484            rule_model_match: matched.map(|matched| matched.matched_patterns.join(" -> ")),
1485            missing_fields,
1486            suggested_native_tools,
1487            suggested_preferred_tool_format,
1488        });
1489    }
1490
1491    gaps.sort_by(|left, right| {
1492        left.provider
1493            .cmp(&right.provider)
1494            .then_with(|| left.model.cmp(&right.model))
1495    });
1496    ToolCapabilityAuditReport {
1497        audited_models,
1498        gaps,
1499    }
1500}
1501
/// Outcome of resolving a `(provider, model)` pair to a capability rule:
/// the effective (possibly merged) rule plus where it came from. Built by
/// `RuleResolution::into_matched` once at least one rule has been absorbed.
struct MatchedCapabilityRule {
    /// Provider layer of the first (highest-precedence) matched rule.
    provider: String,
    /// Effective rule: the first match, with fields it left unset filled from
    /// later matching rules while the chain opted into `extends` fall-through.
    rule: ProviderRule,
    /// `model_match` patterns of every absorbed rule, in precedence order.
    /// A single entry unless the first match set `extends = true`.
    matched_patterns: Vec<String>,
}
1512
/// Accumulates matching rules along the resolution walk (user rules before
/// built-in rules within a layer, then the `provider_family` chain). The
/// first matched rule has the highest precedence; later matches only fill
/// fields the accumulated chain left unset, and only while every absorbed
/// rule so far opted into `extends` fall-through.
///
/// The default value is the empty accumulator: no provider, no merged rule,
/// and no matched patterns.
#[derive(Default)]
struct RuleResolution {
    /// Provider layer of the first matched rule; `None` until a rule has been
    /// absorbed.
    provider: Option<String>,
    /// Effective rule built so far: a clone of the first absorbed rule, with
    /// gaps filled from each later absorbed rule. `None` until a rule has
    /// been absorbed.
    merged: Option<ProviderRule>,
    /// `model_match` provenance of every absorbed rule, in precedence order.
    matched_patterns: Vec<String>,
}
1526
1527impl RuleResolution {
1528    /// Merge `rule` into the accumulator. Returns `true` when the walk must
1529    /// terminate: the rule does not opt into `extends` fall-through, which is
1530    /// exactly the pre-`extends` first-match-wins behavior.
1531    fn absorb(&mut self, layer_provider: &str, rule: &ProviderRule) -> bool {
1532        if self.provider.is_none() {
1533            self.provider = Some(layer_provider.to_string());
1534        }
1535        self.matched_patterns.push(rule.model_match.clone());
1536        match &mut self.merged {
1537            None => self.merged = Some(rule.clone()),
1538            Some(merged) => merged.fill_missing_from(rule),
1539        }
1540        !rule.extends
1541    }
1542
1543    fn into_matched(self) -> Option<MatchedCapabilityRule> {
1544        Some(MatchedCapabilityRule {
1545            provider: self.provider?,
1546            rule: self.merged.expect("merged is set whenever provider is set"),
1547            matched_patterns: self.matched_patterns,
1548        })
1549    }
1550}
1551
1552/// Scan the ordered rule list for `layer_provider` (user rules first, then
1553/// built-in rules), absorbing every matching rule into `resolution` until a
1554/// terminating (non-`extends`) match. Returns `true` when resolution
1555/// terminated within this layer.
1556fn absorb_layer_matches(
1557    user: Option<&CapabilitiesFile>,
1558    builtin: &CapabilitiesFile,
1559    layer_provider: &str,
1560    model: &str,
1561    resolution: &mut RuleResolution,
1562) -> bool {
1563    for file in user.into_iter().chain(std::iter::once(builtin)) {
1564        if let Some(rules) = file.provider.get(layer_provider) {
1565            for rule in rules {
1566                if rule_matches(rule, model) && resolution.absorb(layer_provider, rule) {
1567                    return true;
1568                }
1569            }
1570        }
1571    }
1572    false
1573}
1574
1575/// Walk provider → family(provider) → … with a visited-guard, absorbing
1576/// matching rules into a [`RuleResolution`] and accumulating per-layer
1577/// provider defaults (earlier layers win) exactly as far as the walk gets.
1578/// Stops at the first non-`extends` match, so a terminating match at layer N
1579/// never consults defaults from layers past N — the pre-`extends` behavior.
1580/// An unterminated `extends` chain keeps walking so later layers can fill
1581/// its gaps.
1582fn resolve_rule_chain(
1583    user: Option<&CapabilitiesFile>,
1584    builtin: &CapabilitiesFile,
1585    provider: &str,
1586    model: &str,
1587) -> (RuleResolution, ProviderDefaults) {
1588    let mut resolution = RuleResolution::default();
1589    let mut effective_defaults = ProviderDefaults::default();
1590    let mut current = provider.to_string();
1591    let mut visited = HashSet::new();
1592    while visited.insert(current.clone()) {
1593        let layer_defaults = merged_provider_defaults(user, builtin, &current);
1594        if effective_defaults.has_any_field() {
1595            effective_defaults.fill_missing_from(&layer_defaults);
1596        } else {
1597            effective_defaults.overlay(&layer_defaults);
1598        }
1599        if absorb_layer_matches(user, builtin, &current, model, &mut resolution) {
1600            break;
1601        }
1602        let next = user
1603            .and_then(|file| file.provider_family.get(&current))
1604            .or_else(|| builtin.provider_family.get(&current))
1605            .cloned();
1606        match next {
1607            Some(parent) => current = parent,
1608            None => break,
1609        }
1610    }
1611    (resolution, effective_defaults)
1612}
1613
1614fn first_matching_rule(
1615    user: Option<&CapabilitiesFile>,
1616    builtin: &CapabilitiesFile,
1617    provider: &str,
1618    model: &str,
1619) -> Option<MatchedCapabilityRule> {
1620    resolve_rule_chain(user, builtin, provider, model)
1621        .0
1622        .into_matched()
1623}
1624
1625fn suggested_tool_capability_defaults(
1626    provider: &str,
1627    model_id: &str,
1628    model: &crate::llm_config::ModelDef,
1629    matched: Option<&MatchedCapabilityRule>,
1630) -> (bool, String) {
1631    if let Some(rule) = matched.map(|matched| &matched.rule) {
1632        let native_tools = rule.native_tools.unwrap_or_else(|| {
1633            // Resolve native_tools from the pinned tool_format via its channel
1634            // so `json` (a TEXT-channel format) correctly implies
1635            // native_tools = false, identically to `text`. Falling through to
1636            // the provider heuristic for `json` would wrongly mark a gemini /
1637            // cerebras row native. Unknown formats keep the heuristic.
1638            match rule
1639                .preferred_tool_format
1640                .as_deref()
1641                .and_then(crate::llm_config::tool_format_channel)
1642            {
1643                Some(crate::llm_config::ToolFormatChannel::Native) => true,
1644                Some(crate::llm_config::ToolFormatChannel::Text) => false,
1645                None => suggested_native_tools(provider, model_id, model),
1646            }
1647        });
1648        let preferred_tool_format = rule
1649            .preferred_tool_format
1650            .clone()
1651            .unwrap_or_else(|| tool_format_for_native(native_tools));
1652        return (native_tools, preferred_tool_format);
1653    }
1654
1655    let native_tools = suggested_native_tools(provider, model_id, model);
1656    (native_tools, tool_format_for_native(native_tools))
1657}
1658
1659fn suggested_native_tools(
1660    provider: &str,
1661    model_id: &str,
1662    model: &crate::llm_config::ModelDef,
1663) -> bool {
1664    if provider == "anthropic" || model_id.contains("claude") {
1665        return true;
1666    }
1667    if matches!(
1668        provider,
1669        "openai" | "gemini" | "cerebras" | "bedrock" | "azure_openai" | "vertex"
1670    ) {
1671        return true;
1672    }
1673    model
1674        .capabilities
1675        .iter()
1676        .any(|capability| capability == "tools")
1677}
1678
/// Derive the `preferred_tool_format` for a capability row (or an unmatched
/// model) that does not pin one explicitly.
///
/// Native-capable models derive `native`. Text-channel models derive `json`
/// (fenced-JSON), the GLOBAL text-channel default. Heredoc (`text`) is never
/// auto-derived: it is reachable only through an explicit
/// `preferred_tool_format = "text"` pin or an explicit request (the reverse
/// safety valve). This is the primary default site — it fires for every model
/// that matches a capability row without an explicit format pin.
fn tool_format_for_native(native_tools: bool) -> String {
    let derived = if native_tools { "native" } else { "json" };
    derived.to_string()
}
1693
1694fn push_matrix_rows(
1695    rows: &mut Vec<ProviderCapabilityMatrixRow>,
1696    file: &CapabilitiesFile,
1697    source: &str,
1698) {
1699    for (provider, rules) in &file.provider {
1700        for rule in rules {
1701            rows.push(rule_to_matrix_row(provider, rule, source));
1702        }
1703    }
1704}
1705
1706fn rule_to_matrix_row(
1707    provider: &str,
1708    rule: &ProviderRule,
1709    source: &str,
1710) -> ProviderCapabilityMatrixRow {
1711    ProviderCapabilityMatrixRow {
1712        provider: provider.to_string(),
1713        model: rule.model_match.clone(),
1714        version_min: rule.version_min.clone(),
1715        extends: rule.extends,
1716        thinking: rule_thinking_modes(rule),
1717        vision: rule_vision(rule),
1718        audio: rule.audio.unwrap_or(false),
1719        pdf: rule.pdf.unwrap_or(false),
1720        video: rule.video.unwrap_or(false),
1721        streaming: true,
1722        files_api_supported: rule.files_api_supported.unwrap_or(false),
1723        json_schema: rule_structured_output(rule),
1724        prefers_xml_scaffolding: rule.prefers_xml_scaffolding.unwrap_or(false),
1725        reserved_tool_call_token: rule.reserved_tool_call_token.unwrap_or(false),
1726        prefers_markdown_scaffolding: rule.prefers_markdown_scaffolding.unwrap_or(false),
1727        structured_output_mode: rule_structured_output_mode(rule),
1728        supports_assistant_prefill: rule.supports_assistant_prefill.unwrap_or(false),
1729        prefers_role_developer: rule
1730            .prefers_role_developer
1731            .unwrap_or_else(|| rule.requires_completion_tokens.unwrap_or(false)),
1732        prefers_xml_tools: rule.prefers_xml_tools.unwrap_or(false),
1733        thinking_block_style: rule_thinking_block_style(rule),
1734        native_tools: rule.native_tools.unwrap_or(false),
1735        text_tools: rule.text_tool_wire_format_supported.unwrap_or(true),
1736        preferred_tool_format: rule_preferred_tool_format(rule),
1737        tool_mode_parity: rule_tool_mode_parity(rule),
1738        tools: rule.native_tools.unwrap_or(false)
1739            || rule.text_tool_wire_format_supported.unwrap_or(true),
1740        cache: rule.prompt_caching.unwrap_or(false),
1741        serving_precision: rule
1742            .serving_precision
1743            .clone()
1744            .unwrap_or_else(|| "unverified".to_string()),
1745        source: source.to_string(),
1746    }
1747}
1748
1749fn rule_thinking_modes(rule: &ProviderRule) -> Vec<String> {
1750    rule.thinking_modes.clone().unwrap_or_else(|| {
1751        if rule.thinking.unwrap_or(false) {
1752            vec!["enabled".to_string()]
1753        } else {
1754            Vec::new()
1755        }
1756    })
1757}
1758
1759fn rule_vision(rule: &ProviderRule) -> bool {
1760    rule.vision.or(rule.vision_supported).unwrap_or(false)
1761}
1762
1763fn lookup_with(
1764    provider: &str,
1765    model: &str,
1766    builtin: &CapabilitiesFile,
1767    user: Option<&CapabilitiesFile>,
1768) -> Capabilities {
1769    // Special case: mock spoofs either shape. Try anthropic first
1770    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
1771    // resolves to the Anthropic capability row — the same behaviour
1772    // the hardcoded dispatch gave before this refactor. The native
1773    // tool-definition wire shape is pinned to OpenAI so existing
1774    // mock-based tests keep observing `t.function.name` regardless of
1775    // which family's capability row matched; per-message wire format
1776    // still tracks the matched family so Anthropic-specific request
1777    // plumbing (beta headers, file-id passthrough) is exercised when
1778    // a Claude model is mocked.
1779    if provider == "mock" {
1780        for family in ["anthropic", "openai", "gemini"] {
1781            let defaults = merged_provider_defaults(user, builtin, family);
1782            let mut resolution = RuleResolution::default();
1783            absorb_layer_matches(user, builtin, family, model, &mut resolution);
1784            if let Some(rule) = resolution.merged.as_ref() {
1785                let mut caps = rule_to_caps(rule, &defaults);
1786                if family == "anthropic" {
1787                    caps.native_tool_wire_format = "openai".to_string();
1788                }
1789                return caps;
1790            }
1791        }
1792        return Capabilities::default();
1793    }
1794
1795    // Normal chain: walk provider → family(provider) → ... with a
1796    // visited-guard to avoid cycles in malformed user overrides.
1797    let (resolution, effective_defaults) = resolve_rule_chain(user, builtin, provider, model);
1798    if let Some(rule) = resolution.merged.as_ref() {
1799        return rule_to_caps(rule, &effective_defaults);
1800    }
1801    if effective_defaults.has_any_field() {
1802        return defaults_to_caps(&effective_defaults);
1803    }
1804    Capabilities::default()
1805}
1806
1807fn merged_provider_defaults(
1808    user: Option<&CapabilitiesFile>,
1809    builtin: &CapabilitiesFile,
1810    provider: &str,
1811) -> ProviderDefaults {
1812    let mut defaults = builtin
1813        .provider_defaults
1814        .get(provider)
1815        .cloned()
1816        .unwrap_or_default();
1817    if let Some(user_defaults) = user.and_then(|file| file.provider_defaults.get(provider)) {
1818        defaults.overlay(user_defaults);
1819    }
1820    defaults
1821}
1822
1823fn defaults_to_caps(defaults: &ProviderDefaults) -> Capabilities {
1824    let empty = ProviderRule {
1825        model_match: "*".to_string(),
1826        version_min: None,
1827        extends: false,
1828        native_tools: None,
1829        message_wire_format: None,
1830        native_tool_wire_format: None,
1831        defer_loading: None,
1832        tool_search: None,
1833        responses_api: None,
1834        hosted_tools: None,
1835        remote_mcp: None,
1836        conversation_state: None,
1837        compaction: None,
1838        background_mode: None,
1839        tool_approval_policy: None,
1840        max_tools: None,
1841        prompt_caching: None,
1842        cache_breakpoint_style: None,
1843        vision: None,
1844        audio: None,
1845        pdf: None,
1846        video: None,
1847        files_api_supported: None,
1848        file_upload_wire_format: None,
1849        structured_output: None,
1850        prefers_xml_scaffolding: None,
1851        reserved_tool_call_token: None,
1852        prefers_markdown_scaffolding: None,
1853        structured_output_mode: None,
1854        supports_assistant_prefill: None,
1855        prefers_role_developer: None,
1856        prefers_xml_tools: None,
1857        thinking_block_style: None,
1858        json_schema: None,
1859        thinking_modes: None,
1860        interleaved_thinking_supported: None,
1861        anthropic_beta_features: None,
1862        thinking: None,
1863        vision_supported: None,
1864        image_url_input_supported: None,
1865        preserve_thinking: None,
1866        server_parser: None,
1867        honors_chat_template_kwargs: None,
1868        chat_template_options_field: None,
1869        requires_completion_tokens: None,
1870        requires_streaming: None,
1871        reasoning_effort_supported: None,
1872        reasoning_effort_levels: None,
1873        reasoning_none_supported: None,
1874        max_thinking_budget: None,
1875        reasoning_disable_supported: None,
1876        reasoning_required_for_tools: None,
1877        reasoning_text_promotable: None,
1878        reasoning_wire_format: None,
1879        seed_supported: None,
1880        top_k_supported: None,
1881        temperature_supported: None,
1882        top_p_supported: None,
1883        frequency_penalty_supported: None,
1884        presence_penalty_supported: None,
1885        allowed_tool_choice_modes: None,
1886        requires_tool_result_adjacency: None,
1887        supports_parallel_tool_calls: None,
1888        tools_exclude_response_format: None,
1889        recommended_endpoint: None,
1890        text_tool_wire_format_supported: None,
1891        preferred_tool_format: None,
1892        tool_mode_parity: None,
1893        tool_mode_parity_notes: None,
1894        thinking_disable_directive: None,
1895        auto_reasoning_overrides: None,
1896        provider_route_denylist: None,
1897        openrouter_provider_order: None,
1898        serving_precision: None,
1899    };
1900    let mut caps = rule_to_caps(&empty, defaults);
1901    caps.preferred_tool_format = None;
1902    caps.tool_mode_parity = None;
1903    caps
1904}
1905
1906fn rule_to_caps(rule: &ProviderRule, defaults: &ProviderDefaults) -> Capabilities {
1907    let thinking_modes = rule_thinking_modes(rule);
1908    Capabilities {
1909        native_tools: rule.native_tools.unwrap_or(false),
1910        message_wire_format: rule
1911            .message_wire_format
1912            .clone()
1913            .or_else(|| defaults.message_wire_format.clone())
1914            .unwrap_or_else(|| "openai".to_string()),
1915        native_tool_wire_format: rule
1916            .native_tool_wire_format
1917            .clone()
1918            .or_else(|| defaults.native_tool_wire_format.clone())
1919            .unwrap_or_else(|| "openai".to_string()),
1920        defer_loading: rule.defer_loading.unwrap_or(false),
1921        tool_search: rule.tool_search.clone().unwrap_or_default(),
1922        responses_api: rule.responses_api.unwrap_or(false),
1923        hosted_tools: rule.hosted_tools.clone().unwrap_or_default(),
1924        remote_mcp: rule.remote_mcp.unwrap_or(false),
1925        conversation_state: rule.conversation_state.unwrap_or(false),
1926        compaction: rule.compaction.unwrap_or(false),
1927        background_mode: rule.background_mode.unwrap_or(false),
1928        tool_approval_policy: rule.tool_approval_policy.clone(),
1929        max_tools: rule.max_tools,
1930        prompt_caching: rule.prompt_caching.unwrap_or(false),
1931        cache_breakpoint_style: rule
1932            .cache_breakpoint_style
1933            .clone()
1934            .unwrap_or_else(|| "none".to_string()),
1935        vision: rule_vision(rule),
1936        audio: rule.audio.unwrap_or(false),
1937        pdf: rule.pdf.unwrap_or(false),
1938        video: rule.video.unwrap_or(false),
1939        files_api_supported: rule
1940            .files_api_supported
1941            .or(defaults.files_api_supported)
1942            .unwrap_or(false),
1943        file_upload_wire_format: rule
1944            .file_upload_wire_format
1945            .clone()
1946            .or_else(|| defaults.file_upload_wire_format.clone()),
1947        structured_output: rule_structured_output(rule),
1948        json_schema: rule_structured_output(rule),
1949        prefers_xml_scaffolding: rule.prefers_xml_scaffolding.unwrap_or(false),
1950        reserved_tool_call_token: rule.reserved_tool_call_token.unwrap_or(false),
1951        prefers_markdown_scaffolding: rule.prefers_markdown_scaffolding.unwrap_or(false),
1952        structured_output_mode: rule_structured_output_mode(rule),
1953        supports_assistant_prefill: rule.supports_assistant_prefill.unwrap_or(false),
1954        prefers_role_developer: rule.prefers_role_developer.unwrap_or(false),
1955        prefers_xml_tools: rule.prefers_xml_tools.unwrap_or(false),
1956        thinking_block_style: rule_thinking_block_style(rule),
1957        thinking_modes,
1958        interleaved_thinking_supported: rule.interleaved_thinking_supported.unwrap_or(false),
1959        anthropic_beta_features: rule.anthropic_beta_features.clone().unwrap_or_default(),
1960        vision_supported: rule.vision_supported.unwrap_or(false),
1961        image_url_input_supported: rule
1962            .image_url_input_supported
1963            .or(defaults.image_url_input_supported)
1964            .unwrap_or(true),
1965        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
1966        server_parser: rule
1967            .server_parser
1968            .clone()
1969            .unwrap_or_else(|| "none".to_string()),
1970        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
1971        chat_template_options_field: rule.chat_template_options_field.clone(),
1972        requires_completion_tokens: rule.requires_completion_tokens.unwrap_or(false),
1973        requires_streaming: rule.requires_streaming.unwrap_or(false),
1974        reasoning_effort_supported: rule.reasoning_effort_supported.unwrap_or(false),
1975        reasoning_effort_levels: rule.reasoning_effort_levels.clone().unwrap_or_default(),
1976        reasoning_none_supported: rule.reasoning_none_supported.unwrap_or(false),
1977        max_thinking_budget: rule.max_thinking_budget,
1978        reasoning_disable_supported: rule.reasoning_disable_supported.unwrap_or(true),
1979        reasoning_required_for_tools: rule.reasoning_required_for_tools.unwrap_or(false),
1980        reasoning_text_promotable: rule.reasoning_text_promotable.unwrap_or(true),
1981        reasoning_wire_format: rule
1982            .reasoning_wire_format
1983            .clone()
1984            .or_else(|| defaults.reasoning_wire_format.clone()),
1985        seed_supported: rule
1986            .seed_supported
1987            .or(defaults.seed_supported)
1988            .unwrap_or(true),
1989        top_k_supported: rule
1990            .top_k_supported
1991            .or(defaults.top_k_supported)
1992            .unwrap_or(true),
1993        temperature_supported: rule
1994            .temperature_supported
1995            .or(defaults.temperature_supported)
1996            .unwrap_or(true),
1997        top_p_supported: rule
1998            .top_p_supported
1999            .or(defaults.top_p_supported)
2000            .unwrap_or(true),
2001        frequency_penalty_supported: rule
2002            .frequency_penalty_supported
2003            .or(defaults.frequency_penalty_supported)
2004            .unwrap_or(true),
2005        presence_penalty_supported: rule
2006            .presence_penalty_supported
2007            .or(defaults.presence_penalty_supported)
2008            .unwrap_or(true),
2009        allowed_tool_choice_modes: rule.allowed_tool_choice_modes.clone().unwrap_or_default(),
2010        requires_tool_result_adjacency: rule.requires_tool_result_adjacency.unwrap_or(false),
2011        supports_parallel_tool_calls: rule.supports_parallel_tool_calls.unwrap_or(true),
2012        tools_exclude_response_format: rule.tools_exclude_response_format.unwrap_or(false),
2013        recommended_endpoint: rule.recommended_endpoint.clone(),
2014        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
2015        preferred_tool_format: Some(rule_preferred_tool_format(rule)),
2016        tool_mode_parity: Some(rule_tool_mode_parity(rule)),
2017        tool_mode_parity_notes: rule.tool_mode_parity_notes.clone(),
2018        thinking_disable_directive: rule.thinking_disable_directive.clone(),
2019        auto_reasoning_overrides: rule.auto_reasoning_overrides.clone().unwrap_or_default(),
2020        provider_route_denylist: rule.provider_route_denylist.clone().unwrap_or_default(),
2021        openrouter_provider_order: rule.openrouter_provider_order.clone().unwrap_or_default(),
2022        serving_precision: rule
2023            .serving_precision
2024            .clone()
2025            .unwrap_or_else(|| "unverified".to_string()),
2026    }
2027}
2028
2029fn rule_preferred_tool_format(rule: &ProviderRule) -> String {
2030    // This is the `caps.preferred_tool_format` the runtime `lookup` returns for
2031    // a matched capability row. When the row pins a format, honor it (including
2032    // an explicit `text` — the reverse safety valve). Otherwise derive: native
2033    // models get `native`, text-channel models get `json` (fenced-JSON), the
2034    // GLOBAL text-channel default. Heredoc `text` is never auto-derived.
2035    rule.preferred_tool_format.clone().unwrap_or_else(|| {
2036        if rule.native_tools.unwrap_or(false) {
2037            "native".to_string()
2038        } else {
2039            "json".to_string()
2040        }
2041    })
2042}
2043
2044fn rule_tool_mode_parity(rule: &ProviderRule) -> String {
2045    rule.tool_mode_parity.clone().unwrap_or_else(|| {
2046        match (
2047            rule.native_tools.unwrap_or(false),
2048            rule.text_tool_wire_format_supported.unwrap_or(true),
2049        ) {
2050            (true, true) => "unknown".to_string(),
2051            (true, false) => "native_only".to_string(),
2052            (false, true) => "text_only".to_string(),
2053            (false, false) => "unsupported".to_string(),
2054        }
2055    })
2056}
2057
2058fn rule_structured_output(rule: &ProviderRule) -> Option<String> {
2059    rule.structured_output
2060        .clone()
2061        .or_else(|| rule.json_schema.clone())
2062        .filter(|value| value != "none")
2063}
2064
2065fn rule_structured_output_mode(rule: &ProviderRule) -> String {
2066    if let Some(mode) = &rule.structured_output_mode {
2067        return mode.clone();
2068    }
2069    match rule_structured_output(rule).as_deref() {
2070        Some("native") | Some("format_kw") => "native_json".to_string(),
2071        Some("tool_use") => "xml_tagged".to_string(),
2072        _ => "none".to_string(),
2073    }
2074}
2075
2076fn rule_thinking_block_style(rule: &ProviderRule) -> String {
2077    rule.thinking_block_style.clone().unwrap_or_else(|| {
2078        if rule.reasoning_effort_supported.unwrap_or(false)
2079            || rule.requires_completion_tokens.unwrap_or(false)
2080        {
2081            "reasoning_summary".to_string()
2082        } else {
2083            "none".to_string()
2084        }
2085    })
2086}
2087
2088pub(crate) fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
2089    let lower = model.to_lowercase();
2090    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
2091        return false;
2092    }
2093    if let Some(version_min) = &rule.version_min {
2094        if version_min.len() != 2 {
2095            return false;
2096        }
2097        let want = (version_min[0], version_min[1]);
2098        let have = match extract_version(model) {
2099            Some(v) => v,
2100            // `version_min` was set but the model ID can't be parsed.
2101            // Fail closed: skip this rule so more permissive catch-all
2102            // rules below can still match.
2103            None => return false,
2104        };
2105        if have < want {
2106            return false;
2107        }
2108    }
2109    true
2110}
2111
2112/// Extract `(major, minor)` from a model ID by trying the Anthropic
2113/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
2114/// Both parsers return `None` for shapes they don't recognise so this
2115/// never mis-parses across families.
2116fn extract_version(model: &str) -> Option<(u32, u32)> {
2117    claude_generation(model).or_else(|| gpt_generation(model))
2118}
2119
2120// Model-pattern matching for capability rules. Shared workspace semantics live
2121// in `harn-glob`; keep capability and provider matching on that helper instead
2122// of mirroring glob behavior locally.
2123use harn_glob::match_name as glob_match;
2124
2125#[cfg(test)]
2126mod tests {
2127    use super::*;
2128
    /// Test helper: calls `clear_user_overrides()` so each test begins
    /// without overrides left behind by an earlier `set_user_overrides_toml`.
    fn reset() {
        clear_user_overrides();
    }
2132
2133    fn assert_cerebras_effort_reasoning(model: &str, thinking_block_style: &str) {
2134        let caps = lookup("cerebras", model);
2135        assert_eq!(caps.thinking_modes, vec!["effort"]);
2136        assert!(caps.reasoning_effort_supported);
2137        // tool_format is NOT asserted here: cerebras gpt-oss and zai-glm have
2138        // different defaults (gpt-oss harmonized to `json`, glm stays
2139        // `native`), and this shared helper is about reasoning-effort
2140        // behavior. Tool-format resolution is asserted in the dedicated
2141        // harmonization tests.
2142        assert_eq!(caps.structured_output.as_deref(), Some("native"));
2143        assert_eq!(caps.structured_output_mode, "native_json");
2144        assert_eq!(caps.thinking_block_style, thinking_block_style);
2145    }
2146
2147    fn assert_openrouter_anthropic_runtime_parity(model: &str) {
2148        let direct = lookup("anthropic", model);
2149        let routed = lookup("openrouter", model);
2150
2151        assert_eq!(
2152            routed.native_tools, direct.native_tools,
2153            "{model}: native tool support should match direct Anthropic"
2154        );
2155        assert_eq!(
2156            routed.preferred_tool_format, direct.preferred_tool_format,
2157            "{model}: preferred tool format should match direct Anthropic"
2158        );
2159        assert_eq!(
2160            routed.structured_output, direct.structured_output,
2161            "{model}: structured output transport should match direct Anthropic"
2162        );
2163        assert_eq!(
2164            routed.structured_output_mode, direct.structured_output_mode,
2165            "{model}: structured output mode should match direct Anthropic"
2166        );
2167        assert_eq!(
2168            routed.thinking_modes,
2169            Vec::<String>::new(),
2170            "{model}: OpenRouter Claude routes must not advertise direct Anthropic thinking controls"
2171        );
2172        assert!(
2173            !routed.reasoning_effort_supported,
2174            "{model}: OpenRouter Claude routes must not advertise direct Anthropic effort controls"
2175        );
2176        assert!(
2177            !routed.interleaved_thinking_supported,
2178            "{model}: OpenRouter Claude routes must not advertise interleaved thinking"
2179        );
2180        assert_eq!(
2181            routed.supports_assistant_prefill, direct.supports_assistant_prefill,
2182            "{model}: assistant prefill support should match direct Anthropic"
2183        );
2184        assert_eq!(
2185            routed.prompt_caching, direct.prompt_caching,
2186            "{model}: prompt cache support should match direct Anthropic"
2187        );
2188        assert_eq!(
2189            routed.prefers_xml_scaffolding, direct.prefers_xml_scaffolding,
2190            "{model}: XML scaffolding preference should match direct Anthropic"
2191        );
2192        assert_eq!(
2193            routed.prefers_markdown_scaffolding, direct.prefers_markdown_scaffolding,
2194            "{model}: Markdown scaffolding preference should match direct Anthropic"
2195        );
2196        assert_eq!(
2197            routed.prefers_role_developer, direct.prefers_role_developer,
2198            "{model}: developer role preference should match direct Anthropic"
2199        );
2200        assert_eq!(
2201            routed.prefers_xml_tools, direct.prefers_xml_tools,
2202            "{model}: XML tool preference should match direct Anthropic"
2203        );
2204        assert_eq!(
2205            routed.thinking_block_style, direct.thinking_block_style,
2206            "{model}: thinking block style should match direct Anthropic"
2207        );
2208        assert_eq!(
2209            routed.text_tool_wire_format_supported, direct.text_tool_wire_format_supported,
2210            "{model}: text-tool fallback support should match direct Anthropic"
2211        );
2212    }
2213
2214    #[test]
2215    fn every_catalogued_chat_model_has_explicit_tool_capabilities() {
2216        reset();
2217        let report = audit_builtin_catalogued_chat_model_tool_capabilities();
2218        assert!(report.ok(), "{}", report.render_human());
2219    }
2220
2221    #[test]
2222    fn every_catalogued_alias_has_explicit_tool_capabilities() {
2223        // The model-level audit only covers priced catalog `models`, so a
2224        // `[[provider.local]]` / Ollama alias (e.g. the local gemma-4 route in
2225        // Fix A) could omit native_tools/preferred_tool_format and silently
2226        // degrade to text tools without tripping a test. Walk every alias's
2227        // (provider, id) through the same matcher and require explicit fields.
2228        reset();
2229        let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
2230            .expect("providers.toml must parse at build time");
2231        let builtin = builtin();
2232        let mut gaps = Vec::new();
2233        for (alias, def) in &catalog.aliases {
2234            let matched = first_matching_rule(None, builtin, &def.provider, &def.id);
2235            let explicit = matched
2236                .as_ref()
2237                .map(|matched| {
2238                    matched.rule.native_tools.is_some()
2239                        && matched.rule.preferred_tool_format.is_some()
2240                })
2241                .unwrap_or(false);
2242            if !explicit {
2243                gaps.push(format!(
2244                    "{alias} -> {}:{} (rule={})",
2245                    def.provider,
2246                    def.id,
2247                    matched
2248                        .as_ref()
2249                        .map(|matched| matched.rule.model_match.as_str())
2250                        .unwrap_or("<none>")
2251                ));
2252            }
2253        }
2254        assert!(
2255            gaps.is_empty(),
2256            "aliases missing explicit native_tools/preferred_tool_format:\n- {}",
2257            gaps.join("\n- ")
2258        );
2259    }
2260
2261    #[test]
2262    fn every_catalogued_alias_tool_format_pin_is_safe_for_route() {
2263        // Alias pins are consumed directly by downstream catalogs and CLI
2264        // routing. They must not encode a known-broken channel that the
2265        // central runtime guard would have to correct later.
2266        reset();
2267        let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
2268            .expect("providers.toml must parse at build time");
2269        let mut unsafe_pins = Vec::new();
2270        for (alias, def) in &catalog.aliases {
2271            let Some(tool_format) = def.tool_format.as_deref() else {
2272                continue;
2273            };
2274            let decision = validate_tool_format(&def.provider, &def.id, tool_format);
2275            if let Some(correction) = decision.correction.as_deref() {
2276                unsafe_pins.push(format!(
2277                    "{alias} -> {}:{} pins {tool_format}, would be corrected to {} ({correction})",
2278                    def.provider, def.id, decision.effective
2279                ));
2280            }
2281        }
2282        assert!(
2283            unsafe_pins.is_empty(),
2284            "aliases pin unsafe tool_format values:\n- {}",
2285            unsafe_pins.join("\n- ")
2286        );
2287    }
2288
2289    #[test]
2290    fn tool_capability_audit_reports_suggested_defaults() {
2291        reset();
2292        let capabilities: CapabilitiesFile = toml::from_str(
2293            r#"
2294[[provider.acme]]
2295model_match = "acme-good-*"
2296preferred_tool_format = "native"
2297"#,
2298        )
2299        .unwrap();
2300        let report = audit_tool_capability_coverage(
2301            vec![(
2302                "acme-good-1".to_string(),
2303                crate::llm_config::ModelDef {
2304                    name: "Acme Good".to_string(),
2305                    provider: "acme".to_string(),
2306                    context_window: 128_000,
2307                    logical_model: None,
2308                    equivalence_group: None,
2309                    served_variant: None,
2310                    wire_model: None,
2311                    api_dialect: None,
2312                    rate_limits: None,
2313                    performance: None,
2314                    architecture: None,
2315                    local_memory: None,
2316                    runtime_context_window: None,
2317                    stream_timeout: None,
2318                    capabilities: Vec::new(),
2319                    pricing: Some(crate::llm_config::ModelPricing {
2320                        input_per_mtok: 1.0,
2321                        output_per_mtok: 2.0,
2322                        cache_read_per_mtok: None,
2323                        cache_write_per_mtok: None,
2324                    }),
2325                    deprecated: false,
2326                    deprecation_note: None,
2327                    superseded_by: None,
2328                    fast_mode: None,
2329                    quality_tags: Vec::new(),
2330                    availability: crate::llm_config::ModelAvailability::Serverless,
2331                    tier: None,
2332                    open_weight: None,
2333                    strengths: Vec::new(),
2334                    benchmarks: std::collections::BTreeMap::new(),
2335                    family: None,
2336                    lineage: None,
2337                    complementary_with: Vec::new(),
2338                    avoid_as_reviewer_for: Vec::new(),
2339                },
2340            )],
2341            &capabilities,
2342            None,
2343        );
2344
2345        assert!(!report.ok());
2346        assert_eq!(report.audited_models, 1);
2347        assert_eq!(report.gaps.len(), 1);
2348        assert_eq!(report.gaps[0].missing_fields, ["native_tools"]);
2349        assert!(report.gaps[0].suggested_native_tools);
2350        assert_eq!(report.gaps[0].suggested_preferred_tool_format, "native");
2351        assert!(report.render_human().contains(
2352            "acme:acme-good-1 (provider.acme model_match=\"acme-good-*\") missing native_tools; suggest native_tools = true, preferred_tool_format = \"native\""
2353        ));
2354    }
2355
2356    #[test]
2357    fn openrouter_qwen36_keeps_native_and_denies_ambient_upstream() {
2358        reset();
2359        for model in [
2360            "qwen/qwen3.6-flash",
2361            "qwen/qwen3.6-plus",
2362            "qwen/qwen3.6-35b-a3b",
2363        ] {
2364            let caps = lookup("openrouter", model);
2365            // The route-around must NOT downgrade the tool format: native stays on.
2366            assert!(caps.native_tools, "{model}: native tools");
2367            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2368            // The broken Ambient upstream is denied via the data-driven denylist.
2369            assert_eq!(
2370                caps.provider_route_denylist,
2371                vec!["Ambient".to_string()],
2372                "{model}: denylist",
2373            );
2374        }
2375    }
2376
2377    #[test]
2378    fn provider_route_denylist_defaults_empty_for_unmarked_rows() {
2379        reset();
2380        let caps = lookup("anthropic", "claude-opus-4-7");
2381        assert!(caps.provider_route_denylist.is_empty());
2382    }
2383
2384    #[test]
2385    fn strict_openai_compat_rows_require_tool_result_adjacency() {
2386        reset();
2387        assert!(lookup("moonshot", "moonshot/kimi-k2.6").requires_tool_result_adjacency);
2388        assert!(lookup("moonshot", "moonshot/kimi-k2.7-code").requires_tool_result_adjacency);
2389        assert!(lookup("minimax", "MiniMax-M2").requires_tool_result_adjacency);
2390        assert!(lookup("minimax", "MiniMax-M2.7").requires_tool_result_adjacency);
2391        assert!(!lookup("openai", "gpt-4o").requires_tool_result_adjacency);
2392    }
2393
2394    #[test]
2395    fn fireworks_gpt_oss_disables_parallel_tool_call_history() {
2396        reset();
2397        assert!(
2398            !lookup("fireworks", "accounts/fireworks/models/gpt-oss-120b")
2399                .supports_parallel_tool_calls
2400        );
2401        assert!(lookup("openai", "gpt-4o").supports_parallel_tool_calls);
2402    }
2403
2404    #[test]
2405    fn cerebras_tools_exclude_response_format() {
2406        reset();
2407        assert!(lookup("cerebras", "gpt-oss-120b").tools_exclude_response_format);
2408        assert!(lookup("cerebras", "zai-glm-4.7").tools_exclude_response_format);
2409        assert!(!lookup("openai", "gpt-4o").tools_exclude_response_format);
2410    }
2411
2412    #[test]
2413    fn serving_precision_seeds_known_gpt_oss_verdicts() {
2414        reset();
2415        // Full-precision routes verified during the 2026-06 meter effort.
2416        assert_eq!(
2417            lookup("fireworks", "accounts/fireworks/models/gpt-oss-120b").serving_precision,
2418            "trusted"
2419        );
2420        assert_eq!(
2421            lookup("openrouter", "openai/gpt-oss-120b").serving_precision,
2422            "trusted"
2423        );
2424        // SambaNova serves gpt-oss quantized (proven 0/5 vs reference 3/3).
2425        assert_eq!(
2426            lookup("sambanova", "gpt-oss-120b").serving_precision,
2427            "degraded"
2428        );
2429        // Cerebras is full precision but rate-throttled to unusable timing.
2430        assert_eq!(
2431            lookup("cerebras", "gpt-oss-120b").serving_precision,
2432            "throttled"
2433        );
2434    }
2435
2436    #[test]
2437    fn serving_precision_defaults_unverified_for_unmarked_rows() {
2438        reset();
2439        // A route with no serving_precision verdict resolves to "unverified",
2440        // never an empty string, so callers can branch on a stable enum.
2441        assert_eq!(
2442            lookup("anthropic", "claude-opus-4-7").serving_precision,
2443            "unverified"
2444        );
2445    }
2446
2447    #[test]
2448    fn anthropic_opus_47_gets_full_capabilities() {
2449        reset();
2450        let caps = lookup("anthropic", "claude-opus-4-7");
2451        assert!(caps.native_tools);
2452        assert!(caps.defer_loading);
2453        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2454        assert!(caps.prompt_caching);
2455        assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2456        assert!(caps.reasoning_effort_supported);
2457        assert_eq!(
2458            caps.reasoning_effort_levels,
2459            vec!["low", "medium", "high", "xhigh", "max"]
2460        );
2461        assert!(caps.interleaved_thinking_supported);
2462        assert!(caps.vision_supported);
2463        assert!(caps.audio);
2464        assert!(caps.pdf);
2465        assert!(caps.files_api_supported);
2466        assert_eq!(caps.max_tools, Some(10000));
2467        assert!(caps.prefers_xml_scaffolding);
2468        assert!(!caps.prefers_markdown_scaffolding);
2469        assert_eq!(caps.structured_output_mode, "xml_tagged");
2470        assert!(!caps.supports_assistant_prefill);
2471        assert!(!caps.prefers_role_developer);
2472        assert!(caps.prefers_xml_tools);
2473        assert_eq!(caps.thinking_block_style, "thinking_blocks");
2474    }
2475
2476    #[test]
2477    fn anthropic_sonnet_5_gets_adaptive_effort_capabilities() {
2478        reset();
2479        let caps = lookup("anthropic", "claude-sonnet-5");
2480        assert!(caps.native_tools);
2481        assert!(caps.defer_loading);
2482        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2483        assert!(caps.prompt_caching);
2484        assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2485        assert!(caps.reasoning_effort_supported);
2486        assert_eq!(
2487            caps.reasoning_effort_levels,
2488            vec!["low", "medium", "high", "xhigh", "max"]
2489        );
2490        assert!(caps.reasoning_disable_supported);
2491        assert!(!caps.reasoning_none_supported);
2492        assert!(caps.interleaved_thinking_supported);
2493        assert!(!caps.supports_assistant_prefill);
2494        assert_eq!(caps.thinking_block_style, "thinking_blocks");
2495    }
2496
2497    #[test]
2498    fn anthropic_fable_effort_cannot_be_disabled() {
2499        reset();
2500        for model in ["claude-fable-5", "anthropic/claude-fable-5"] {
2501            let caps = lookup("anthropic", model);
2502            assert_eq!(caps.thinking_modes, vec!["adaptive", "effort"]);
2503            assert!(caps.reasoning_effort_supported);
2504            assert_eq!(
2505                caps.reasoning_effort_levels,
2506                vec!["low", "medium", "high", "xhigh", "max"]
2507            );
2508            assert!(!caps.reasoning_disable_supported);
2509            assert!(!caps.supports_assistant_prefill);
2510        }
2511    }
2512
2513    #[test]
2514    fn anthropic_opus_46_uses_budgeted_thinking() {
2515        reset();
2516        let caps = lookup("anthropic", "claude-opus-4-6");
2517        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2518        assert!(caps.interleaved_thinking_supported);
2519        assert!(!caps.supports_assistant_prefill);
2520    }
2521
2522    #[test]
2523    fn anthropic_opus_45_does_not_support_interleaved_thinking() {
2524        reset();
2525        let caps = lookup("anthropic", "claude-opus-4-5");
2526        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2527        assert!(!caps.interleaved_thinking_supported);
2528        assert!(caps.supports_assistant_prefill);
2529    }
2530
2531    #[test]
2532    fn openrouter_claude_rows_track_direct_anthropic_runtime_quirks() {
2533        reset();
2534        for model in [
2535            "anthropic/claude-fable-5-0",
2536            "anthropic/claude-mythos-5-0",
2537            "anthropic/claude-haiku-4-5",
2538            "anthropic/claude-haiku-4-7",
2539            "anthropic/claude-sonnet-4-6",
2540            "anthropic/claude-sonnet-4-7",
2541            "anthropic/claude-sonnet-5",
2542            "anthropic/claude-opus-4-6",
2543            "anthropic/claude-opus-4-7",
2544        ] {
2545            assert_openrouter_anthropic_runtime_parity(model);
2546        }
2547    }
2548
2549    #[test]
2550    fn override_can_supply_anthropic_beta_features() {
2551        reset();
2552        let toml_src = r#"
2553[[provider.anthropic]]
2554model_match = "claude-custom-*"
2555native_tools = true
2556anthropic_beta_features = ["fine-grained-tool-streaming-2025-05-14"]
2557"#;
2558        set_user_overrides_toml(toml_src).unwrap();
2559        let caps = lookup("anthropic", "claude-custom-1");
2560        assert_eq!(
2561            caps.anthropic_beta_features,
2562            vec!["fine-grained-tool-streaming-2025-05-14"]
2563        );
2564        reset();
2565    }
2566
2567    #[test]
2568    fn anthropic_haiku_44_has_no_tool_search() {
2569        reset();
2570        let caps = lookup("anthropic", "claude-haiku-4-4");
2571        // Haiku 4.4 falls through to the `claude-*` catch-all row.
2572        assert!(caps.native_tools);
2573        assert!(caps.prompt_caching);
2574        assert!(!caps.defer_loading);
2575        assert!(caps.tool_search.is_empty());
2576    }
2577
2578    #[test]
2579    fn anthropic_haiku_45_supports_tool_search() {
2580        reset();
2581        let caps = lookup("anthropic", "claude-haiku-4-5");
2582        assert!(caps.defer_loading);
2583        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
2584    }
2585
2586    #[test]
2587    fn old_claude_gets_catchall() {
2588        reset();
2589        let caps = lookup("anthropic", "claude-opus-3-5");
2590        assert!(caps.native_tools);
2591        assert!(caps.prompt_caching);
2592        assert!(!caps.defer_loading);
2593        assert!(caps.tool_search.is_empty());
2594    }
2595
2596    #[test]
2597    fn openai_gpt_54_supports_tool_search() {
2598        reset();
2599        let caps = lookup("openai", "gpt-5.4");
2600        assert!(caps.defer_loading);
2601        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
2602        assert_eq!(caps.json_schema.as_deref(), Some("native"));
2603        assert_eq!(caps.thinking_modes, vec!["effort"]);
2604        assert!(caps.reasoning_effort_supported);
2605        assert!(caps.reasoning_none_supported);
2606        assert!(!caps.prefers_xml_scaffolding);
2607        assert!(caps.prefers_markdown_scaffolding);
2608        assert_eq!(caps.structured_output_mode, "native_json");
2609        assert!(!caps.supports_assistant_prefill);
2610        assert!(!caps.prefers_role_developer);
2611        assert!(!caps.prefers_xml_tools);
2612        assert_eq!(caps.thinking_block_style, "reasoning_summary");
2613    }
2614
2615    #[test]
2616    fn openai_gpt_53_has_reasoning_none_without_tool_search() {
2617        reset();
2618        let caps = lookup("openai", "gpt-5.3");
2619        assert!(caps.native_tools);
2620        assert!(!caps.defer_loading);
2621        assert!(caps.vision_supported);
2622        assert!(caps.tool_search.is_empty());
2623        assert_eq!(caps.thinking_modes, vec!["effort"]);
2624        assert!(caps.reasoning_effort_supported);
2625        assert!(caps.reasoning_none_supported);
2626    }
2627
2628    #[test]
2629    fn openai_original_gpt_5_has_reasoning_floor_without_none() {
2630        reset();
2631        let caps = lookup("openai", "gpt-5");
2632        assert!(caps.native_tools);
2633        assert!(!caps.defer_loading);
2634        assert_eq!(caps.thinking_modes, vec!["effort"]);
2635        assert!(caps.reasoning_effort_supported);
2636        assert!(!caps.reasoning_none_supported);
2637    }
2638
2639    #[test]
2640    fn gemini_thinking_budget_quirks_are_declared_in_matrix() {
2641        reset();
2642        // Flash: 24576 ceiling, can disable thinking.
2643        let flash = lookup("gemini", "gemini-2.5-flash");
2644        assert_eq!(flash.max_thinking_budget, Some(24_576));
2645        assert!(flash.reasoning_disable_supported);
2646        assert!(flash.thinking_modes.iter().any(|m| m == "effort"));
2647        // Pro: 32768 ceiling, cannot disable thinking.
2648        let pro = lookup("gemini", "gemini-2.5-pro");
2649        assert_eq!(pro.max_thinking_budget, Some(32_768));
2650        assert!(!pro.reasoning_disable_supported);
2651        assert!(pro.thinking_modes.iter().any(|m| m == "effort"));
2652        // The `models/` REST resource name resolves the same.
2653        let flash_resource = lookup("gemini", "models/gemini-2.5-flash");
2654        assert_eq!(flash_resource.max_thinking_budget, Some(24_576));
2655        assert!(flash_resource.reasoning_disable_supported);
2656        // Non-2.5 gemini has no effort thinking support -> provider sends no
2657        // thinkingConfig (unchanged behavior).
2658        let legacy = lookup("gemini", "gemini-1.5-pro");
2659        assert!(!legacy.thinking_modes.iter().any(|m| m == "effort"));
2660    }
2661
2662    #[test]
2663    fn openai_gpt_4o_matrix_fields_include_multimodal_support() {
2664        reset();
2665        let caps = lookup("openai", "gpt-4o");
2666        assert!(caps.native_tools);
2667        assert!(caps.vision);
2668        assert!(caps.audio);
2669        assert!(!caps.pdf);
2670        assert_eq!(caps.json_schema.as_deref(), Some("native"));
2671    }
2672
2673    #[test]
2674    fn openai_reasoning_models_support_effort() {
2675        reset();
2676        let caps = lookup("openai", "o3");
2677        assert_eq!(caps.thinking_modes, vec!["effort"]);
2678        assert!(caps.requires_completion_tokens);
2679        assert!(caps.reasoning_effort_supported);
2680        assert!(caps.prefers_role_developer);
2681        assert_eq!(caps.thinking_block_style, "reasoning_summary");
2682        let prefixed = lookup("openrouter", "openai/o4-mini");
2683        assert!(prefixed.requires_completion_tokens);
2684        assert!(prefixed.reasoning_effort_supported);
2685    }
2686
2687    #[test]
2688    fn vision_capability_gates_known_multimodal_models() {
2689        reset();
2690        let minimax_m3 = lookup("minimax", "MiniMax-M3");
2691        assert!(minimax_m3.vision_supported);
2692        assert!(minimax_m3.video);
2693        assert_eq!(minimax_m3.thinking_modes, vec!["adaptive"]);
2694        assert_eq!(minimax_m3.reasoning_wire_format.as_deref(), Some("minimax"));
2695        assert!(minimax_m3.requires_completion_tokens);
2696        let openrouter_m3 = lookup("openrouter", "minimax/minimax-m3");
2697        assert!(openrouter_m3.vision_supported);
2698        assert!(openrouter_m3.video);
2699        assert!(lookup("openai", "gpt-4o").vision_supported);
2700        assert!(lookup("openai", "gpt-5.4-preview").vision_supported);
2701        assert!(lookup("anthropic", "claude-sonnet-4-6").vision_supported);
2702        assert!(lookup("anthropic", "claude-sonnet-4-6").pdf);
2703        assert!(lookup("anthropic", "claude-sonnet-4-6").files_api_supported);
2704        assert!(lookup("openrouter", "google/gemini-2.5-flash").vision_supported);
2705        assert!(lookup("gemini", "gemini-2.5-flash").vision_supported);
2706        assert!(lookup("gemini", "gemini-2.5-flash").audio);
2707        assert!(lookup("gemini", "gemini-2.5-flash").pdf);
2708        assert_eq!(
2709            lookup("gemini", "gemini-2.5-flash").structured_output_mode,
2710            "native_json"
2711        );
2712        assert!(lookup("ollama", "llava:latest").vision_supported);
2713        assert!(lookup("ollama", "gemma4:26b").vision_supported);
2714        assert!(lookup("ollama", "gemma4-128k:latest").vision_supported);
2715        assert!(!lookup("openai", "gpt-3.5-turbo").vision_supported);
2716        assert!(!lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4").vision_supported);
2717    }
2718
2719    #[test]
2720    fn openrouter_gemini_explicit_cache_uses_block_breakpoints() {
2721        reset();
2722        let caps = lookup("openrouter", "google/gemini-2.5-flash");
2723        assert!(caps.prompt_caching);
2724        assert_eq!(caps.cache_breakpoint_style, "last_block");
2725    }
2726
2727    #[test]
2728    fn local_gemma4_exposes_native_tools_and_structured_output() {
2729        // Fix A: vLLM/SGLang serve Gemma 4 over the OpenAI-compatible surface,
2730        // so the local route must declare native tools + native structured
2731        // output like its hosted gemma-4 siblings — not silently fall back to
2732        // text tools.
2733        reset();
2734        let caps = lookup("local", "gemma-4-26b-a4b-it");
2735        assert!(caps.native_tools);
2736        assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
2737        assert_eq!(caps.structured_output.as_deref(), Some("native"));
2738    }
2739
2740    #[test]
2741    fn local_gemma4_exposes_vision_like_hosted_siblings() {
2742        // harn#3585: Gemma 4 is multimodal on every served surface. The local
2743        // OpenAI-compat route must declare vision so the derived structured
2744        // caps and emitted `capability_tags` agree with the gemini/openrouter/
2745        // together siblings.
2746        reset();
2747        for model in ["gemma-4-e4b-it", "gemma-4-e2b-it", "gemma-4-26b-a4b-it"] {
2748            let caps = lookup("local", model);
2749            assert!(
2750                caps.vision_supported,
2751                "local {model} should expose vision_supported"
2752            );
2753            let tags = crate::llm_config::capability_tags_from_capabilities(&caps);
2754            assert!(
2755                tags.iter().any(|t| t == "vision"),
2756                "local {model} emitted capability_tags should include `vision`, got {tags:?}"
2757            );
2758        }
2759    }
2760
2761    #[test]
2762    fn ollama_vision_models_have_no_reasoning_scaffold() {
2763        // Fix B: bakllava / llama3.2-vision / gemma3 are caption/vision models
2764        // with no reasoning capability; they must resolve to the "none" thinking
2765        // block style (like the llava sibling) so the template does not emit a
2766        // spurious "## Reasoning" scaffold.
2767        reset();
2768        for model in ["bakllava:latest", "llama3.2-vision:11b", "gemma3:27b"] {
2769            assert_eq!(
2770                lookup("ollama", model).thinking_block_style,
2771                "none",
2772                "{model} should resolve to thinking_block_style=\"none\""
2773            );
2774        }
2775        // Sibling sanity check.
2776        assert_eq!(
2777            lookup("ollama", "llava:latest").thinking_block_style,
2778            "none"
2779        );
2780    }
2781
2782    #[test]
2783    fn ollama_gemma4_supports_structured_output_and_text_tools() {
2784        // Fix C: Ollama honors the `format` kwarg, so both gemma4 rules must
2785        // declare structured_output="format_kw" (otherwise JSON/schema output
2786        // was blocked) plus explicit text tools for parity with the qwen rules.
2787        reset();
2788        for model in ["gemma4:12b-mlx", "gemma4:26b"] {
2789            let caps = lookup("ollama", model);
2790            assert_eq!(
2791                caps.structured_output.as_deref(),
2792                Some("format_kw"),
2793                "{model} should resolve structured_output=\"format_kw\""
2794            );
2795            assert!(!caps.native_tools, "{model} should use text tools");
2796            assert_eq!(
2797                caps.preferred_tool_format.as_deref(),
2798                Some("text"),
2799                "{model} should prefer text tool format"
2800            );
2801            assert_eq!(
2802                caps.thinking_block_style, "none",
2803                "{model} ships thinking-off"
2804            );
2805        }
2806    }
2807
2808    #[test]
2809    fn openrouter_inherits_openai() {
2810        reset();
2811        let caps = lookup("openrouter", "gpt-5.4");
2812        assert!(caps.defer_loading);
2813        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
2814        assert_eq!(caps.reasoning_wire_format.as_deref(), Some("openrouter"));
2815        assert!(!caps.top_k_supported);
2816    }
2817
2818    #[test]
2819    fn openrouter_kimi27_code_records_tool_choice_and_sampling_limits() {
2820        reset();
2821        let caps = lookup("openrouter", "moonshotai/kimi-k2.7-code");
2822        assert!(caps.native_tools);
2823        assert!(caps.prompt_caching);
2824        assert!(caps.vision_supported);
2825        assert!(caps.video);
2826        // 2026-06-24 forced-format sweep flipped this route native -> text:
2827        // native double-escaped backslash bodies (1/5) and fenced-JSON produced
2828        // no parseable Harn call (0/5); heredoc text was 5/5 byte-clean.
2829        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
2830        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
2831        assert_eq!(caps.thinking_modes, vec!["enabled"]);
2832        assert_eq!(caps.allowed_tool_choice_modes, vec!["auto", "none"]);
2833        assert!(!caps.temperature_supported);
2834        assert!(!caps.top_p_supported);
2835        assert!(!caps.frequency_penalty_supported);
2836        assert!(!caps.presence_penalty_supported);
2837
2838        let prior = lookup("openrouter", "moonshotai/kimi-k2.6");
2839        assert!(prior.prompt_caching);
2840        assert!(prior.vision_supported);
2841        assert!(!prior.video);
2842        assert!(prior.allowed_tool_choice_modes.is_empty());
2843        assert!(prior.temperature_supported);
2844    }
2845
2846    #[test]
2847    fn qwen37_routes_record_prompt_cache_vision_and_streaming_quirks() {
2848        reset();
2849        let plus = lookup("openrouter", "qwen/qwen3.7-plus");
2850        assert!(plus.native_tools);
2851        assert!(plus.prompt_caching);
2852        assert!(plus.vision_supported);
2853        assert_eq!(plus.preferred_tool_format.as_deref(), Some("native"));
2854        assert_eq!(plus.thinking_modes, vec!["enabled"]);
2855        assert_eq!(
2856            plus.auto_reasoning_overrides
2857                .get("agent")
2858                .map(String::as_str),
2859            Some("off"),
2860            "Qwen tool-bearing agent turns should disable reasoning automatically",
2861        );
2862
2863        let max = lookup("openrouter", "qwen/qwen3.7-max");
2864        assert!(max.native_tools);
2865        assert!(max.prompt_caching);
2866        assert!(!max.vision_supported);
2867        assert_eq!(max.thinking_modes, vec!["enabled"]);
2868
2869        let together = lookup("together", "Qwen/Qwen3.7-Max");
2870        assert!(together.native_tools);
2871        assert!(together.prompt_caching);
2872        assert!(together.requires_streaming);
2873        assert!(!together.honors_chat_template_kwargs);
2874
2875        let glm = lookup("together", "zai-org/GLM-5.1");
2876        assert!(glm.native_tools);
2877        assert!(glm.prompt_caching);
2878        assert_eq!(glm.preferred_tool_format.as_deref(), Some("text"));
2879        assert_eq!(glm.tool_mode_parity.as_deref(), Some("native_unreliable"));
2880        assert_eq!(
2881            glm.auto_reasoning_overrides
2882                .get("agent")
2883                .map(String::as_str),
2884            Some("off"),
2885        );
2886
2887        let openrouter_glm = lookup("openrouter", "z-ai/glm-5.2");
2888        assert!(openrouter_glm.reasoning_effort_supported);
2889        assert_eq!(
2890            openrouter_glm.reasoning_effort_levels,
2891            vec!["high", "xhigh", "max"]
2892        );
2893        assert_eq!(
2894            openrouter_glm.preferred_tool_format.as_deref(),
2895            Some("text")
2896        );
2897
2898        let minimax = lookup("together", "MiniMaxAI/MiniMax-M2.7");
2899        assert!(minimax.native_tools);
2900        assert!(minimax.prompt_caching);
2901        // 2026-06-24 forced-format sweep flipped this route json -> text: heredoc
2902        // beat fenced-JSON on both dispatch and backslash-body fidelity at N=5.
2903        assert_eq!(minimax.preferred_tool_format.as_deref(), Some("text"));
2904        assert_eq!(
2905            minimax.tool_mode_parity.as_deref(),
2906            Some("native_unreliable")
2907        );
2908        assert!(!minimax.reasoning_text_promotable);
2909
2910        let step = lookup("openrouter", "stepfun/step-3.7-flash");
2911        assert!(step.native_tools);
2912        assert!(step.prompt_caching);
2913        assert!(!step.reasoning_disable_supported);
2914        assert_eq!(step.thinking_modes, vec!["enabled"]);
2915    }
2916
2917    #[test]
2918    fn openrouter_structured_routes_cover_current_open_models() {
2919        reset();
2920        for model in [
2921            "deepseek/deepseek-v4-flash",
2922            "mistralai/devstral-small",
2923            "meta-llama/llama-4-scout",
2924            "kwaipilot/kat-coder-pro-v2",
2925        ] {
2926            let caps = lookup("openrouter", model);
2927            assert!(caps.native_tools, "{model} should expose native tools");
2928            assert_eq!(caps.structured_output.as_deref(), Some("native"));
2929            assert_eq!(caps.structured_output_mode, "native_json");
2930        }
2931        assert!(lookup("openrouter", "deepseek/deepseek-v4-flash").top_k_supported);
2932        assert!(lookup("openrouter", "meta-llama/llama-4-scout").top_k_supported);
2933        assert!(!lookup("openrouter", "mistralai/devstral-small").top_k_supported);
2934        assert!(lookup("openrouter", "google/gemma-4-26b-a4b-it").top_k_supported);
2935    }
2936
2937    #[test]
2938    fn openrouter_anthropic_claude_models_support_native_tools() {
2939        // Regression for #2319: OpenRouter Anthropic slugs must match the
2940        // Anthropic capability rules before the OpenRouter -> OpenAI family
2941        // chain, otherwise native-tool requests get rejected as unsupported.
2942        reset();
2943        for model in [
2944            "anthropic/claude-haiku-4-5",
2945            "anthropic/claude-haiku-4-5-20251001",
2946            "anthropic/claude-sonnet-4-6",
2947            "anthropic/claude-sonnet-4-7",
2948            "anthropic/claude-opus-4-7",
2949        ] {
2950            let caps = lookup("openrouter", model);
2951            assert!(
2952                caps.native_tools,
2953                "{model} via openrouter should report native_tools=true",
2954            );
2955            assert!(
2956                caps.prompt_caching,
2957                "{model} via openrouter should report prompt_caching=true",
2958            );
2959            assert_eq!(
2960                caps.cache_breakpoint_style, "top_level",
2961                "{model} via openrouter should use top-level cache_control",
2962            );
2963            assert_eq!(
2964                caps.structured_output.as_deref(),
2965                Some("tool_use"),
2966                "{model} via openrouter should structured_output=tool_use (matches direct anthropic)",
2967            );
2968        }
2969    }
2970
2971    #[test]
2972    fn openrouter_deepseek_v32_defaults_to_text_tools() {
2973        reset();
2974        let caps = lookup("openrouter", "deepseek/deepseek-v3.2");
2975        assert!(caps.native_tools);
2976        assert!(caps.text_tool_wire_format_supported);
2977        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
2978        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
2979        assert_eq!(caps.structured_output.as_deref(), Some("native"));
2980        assert!(caps.prompt_caching);
2981        assert_eq!(caps.cache_breakpoint_style, "last_block");
2982
2983        let automated = lookup("openrouter", "deepseek/deepseek-v3");
2984        assert!(automated.prompt_caching);
2985        assert_eq!(automated.cache_breakpoint_style, "none");
2986    }
2987
2988    #[test]
2989    fn openrouter_explicit_cache_routes_get_block_breakpoints() {
2990        reset();
2991        for model in [
2992            "qwen/qwen3.6-plus",
2993            "qwen/qwen3-coder-plus",
2994            "qwen/qwen3-coder-flash",
2995            "qwen/qwen3-max",
2996            "qwen/qwen-plus",
2997        ] {
2998            let caps = lookup("openrouter", model);
2999            assert!(caps.prompt_caching, "{model} should support prompt cache");
3000            assert_eq!(
3001                caps.cache_breakpoint_style, "last_block",
3002                "{model} should request explicit content-block cache breakpoints",
3003            );
3004        }
3005
3006        let open_weight = lookup("openrouter", "qwen/qwen3.6-35b-a3b");
3007        assert!(!open_weight.prompt_caching);
3008        assert_eq!(open_weight.cache_breakpoint_style, "none");
3009    }
3010
3011    #[test]
3012    fn openrouter_deepseek_alias_slugs_support_native_tools() {
3013        reset();
3014        for model in ["deepseek/deepseek-chat", "deepseek/deepseek-chat-v3-0324"] {
3015            let caps = lookup("openrouter", model);
3016            assert!(caps.native_tools, "{model} should expose native tools");
3017            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
3018            assert_eq!(caps.structured_output.as_deref(), Some("native"));
3019            assert!(
3020                caps.thinking_modes.is_empty(),
3021                "{model} is not a reasoning route"
3022            );
3023            assert_eq!(caps.thinking_block_style, "none");
3024            assert!(
3025                caps.top_k_supported,
3026                "{model} should accept top_k through OpenRouter"
3027            );
3028        }
3029
3030        for model in [
3031            "deepseek/deepseek-chat-v3.1",
3032            "deepseek/deepseek-r1",
3033            "deepseek/deepseek-r1-0528",
3034        ] {
3035            let caps = lookup("openrouter", model);
3036            assert!(caps.native_tools, "{model} should expose native tools");
3037            assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
3038            assert_eq!(caps.structured_output.as_deref(), Some("native"));
3039            assert_eq!(caps.thinking_modes, vec!["enabled", "effort"]);
3040            assert_eq!(caps.thinking_block_style, "reasoning_summary");
3041            assert!(
3042                caps.top_k_supported,
3043                "{model} should accept top_k through OpenRouter"
3044            );
3045        }
3046
3047        assert!(!lookup("openrouter", "deepseek/deepseek-r1-distill-qwen-32b").native_tools);
3048    }
3049
3050    #[test]
3051    fn openrouter_qwen_coder_defaults_to_text_tools() {
3052        reset();
3053        let caps = lookup("openrouter", "qwen/qwen3-coder-flash");
3054        assert!(caps.native_tools);
3055        assert!(caps.text_tool_wire_format_supported);
3056        assert_eq!(caps.preferred_tool_format.as_deref(), Some("text"));
3057        assert_eq!(caps.tool_mode_parity.as_deref(), Some("native_unreliable"));
3058    }
3059
3060    #[test]
3061    fn bedrock_claude_uses_anthropic_wire_capabilities() {
3062        reset();
3063        let caps = lookup("bedrock", "anthropic.claude-3-5-sonnet-20240620-v1:0");
3064        assert!(caps.native_tools);
3065        assert_eq!(caps.message_wire_format, "anthropic");
3066        assert_eq!(caps.native_tool_wire_format, "anthropic");
3067    }
3068
3069    #[test]
3070    fn groq_inherits_openai_family_only() {
3071        reset();
3072        let caps = lookup("groq", "gpt-5.5-preview");
3073        assert!(caps.defer_loading);
3074    }
3075
3076    #[test]
3077    fn cerebras_inherits_openai_family() {
3078        reset();
3079        let caps = lookup("cerebras", "gpt-oss-120b");
3080        assert_eq!(caps.message_wire_format, "openai");
3081        assert_eq!(caps.native_tool_wire_format, "openai");
3082        // gpt-oss uses NATIVE tool calls across cerebras/groq/together. Under
3083        // json/text it emits a bare {"tool","arguments"} dialect the
3084        // fenced-JSON parser rejects (zero parsed calls), so native is the only
3085        // working channel.
3086        assert!(caps.native_tools);
3087        assert_eq!(caps.preferred_tool_format.as_deref(), Some("native"));
3088    }
3089
3090    #[test]
3091    fn cerebras_gpt_oss_declares_supported_reasoning_efforts() {
3092        // Cerebras GPT-OSS accepts low/medium/high only. The policy resolver
3093        // uses this list to floor `reasoning_policy: "off"` to `low` instead
3094        // of sending unsupported `none` or `minimal` values.
3095        reset();
3096        let caps = lookup("cerebras", "gpt-oss-120b");
3097        assert_cerebras_effort_reasoning("gpt-oss-120b", "reasoning_summary");
3098        assert!(!caps.reasoning_none_supported);
3099        assert_eq!(caps.reasoning_effort_levels, vec!["low", "medium", "high"]);
3100    }
3101
3102    #[test]
3103    fn gpt_oss_requires_reasoning_for_tools_with_provider_specific_tool_wire() {
3104        // gpt-oss (Harmony) calls tools INSIDE the chain-of-thought channel, so
3105        // reasoning-off breaks tool calling. Provider catch-all rules carry no
3106        // reasoning fields, so without a dedicated `*gpt-oss*` row gpt-oss
3107        // would fall through to reasoning-OFF and the eval loop would bill a
3108        // noncommittal. Tool wire support is provider-specific: the pay-per-token
3109        // routes (OpenRouter, Fireworks, DeepInfra, SambaNova) ride Harn's TEXT
3110        // channel — their provider-native Harmony path drops tool calls into the
3111        // reasoning/commentary channel (empty `tool_calls` / billed-noncommittal,
3112        // see the DeepInfra/SambaNova rows + vLLM #22578/#44216, SGLang
3113        // #8976/#10738, openai/harmony #68). Within the text channel they use the
3114        // escape-free heredoc (`text`) grammar rather than fenced-JSON, because
3115        // gpt-oss double-escapes the backslashes a JSON string arg requires and
3116        // corrupts `\\`-heavy code bodies (empirical A/B 2026-06-21: text beats
3117        // json on both dispatch and byte-fidelity). Only the native-clean direct
3118        // routes (Cerebras, Groq) still use provider-native tools.
3119        reset();
3120        for (provider, model, native_tools, preferred_tool_format) in [
3121            ("openrouter", "openai/gpt-oss-120b", false, "text"),
3122            (
3123                "fireworks",
3124                "accounts/fireworks/models/gpt-oss-120b",
3125                false,
3126                "text",
3127            ),
3128            ("deepinfra", "openai/gpt-oss-120b", false, "text"),
3129            ("sambanova", "sambanova/gpt-oss-120b", false, "text"),
3130            ("cerebras", "gpt-oss-120b", true, "native"),
3131            ("groq", "openai/gpt-oss-120b", true, "native"),
3132        ] {
3133            let caps = lookup(provider, model);
3134            assert!(
3135                caps.reasoning_required_for_tools,
3136                "{provider}/{model}: reasoning_required_for_tools must be true"
3137            );
3138            assert!(
3139                caps.reasoning_effort_supported,
3140                "{provider}/{model}: reasoning_effort_supported must be true"
3141            );
3142            assert_eq!(
3143                caps.reasoning_effort_levels,
3144                vec!["low", "medium", "high"],
3145                "{provider}/{model}: effort levels"
3146            );
3147            assert_eq!(caps.thinking_modes, vec!["effort"], "{provider}/{model}");
3148            assert_eq!(
3149                caps.native_tools, native_tools,
3150                "{provider}/{model}: native_tools"
3151            );
3152            assert_eq!(
3153                caps.preferred_tool_format.as_deref(),
3154                Some(preferred_tool_format),
3155                "{provider}/{model}: preferred tool format"
3156            );
3157            assert_eq!(
3158                caps.thinking_block_style, "reasoning_summary",
3159                "{provider}/{model}"
3160            );
3161        }
3162    }
3163
3164    #[test]
3165    fn cerebras_glm_47_supports_reasoning_none() {
3166        // Cerebras documents GLM 4.7's no-reasoning value as
3167        // reasoning_effort="none"; the older disable_reasoning knob is
3168        // deprecated. Keep the route on the same policy path as GPT-OSS.
3169        reset();
3170        let caps = lookup("cerebras", "zai-glm-4.7");
3171        assert_cerebras_effort_reasoning("zai-glm-4.7", "inline");
3172        assert!(caps.reasoning_none_supported);
3173    }
3174
3175    #[test]
3176    fn mock_with_claude_model_routes_to_anthropic() {
3177        reset();
3178        let caps = lookup("mock", "claude-sonnet-4-7");
3179        assert!(caps.defer_loading);
3180        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
3181    }
3182
3183    #[test]
3184    fn mock_with_gpt_model_routes_to_openai() {
3185        reset();
3186        let caps = lookup("mock", "gpt-5.4-preview");
3187        assert!(caps.defer_loading);
3188        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
3189    }
3190
3191    #[test]
3192    fn mock_with_gemini_model_routes_to_gemini() {
3193        reset();
3194        let caps = lookup("mock", "gemini-2.5-flash");
3195        assert_eq!(caps.message_wire_format, "gemini");
3196        assert_eq!(caps.native_tool_wire_format, "openai");
3197        assert!(caps.prefers_xml_scaffolding);
3198    }
3199
3200    #[test]
3201    fn qwen36_ollama_preserves_thinking() {
3202        reset();
3203        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
3204        assert!(!caps.native_tools);
3205        assert_eq!(caps.json_schema.as_deref(), Some("format_kw"));
3206        assert!(!caps.thinking_modes.is_empty());
3207        assert!(
3208            caps.preserve_thinking,
3209            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
3210        );
3211        assert_eq!(caps.server_parser, "none");
3212        assert!(!caps.honors_chat_template_kwargs);
3213        assert_eq!(caps.recommended_endpoint.as_deref(), Some("/api/chat"));
3214        assert!(caps.text_tool_wire_format_supported);
3215        assert!(caps.prefers_markdown_scaffolding);
3216        assert_eq!(caps.structured_output_mode, "delimited");
3217        assert!(!caps.prefers_xml_tools);
3218        assert_eq!(caps.thinking_block_style, "inline");
3219    }
3220
3221    #[test]
3222    fn qwen35_ollama_does_not_preserve_thinking() {
3223        reset();
3224        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
3225        assert!(caps.native_tools);
3226        assert!(!caps.thinking_modes.is_empty());
3227        assert!(
3228            !caps.preserve_thinking,
3229            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
3230        );
3231        assert_eq!(caps.server_parser, "ollama_qwen3coder");
3232        assert!(!caps.text_tool_wire_format_supported);
3233    }
3234
3235    #[test]
3236    fn qwen36_routed_providers_all_preserve_thinking() {
3237        reset();
3238        for (provider, model) in [
3239            ("openrouter", "qwen/qwen3.6-plus"),
3240            ("together", "Qwen/Qwen3.6-Plus"),
3241            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
3242            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
3243            ("dashscope", "qwen3.6-plus"),
3244            ("local", "Qwen3.6-35B-A3B"),
3245            ("mlx", "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit"),
3246            ("mlx", "Qwen/Qwen3.6-35B-A3B"),
3247        ] {
3248            let caps = lookup(provider, model);
3249            assert!(
3250                !caps.thinking_modes.is_empty(),
3251                "{provider}/{model}: thinking"
3252            );
3253            assert!(
3254                caps.preserve_thinking,
3255                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
3256            );
3257            assert!(caps.native_tools, "{provider}/{model}: native_tools");
3258            assert_ne!(
3259                caps.server_parser, "ollama_qwen3coder",
3260                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
3261            );
3262        }
3263
3264        let caps = lookup("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF");
3265        assert!(!caps.thinking_modes.is_empty());
3266        assert!(caps.preserve_thinking);
3267        assert!(!caps.native_tools);
3268        assert!(caps.text_tool_wire_format_supported);
3269        assert_eq!(caps.server_parser, "none");
3270    }
3271
3272    #[test]
3273    fn qwen_coder_models_do_not_claim_thinking_modes() {
3274        reset();
3275        for (provider, model) in [
3276            ("together", "Qwen/Qwen3-Coder-Next-FP8"),
3277            ("together", "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"),
3278            ("openrouter", "qwen/qwen3-coder-next"),
3279            ("huggingface", "Qwen/Qwen3-Coder-Next"),
3280        ] {
3281            let caps = lookup(provider, model);
3282            assert!(caps.native_tools, "{provider}/{model}: native_tools");
3283            assert!(
3284                caps.thinking_modes.is_empty(),
3285                "{provider}/{model}: coder models are non-thinking routes"
3286            );
3287            assert!(
3288                !caps.preserve_thinking,
3289                "{provider}/{model}: preserve_thinking must stay off"
3290            );
3291            assert!(
3292                caps.thinking_disable_directive.is_none(),
3293                "{provider}/{model}: no /no_think shim should be needed"
3294            );
3295        }
3296    }
3297
3298    #[test]
3299    fn llamacpp_qwen_keeps_text_tool_wire_format() {
3300        reset();
3301        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
3302        assert_eq!(caps.server_parser, "none");
3303        assert!(caps.honors_chat_template_kwargs);
3304        assert!(!caps.native_tools);
3305        assert!(caps.text_tool_wire_format_supported);
3306        assert_eq!(
3307            caps.recommended_endpoint.as_deref(),
3308            Some("/v1/chat/completions")
3309        );
3310    }
3311
3312    #[test]
3313    fn devstral_local_routes_default_to_json_tools() {
3314        reset();
3315        for provider in ["ollama", "llamacpp"] {
3316            let caps = lookup(provider, "devstral-small-2:24b");
3317            assert!(!caps.native_tools, "{provider}: native tools stay opt-in");
3318            assert!(
3319                caps.text_tool_wire_format_supported,
3320                "{provider}: text tools should remain available"
3321            );
3322            // devstral has no reserved-token constraint, so it uses the global
3323            // `json` (fenced-JSON) text-channel default. Heredoc stays
3324            // reachable via an explicit `preferred_tool_format = "text"` pin.
3325            assert_eq!(
3326                caps.preferred_tool_format.as_deref(),
3327                Some("json"),
3328                "{provider}: devstral inherits the global json default"
3329            );
3330        }
3331    }
3332
3333    #[test]
3334    fn openrouter_mistral_routes_use_native_tools() {
3335        reset();
3336        let caps = lookup("openrouter", "mistralai/mistral-small-2603");
3337        assert!(caps.native_tools);
3338        assert!(caps.text_tool_wire_format_supported);
3339        assert_eq!(caps.structured_output.as_deref(), Some("native"));
3340        assert_eq!(caps.structured_output_mode, "native_json");
3341    }
3342
3343    #[test]
3344    fn dashscope_and_llamacpp_resolve_capabilities() {
3345        reset();
3346        // New sibling providers should fall through to `openai` for
3347        // gpt-*  models even without dedicated rules.
3348        let caps = lookup("dashscope", "gpt-5.4-preview");
3349        assert!(caps.defer_loading);
3350        let caps = lookup("llamacpp", "gpt-5.4-preview");
3351        assert!(caps.defer_loading);
3352    }
3353
3354    #[test]
3355    fn unknown_provider_has_no_capabilities() {
3356        reset();
3357        let caps = lookup("my-custom-proxy", "foo-bar-1");
3358        assert!(!caps.native_tools);
3359        assert!(!caps.defer_loading);
3360        assert!(caps.tool_search.is_empty());
3361    }
3362
3363    #[test]
3364    fn openrouter_specific_rules_win_and_family_inheritance_is_preserved() {
3365        // Capability resolution is first-match-wins over fragment order
3366        // (`first_matching_rule_in_file` -> `Iterator::find`), and when no
3367        // `provider.openrouter` rule matches it walks the `[provider_family]`
3368        // chain (openrouter -> openai). Both contracts must hold so that:
3369        //   1. a specific OpenRouter carve-out beats a broader OpenRouter rule,
3370        //   2. gpt-/o-family slugs routed through OpenRouter still inherit the
3371        //      rich openai-family capability set (a blanket `*` openrouter row
3372        //      would shadow this — see the catalog-or-defaults report).
3373        reset();
3374
3375        // 1. Specific carve-out wins: deepseek/deepseek-v3.2 is pinned to the
3376        // Harn text-tool channel even though the broader deepseek/deepseek-v3*
3377        // rule below it would otherwise resolve `native`.
3378        let deepseek = lookup("openrouter", "deepseek/deepseek-v3.2");
3379        assert_eq!(
3380            deepseek.preferred_tool_format.as_deref(),
3381            Some("text"),
3382            "deepseek-v3.2 text carve-out must win over the broader deepseek-v3* rule"
3383        );
3384        assert_eq!(
3385            deepseek.tool_mode_parity.as_deref(),
3386            Some("native_unreliable")
3387        );
3388        // The broader sibling still resolves native for non-3.2 v3 slugs.
3389        assert_eq!(
3390            lookup("openrouter", "deepseek/deepseek-v3-base")
3391                .preferred_tool_format
3392                .as_deref(),
3393            Some("native")
3394        );
3395
3396        // 2. Family inheritance preserved: an openai-prefixed slug routed via
3397        // OpenRouter still picks up openai-family reasoning fields.
3398        let prefixed = lookup("openrouter", "openai/o4-mini");
3399        assert!(prefixed.requires_completion_tokens);
3400        assert!(prefixed.reasoning_effort_supported);
3401
3402        // The newly added MiniMax M2.5 OR mirror resolves native via the
3403        // existing `minimax/minimax-m2*` rule.
3404        let m25 = lookup("openrouter", "minimax/minimax-m2.5");
3405        assert!(m25.native_tools);
3406        assert_eq!(m25.preferred_tool_format.as_deref(), Some("native"));
3407    }
3408
3409    #[test]
3410    fn enterprise_routes_expose_format_preferences() {
3411        reset();
3412        let bedrock_claude = lookup("bedrock", "anthropic.claude-opus-4-7-v1:0");
3413        assert!(bedrock_claude.prefers_xml_scaffolding);
3414        assert_eq!(bedrock_claude.structured_output_mode, "xml_tagged");
3415        assert!(!bedrock_claude.supports_assistant_prefill);
3416        assert!(bedrock_claude.prefers_xml_tools);
3417
3418        let azure_o = lookup("azure_openai", "o3-prod");
3419        assert!(azure_o.prefers_markdown_scaffolding);
3420        assert_eq!(azure_o.structured_output_mode, "native_json");
3421        assert!(azure_o.prefers_role_developer);
3422        assert_eq!(azure_o.thinking_block_style, "reasoning_summary");
3423    }
3424
3425    #[test]
3426    fn user_override_adds_new_provider() {
3427        reset();
3428        let toml_src = concat!(
3429            "[[provider.my-proxy]]\n",
3430            "model_match = \"*\"\n",
3431            "native_tools = true\n",
3432            "tool_search = [\"hosted\"]\n",
3433            "prefers_xml_scaffolding = true\n",
3434            "structured_output_mode = \"xml_tagged\"\n",
3435            "supports_assistant_prefill = true\n",
3436            "prefers_xml_tools = true\n",
3437            "thinking_block_style = \"thinking_blocks\"\n",
3438        );
3439        set_user_overrides_toml(toml_src).unwrap();
3440        let caps = lookup("my-proxy", "anything");
3441        assert!(caps.native_tools);
3442        assert_eq!(caps.tool_search, vec!["hosted"]);
3443        assert!(caps.prefers_xml_scaffolding);
3444        assert_eq!(caps.structured_output_mode, "xml_tagged");
3445        assert!(caps.supports_assistant_prefill);
3446        assert!(caps.prefers_xml_tools);
3447        assert_eq!(caps.thinking_block_style, "thinking_blocks");
3448        clear_user_overrides();
3449    }
3450
3451    #[test]
3452    fn user_override_takes_precedence_over_builtin() {
3453        reset();
3454        let toml_src = r#"
3455[[provider.anthropic]]
3456model_match = "claude-opus-*"
3457native_tools = true
3458defer_loading = false
3459tool_search = []
3460"#;
3461        set_user_overrides_toml(toml_src).unwrap();
3462        let caps = lookup("anthropic", "claude-opus-4-7");
3463        assert!(caps.native_tools);
3464        assert!(!caps.defer_loading);
3465        assert!(caps.tool_search.is_empty());
3466        clear_user_overrides();
3467    }
3468
3469    #[test]
3470    fn user_override_from_manifest_toml() {
3471        reset();
3472        let manifest = concat!(
3473            "[package]\n",
3474            "name = \"demo\"\n\n",
3475            "[[capabilities.provider.my-proxy]]\n",
3476            "model_match = \"*\"\n",
3477            "native_tools = true\n",
3478            "tool_search = [\"hosted\"]\n",
3479            "prefers_markdown_scaffolding = true\n",
3480            "structured_output_mode = \"native_json\"\n",
3481            "prefers_role_developer = true\n",
3482            "thinking_block_style = \"reasoning_summary\"\n",
3483        );
3484        set_user_overrides_from_manifest_toml(manifest).unwrap();
3485        let caps = lookup("my-proxy", "foo");
3486        assert!(caps.native_tools);
3487        assert_eq!(caps.tool_search, vec!["hosted"]);
3488        assert!(caps.prefers_markdown_scaffolding);
3489        assert_eq!(caps.structured_output_mode, "native_json");
3490        assert!(caps.prefers_role_developer);
3491        assert_eq!(caps.thinking_block_style, "reasoning_summary");
3492        clear_user_overrides();
3493    }
3494
3495    #[test]
3496    fn version_min_requires_parseable_model() {
3497        reset();
3498        let toml_src = r#"
3499[[provider.custom]]
3500model_match = "*"
3501version_min = [5, 4]
3502native_tools = true
3503"#;
3504        set_user_overrides_toml(toml_src).unwrap();
3505        // Unparseable model ID + version_min → rule doesn't match.
3506        let caps = lookup("custom", "mystery-model");
3507        assert!(!caps.native_tools);
3508        clear_user_overrides();
3509    }
3510
3511    #[test]
3512    fn glob_match_substring() {
3513        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
3514        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
3515        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
3516    }
3517
3518    #[test]
3519    fn openrouter_namespaced_anthropic_model() {
3520        reset();
3521        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
3522        assert!(caps.defer_loading);
3523    }
3524
3525    #[test]
3526    fn matrix_rows_include_provider_patterns_and_sources() {
3527        reset();
3528        let rows = matrix_rows();
3529        assert!(rows.iter().any(|row| {
3530            row.provider == "openai"
3531                && row.model == "gpt-4o*"
3532                && row.vision
3533                && row.audio
3534                && row.json_schema.as_deref() == Some("native")
3535                && row.source == "builtin"
3536        }));
3537    }
3538
3539    #[test]
3540    fn validate_tool_format_autocorrects_native_pin_on_native_unreliable_route() {
3541        reset();
3542        // DeepSeek V3.2 on OpenRouter: tool_mode_parity = native_unreliable,
3543        // preferred_tool_format = text. A `native` request is the footgun — it
3544        // drops to unparsed DSML text and gets rejected. The gate must steer it
3545        // to the route's preferred text-channel format and explain why.
3546        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "native");
3547        assert_eq!(
3548            decision.effective, "text",
3549            "native must be auto-corrected to the route's preferred text format"
3550        );
3551        let reason = decision.correction.expect("a correction must be reported");
3552        assert!(reason.contains("native"), "names the rejected format");
3553        assert!(reason.contains("native_unreliable"), "names the parity");
3554        assert!(reason.contains("text"), "names the working alternative");
3555    }
3556
3557    #[test]
3558    fn validate_tool_format_passes_through_safe_combos() {
3559        reset();
3560        // A native-capable route with no adverse parity keeps the requested
3561        // native format untouched (no spurious correction).
3562        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3-base", "native");
3563        assert_eq!(decision.effective, "native");
3564        assert!(decision.correction.is_none());
3565
3566        // The same native_unreliable route is fine when text is requested.
3567        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "text");
3568        assert_eq!(decision.effective, "text");
3569        assert!(decision.correction.is_none());
3570
3571        // json is also a text-channel grammar and is accepted on a text route.
3572        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "json");
3573        assert_eq!(decision.effective, "json");
3574        assert!(decision.correction.is_none());
3575    }
3576
3577    #[test]
3578    fn validate_tool_format_leaves_unknown_routes_and_formats_alone() {
3579        reset();
3580        // Unknown provider/model has parity = unknown -> no opinion, pass through.
3581        let decision = validate_tool_format("my-proxy", "mystery-1", "native");
3582        assert_eq!(decision.effective, "native");
3583        assert!(decision.correction.is_none());
3584
3585        // An unclassifiable tool_format string is not ours to rewrite.
3586        let decision = validate_tool_format("openrouter", "deepseek/deepseek-v3.2", "frobnicate");
3587        assert_eq!(decision.effective, "frobnicate");
3588        assert!(decision.correction.is_none());
3589    }
3590
3591    #[test]
3592    fn validate_tool_format_steers_off_text_on_native_only_route() {
3593        reset();
3594        // Synthesize a native_only route via a project override and confirm a
3595        // text request is steered to native (the symmetric direction).
3596        let overrides: CapabilitiesFile = toml::from_str(
3597            "[[provider.acme]]\n\
3598             model_match = \"native-only-*\"\n\
3599             native_tools = true\n\
3600             text_tool_wire_format_supported = false\n\
3601             tool_mode_parity = \"native_only\"\n\
3602             preferred_tool_format = \"native\"\n",
3603        )
3604        .expect("override parses");
3605        let caps = lookup_with_user_overrides("acme", "native-only-1", Some(&overrides));
3606        let decision = validate_tool_format_with_caps("acme", "native-only-1", "text", &caps);
3607        assert_eq!(decision.effective, "native");
3608        let reason = decision
3609            .correction
3610            .expect("text on native_only is corrected");
3611        assert!(reason.contains("native_only"));
3612    }
3613
3614    #[test]
3615    fn validate_tool_format_honors_structural_text_unsupported_bit() {
3616        reset();
3617        // Real shipping route: ollama/qwen3* declares native_tools = true and
3618        // text_tool_wire_format_supported = false with NO tool_mode_parity
3619        // string. The gate's contract ("always yields parseable tool calls")
3620        // must hold from the structural bit alone — a text/json request is
3621        // steered to native, not passed through onto an unsupported channel.
3622        let caps = lookup("ollama", "qwen3-coder:30b");
3623        assert!(!caps.text_tool_wire_format_supported);
3624        for requested in ["text", "json"] {
3625            let decision =
3626                validate_tool_format_with_caps("ollama", "qwen3-coder:30b", requested, &caps);
3627            assert_eq!(
3628                decision.effective, "native",
3629                "{requested} must be steered to native on a text-unsupported route"
3630            );
3631            assert!(decision.correction.is_some());
3632        }
3633        // native is the route's working channel — untouched.
3634        let native = validate_tool_format_with_caps("ollama", "qwen3-coder:30b", "native", &caps);
3635        assert_eq!(native.effective, "native");
3636        assert!(native.correction.is_none());
3637    }
3638
3639    #[test]
3640    fn tool_format_resolution_is_serving_stack_aware_for_same_weights() {
3641        // The (model x serving-stack) insight: the SAME Qwen3.6 weights resolve
3642        // to DIFFERENT working tool-call channels depending on who serves them.
3643        // This divergence lives in the capability matrix as data (provider rows),
3644        // NOT in alias pins — so an alias refactor must not be able to regress
3645        // it. Locking the three live serving stacks here makes that explicit.
3646        reset();
3647
3648        // llama.cpp (:8001) — native is probe-validated and trusted.
3649        let llamacpp = validate_tool_format("llamacpp", "qwen3.6-35b-a3b-ud-q4-k-xl", "native");
3650        assert_eq!(
3651            llamacpp.effective, "native",
3652            "llama.cpp serves qwen3.6 native"
3653        );
3654        assert!(llamacpp.correction.is_none());
3655
3656        // Ollama (/v1) — the embedded qwen tool-call parser 500s on text-mode
3657        // output, so this route is served on the text/json channel: a native
3658        // request must be auto-corrected to json (never silently dropped).
3659        let ollama = validate_tool_format("ollama", "qwen3.6-35b-a3b", "native");
3660        assert_eq!(
3661            ollama.effective, "json",
3662            "ollama qwen3.6 must steer native -> json (server-side parser 500 leak)"
3663        );
3664        assert!(
3665            ollama.correction.is_some(),
3666            "the native->json steer must be explained, not silent"
3667        );
3668
3669        // A native_unreliable cloud route (deepinfra GLM-5) carries the same
3670        // serving-stack verdict via tool_mode_parity + empirical notes, and is
3671        // likewise steered off native.
3672        let glm = validate_tool_format("deepinfra", "deepinfra/glm-5.2", "native");
3673        assert_eq!(glm.effective, "json");
3674        assert!(glm.correction.is_some());
3675    }
3676
3677    #[test]
3678    fn validate_tool_format_passes_through_when_no_channel_works() {
3679        reset();
3680        // A route with no working tool surface — text_only parity forbids the
3681        // native channel, and text_tool_wire_format_supported = false forbids
3682        // the text channel — so BOTH channels are forbidden. The gate has
3683        // nothing better to steer to; it must NOT rewrite to an equally broken
3684        // format under a misleading correction. Pass through unchanged.
3685        let overrides: CapabilitiesFile = toml::from_str(
3686            "[[provider.acme]]\n\
3687             model_match = \"no-tools-*\"\n\
3688             native_tools = false\n\
3689             tool_mode_parity = \"text_only\"\n\
3690             text_tool_wire_format_supported = false\n",
3691        )
3692        .expect("override parses");
3693        let caps = lookup_with_user_overrides("acme", "no-tools-1", Some(&overrides));
3694        for requested in ["native", "text", "json"] {
3695            let decision = validate_tool_format_with_caps("acme", "no-tools-1", requested, &caps);
3696            assert_eq!(
3697                decision.effective, requested,
3698                "{requested} passes through unchanged"
3699            );
3700            assert!(decision.correction.is_none());
3701        }
3702    }
3703
3704    /// FOOTGUN-REMOVAL — gpt-oss (Harmony) on the pay-per-token DeepInfra and
3705    /// SambaNova routes drops tool calls into the reasoning channel on native, so
3706    /// a `native` pin must auto-correct to the route's `text` channel with an
3707    /// explanatory correction. The known-good native routes (cerebras gpt-oss,
3708    /// sambanova minimax) must stay untouched.
3709    #[test]
3710    fn validate_tool_format_autocorrects_gpt_oss_native_pin_to_text() {
3711        reset();
3712        for (provider, model) in [
3713            ("deepinfra", "deepinfra/openai/gpt-oss-120b"),
3714            ("sambanova", "sambanova/gpt-oss-120b"),
3715        ] {
3716            let decision = validate_tool_format(provider, model, "native");
3717            assert_eq!(
3718                decision.effective, "text",
3719                "{provider}/{model}: native must auto-correct to text"
3720            );
3721            let reason = decision
3722                .correction
3723                .unwrap_or_else(|| panic!("{provider}/{model}: a correction must be reported"));
3724            assert!(
3725                reason.contains("native_unreliable"),
3726                "{provider}/{model}: names the parity"
3727            );
3728            assert!(
3729                reason.contains("text"),
3730                "{provider}/{model}: names the working alternative"
3731            );
3732            // text is already safe and passes through unchanged.
3733            let text = validate_tool_format(provider, model, "text");
3734            assert_eq!(text.effective, "text");
3735            assert!(text.correction.is_none());
3736        }
3737    }
3738
3739    /// FOOTGUN-REMOVAL — the GLM-5.x native channel emits `<tool_call>` markup
3740    /// instead of provider-native `tool_calls`, so the zai-direct GLM rows pin
3741    /// text and a `native` pin must auto-correct, matching the Fireworks/
3742    /// DeepInfra/Baseten precedents.
3743    #[test]
3744    fn validate_tool_format_autocorrects_zai_glm_native_pin_to_text() {
3745        reset();
3746        for model in ["glm-5.2", "glm-5.1", "glm-5"] {
3747            let decision = validate_tool_format("zai", model, "native");
3748            assert_eq!(
3749                decision.effective, "text",
3750                "zai/{model}: native must auto-correct to text"
3751            );
3752            let reason = decision
3753                .correction
3754                .unwrap_or_else(|| panic!("zai/{model}: a correction must be reported"));
3755            assert!(
3756                reason.contains("native_unreliable"),
3757                "zai/{model}: names the parity"
3758            );
3759        }
3760    }
3761
3762    /// The known-good native routes must NOT be touched by the gpt-oss/GLM
3763    /// pins above — a native pin stays native with no spurious correction.
3764    #[test]
3765    fn validate_tool_format_leaves_known_good_native_routes_unchanged() {
3766        reset();
3767        for (provider, model) in [
3768            // cerebras gpt-oss is native-clean (only throttled).
3769            ("cerebras", "gpt-oss-120b"),
3770            // sambanova deepseek-v3.2 is native and interchangeable; minimax is
3771            // native_unreliable upstream and is not a known-good native
3772            // exemplar.
3773            ("sambanova", "DeepSeek-V3.2"),
3774        ] {
3775            let decision = validate_tool_format(provider, model, "native");
3776            assert_eq!(
3777                decision.effective, "native",
3778                "{provider}/{model}: known-good native route must stay native"
3779            );
3780            assert!(
3781                decision.correction.is_none(),
3782                "{provider}/{model}: no spurious correction"
3783            );
3784        }
3785    }
3786
3787    /// FOOTGUN-REMOVAL — the first-class no-viable-channel guard fires when BOTH
3788    /// channels are forbidden (a route the registry trusts on neither native nor
3789    /// text), naming the bad combo and a suggested alternative — never a silent
3790    /// empty tool stream.
3791    #[test]
3792    fn no_viable_tool_channel_guard_fires_only_when_both_channels_forbidden() {
3793        reset();
3794        // Construct a gpt-oss route with NO working channel: native_unreliable
3795        // forbids native, and text_tool_wire_format_supported = false forbids the
3796        // text channel too.
3797        let overrides: CapabilitiesFile = toml::from_str(
3798            "[[provider.acme]]\n\
3799             model_match = \"acme/gpt-oss-stub\"\n\
3800             native_tools = false\n\
3801             tool_mode_parity = \"native_unreliable\"\n\
3802             text_tool_wire_format_supported = false\n",
3803        )
3804        .expect("override parses");
3805        let caps = lookup_with_user_overrides("acme", "acme/gpt-oss-stub", Some(&overrides));
3806        let message = no_viable_tool_channel_with_caps("acme", "acme/gpt-oss-stub", &caps)
3807            .expect("the guard must fire when neither channel works");
3808        assert!(
3809            message.contains("no viable tool-calling channel"),
3810            "names the failure: {message}"
3811        );
3812        assert!(
3813            message.contains("acme/gpt-oss-stub"),
3814            "names the bad combo: {message}"
3815        );
3816        // gpt-oss models get the Harmony-specific text-channel hint.
3817        assert!(
3818            message.contains("gpt-oss") && message.contains("text"),
3819            "suggests an alternative: {message}"
3820        );
3821
3822        // The DeepInfra/SambaNova gpt-oss rows keep a working text channel, so
3823        // the guard must NOT fire on them (they auto-correct instead).
3824        assert!(
3825            no_viable_tool_channel("deepinfra", "deepinfra/openai/gpt-oss-120b").is_none(),
3826            "auto-correctable route must not trip the fail-fast guard"
3827        );
3828        assert!(
3829            no_viable_tool_channel("sambanova", "sambanova/gpt-oss-120b").is_none(),
3830            "auto-correctable route must not trip the fail-fast guard"
3831        );
3832        // A healthy native-clean route never trips it.
3833        assert!(
3834            no_viable_tool_channel("cerebras", "gpt-oss-120b").is_none(),
3835            "healthy native route must not trip the guard"
3836        );
3837        // The generic (non-gpt-oss) no-channel case still fires with a generic
3838        // hint.
3839        let generic: CapabilitiesFile = toml::from_str(
3840            "[[provider.acme]]\n\
3841             model_match = \"mystery-1\"\n\
3842             native_tools = false\n\
3843             tool_mode_parity = \"text_only\"\n\
3844             text_tool_wire_format_supported = false\n",
3845        )
3846        .expect("override parses");
3847        let caps = lookup_with_user_overrides("acme", "mystery-1", Some(&generic));
3848        let message = no_viable_tool_channel_with_caps("acme", "mystery-1", &caps)
3849            .expect("guard fires on the generic no-channel route too");
3850        assert!(
3851            message.contains("harn provider catalog matrix"),
3852            "{message}"
3853        );
3854    }
3855
3856    // --- `extends = true` field-wise fall-through ---
3857
3858    /// Resolve capabilities for a synthetic provider whose rules come entirely
3859    /// from `src`: the parsed file is passed as the builtin base with no user
3860    /// layer, so no shipped rule interferes with the `extends` assertions.
3861    fn extends_caps(src: &str) -> Capabilities {
3862        let file = parse_capabilities_toml(src).expect("test capabilities toml parses");
3863        lookup_with("testprov", "test-model", &file, None)
3864    }
3865
3866    #[test]
3867    fn extends_rule_fills_unset_fields_from_later_matching_rule() {
3868        // Rule 1 opts into `extends` and sets only native_tools; rule 2 (lower
3869        // precedence, same match) supplies the fields the chain left unset.
3870        let caps = extends_caps(
3871            r#"
3872[[provider.testprov]]
3873model_match = "test-*"
3874extends = true
3875native_tools = true
3876
3877[[provider.testprov]]
3878model_match = "test-*"
3879vision = true
3880message_wire_format = "anthropic"
3881"#,
3882        );
3883        assert!(caps.native_tools, "field from the extends rule applies");
3884        assert!(
3885            caps.vision,
3886            "unset field filled from the later matching rule"
3887        );
3888        assert_eq!(caps.message_wire_format, "anthropic");
3889    }
3890
3891    #[test]
3892    fn non_extends_rule_terminates_resolution_unchanged() {
3893        // Without `extends`, the first match wins outright and the later
3894        // rule's vision never applies — the pre-`extends` first-match-wins
3895        // behavior is preserved.
3896        let caps = extends_caps(
3897            r#"
3898[[provider.testprov]]
3899model_match = "test-*"
3900native_tools = true
3901
3902[[provider.testprov]]
3903model_match = "test-*"
3904vision = true
3905"#,
3906        );
3907        assert!(caps.native_tools);
3908        assert!(
3909            !caps.vision,
3910            "a non-extends first match must not absorb later rules"
3911        );
3912    }
3913
3914    #[test]
3915    fn extends_rule_does_not_override_explicitly_set_field() {
3916        // The higher-precedence extends rule's explicit native_tools = true
3917        // wins; the later rule only fills fields the chain left unset, so its
3918        // native_tools = false is ignored while its vision still applies.
3919        let caps = extends_caps(
3920            r#"
3921[[provider.testprov]]
3922model_match = "test-*"
3923extends = true
3924native_tools = true
3925
3926[[provider.testprov]]
3927model_match = "test-*"
3928native_tools = false
3929vision = true
3930"#,
3931        );
3932        assert!(
3933            caps.native_tools,
3934            "the extends rule's explicit value is not overridden by a lower rule"
3935        );
3936        assert!(caps.vision, "still fills the field the chain left unset");
3937    }
3938
3939    #[test]
3940    fn extends_chain_falls_through_to_provider_defaults() {
3941        // An unterminated extends chain (no later matching rule) fills its
3942        // remaining gaps from provider defaults.
3943        let caps = extends_caps(
3944            r#"
3945[provider_defaults.testprov]
3946seed_supported = true
3947
3948[[provider.testprov]]
3949model_match = "test-*"
3950extends = true
3951native_tools = true
3952"#,
3953        );
3954        assert!(caps.native_tools, "field from the extends rule applies");
3955        assert!(
3956            caps.seed_supported,
3957            "unset field filled from provider defaults"
3958        );
3959    }
3960}
harn_vm/llm/capabilities.rs

harn_vm/llm/
capabilities.rs