// harn_vm/llm/capabilities.rs
//! Data-driven provider capabilities.
//!
//! The per-(provider, model) capability matrix (native tools, deferred
//! tool loading, tool-search variants, prompt caching, extended thinking,
//! max tool count) lives in the shipped `capabilities.toml` and is
//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
//! in `harn.toml`. This module owns:
//!
//! - loading the built-in TOML (compiled in via `include_str!`);
//! - merging user overrides on top;
//! - matching a `(provider, model)` pair against the rule list with
//!   glob + semver semantics;
//! - exposing a stable `Capabilities` struct that the `LlmProvider`
//!   trait delegates to as the single source of truth.
//!
//! Before this module the Anthropic / OpenAI gates were spread across
//! `providers/anthropic.rs` (`claude_generation`, `claude_model_supports_tool_search`)
//! and `providers/openai_compat.rs` (`gpt_generation`, `gpt_model_supports_tool_search`).
//! Those parsers are still used here — they supply the version extractor —
//! but the boolean gates that used to live alongside them are now data.
22use std::cell::RefCell;
23use std::collections::BTreeMap;
24use std::sync::OnceLock;
25
26use serde::{Deserialize, Serialize};
27
28use super::providers::anthropic::claude_generation;
29use super::providers::openai_compat::gpt_generation;
30
31/// Shipped default rules. Compiled into the binary at build time.
32const BUILTIN_TOML: &str = include_str!("capabilities.toml");
33
34/// Parsed on-disk capabilities schema. Public so harn-cli can
35/// construct one directly when wiring harn.toml overrides.
36#[derive(Debug, Clone, Deserialize, Default)]
37pub struct CapabilitiesFile {
38    /// Per-provider ordered rule lists. First matching rule wins.
39    #[serde(default)]
40    pub provider: BTreeMap<String, Vec<ProviderRule>>,
41    /// Sibling → canonical family mapping. Providers with no rule of
42    /// their own fall through to the named family (recursively).
43    #[serde(default)]
44    pub provider_family: BTreeMap<String, String>,
45}
46
47/// One row of the capability matrix.
48#[derive(Debug, Clone, Deserialize)]
49pub struct ProviderRule {
50    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
51    /// Matched case-insensitively against the model ID.
52    pub model_match: String,
53    /// Optional `[major, minor]` lower bound. When set, the model ID
54    /// must parse via the provider's version extractor AND compare ≥
55    /// this tuple. Rules with an unparseable `version_min` for the
56    /// given model are skipped, not merged.
57    #[serde(default)]
58    pub version_min: Option<Vec<u32>>,
59    #[serde(default)]
60    pub native_tools: Option<bool>,
61    #[serde(default)]
62    pub defer_loading: Option<bool>,
63    #[serde(default)]
64    pub tool_search: Option<Vec<String>>,
65    #[serde(default)]
66    pub max_tools: Option<u32>,
67    #[serde(default)]
68    pub prompt_caching: Option<bool>,
69    /// Whether this provider/model route accepts image or other visual
70    /// input blocks through Harn's LLM message path.
71    #[serde(default)]
72    pub vision: Option<bool>,
73    /// Whether this provider/model route accepts audio input blocks
74    /// through Harn's LLM message path.
75    #[serde(default, alias = "audio_supported")]
76    pub audio: Option<bool>,
77    /// Whether this provider/model route accepts PDF/document input blocks
78    /// through Harn's LLM message path.
79    #[serde(default, alias = "pdf_supported")]
80    pub pdf: Option<bool>,
81    /// Whether uploaded file references can be reused in message content.
82    #[serde(default)]
83    pub files_api_supported: Option<bool>,
84    /// Structured-output transport strategy. Known values are:
85    /// `native`, `tool_use`, `format_kw`, and `none`.
86    #[serde(default)]
87    pub structured_output: Option<String>,
88    /// Legacy name retained for project overrides written before
89    /// `structured_output` became the canonical capability.
90    #[serde(default)]
91    pub json_schema: Option<String>,
92    /// Supported thinking/reasoning modes for this rule. Values are
93    /// script-facing mode names: `enabled`, `adaptive`, and `effort`.
94    #[serde(default)]
95    pub thinking_modes: Option<Vec<String>>,
96    /// Whether Anthropic interleaved thinking is supported for this
97    /// provider/model route.
98    #[serde(default)]
99    pub interleaved_thinking_supported: Option<bool>,
100    /// Anthropic beta features that should be requested for this route.
101    #[serde(default)]
102    pub anthropic_beta_features: Option<Vec<String>>,
103    /// Legacy override compatibility. New built-in rules should use
104    /// `thinking_modes` so the capability matrix preserves mode detail.
105    #[serde(default)]
106    pub thinking: Option<bool>,
107    /// Whether the model accepts image inputs in chat content.
108    #[serde(default)]
109    pub vision_supported: Option<bool>,
110    /// Carry `<think>...</think>` blocks in assistant history across turns.
111    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
112    /// Alibaba recommends enabling it for long-horizon agent loops so the
113    /// model doesn't re-derive context it already worked out in prior turns.
114    /// Anthropic's adaptive-thinking signature contract is stricter but plays
115    /// the same role there.
116    #[serde(default)]
117    pub preserve_thinking: Option<bool>,
118    /// Name of any server-side response parser that can transform model
119    /// bytes before Harn sees them. `none` means the provider returns the
120    /// model text/tool channel without an implicit parser.
121    #[serde(default)]
122    pub server_parser: Option<String>,
123    /// Whether provider-specific `chat_template_kwargs` are honored.
124    /// Some OpenAI-compatible servers silently drop unknown kwargs.
125    #[serde(default)]
126    pub honors_chat_template_kwargs: Option<bool>,
127    /// Whether this route requires OpenAI's `max_completion_tokens`
128    /// request field instead of legacy `max_tokens`.
129    #[serde(default)]
130    pub requires_completion_tokens: Option<bool>,
131    /// Whether this route accepts OpenAI's `reasoning_effort` request field.
132    #[serde(default)]
133    pub reasoning_effort_supported: Option<bool>,
134    /// Whether this route accepts `reasoning_effort: "none"` as a true
135    /// reasoning-off setting. Older GPT-5 variants support effort but only
136    /// floor at `minimal`.
137    #[serde(default)]
138    pub reasoning_none_supported: Option<bool>,
139    /// Preferred endpoint family for this provider/model route. Values
140    /// are descriptive labels consumed by providers, e.g.
141    /// `/api/generate-raw` for Ollama raw prompt bypass.
142    #[serde(default)]
143    pub recommended_endpoint: Option<String>,
144    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
145    /// survive the provider route and return in the visible response body.
146    #[serde(default)]
147    pub text_tool_wire_format_supported: Option<bool>,
148    /// In-prompt directive that disables this model's "thinking" mode when
149    /// the API doesn't expose a first-class field (or exposes it
150    /// inconsistently across templates / quantizations). For Qwen3 family
151    /// chat templates this is `/no_think`. When `thinking: false` is
152    /// requested and this is set, Harn auto-prepends the directive to the
153    /// system message so script authors don't need to know it exists.
154    #[serde(default)]
155    pub thinking_disable_directive: Option<String>,
156}
157
158/// Resolved capabilities for a `(provider, model)` pair. Unset rule
159/// fields resolve to `false` / empty / `None` so callers never have to
160/// unwrap an `Option<bool>` for what are really boolean gates.
161#[derive(Debug, Clone, PartialEq, Eq)]
162pub struct Capabilities {
163    pub native_tools: bool,
164    pub defer_loading: bool,
165    pub tool_search: Vec<String>,
166    pub max_tools: Option<u32>,
167    pub prompt_caching: bool,
168    pub vision: bool,
169    pub audio: bool,
170    pub pdf: bool,
171    pub files_api_supported: bool,
172    pub structured_output: Option<String>,
173    /// Legacy mirror for CLI display and older callers.
174    pub json_schema: Option<String>,
175    pub thinking_modes: Vec<String>,
176    pub interleaved_thinking_supported: bool,
177    pub anthropic_beta_features: Vec<String>,
178    pub vision_supported: bool,
179    pub preserve_thinking: bool,
180    pub server_parser: String,
181    pub honors_chat_template_kwargs: bool,
182    pub requires_completion_tokens: bool,
183    pub reasoning_effort_supported: bool,
184    pub reasoning_none_supported: bool,
185    pub recommended_endpoint: Option<String>,
186    pub text_tool_wire_format_supported: bool,
187    pub thinking_disable_directive: Option<String>,
188}
189
190impl Default for Capabilities {
191    fn default() -> Self {
192        Self {
193            native_tools: false,
194            defer_loading: false,
195            tool_search: Vec::new(),
196            max_tools: None,
197            prompt_caching: false,
198            vision: false,
199            audio: false,
200            pdf: false,
201            files_api_supported: false,
202            structured_output: None,
203            json_schema: None,
204            thinking_modes: Vec::new(),
205            interleaved_thinking_supported: false,
206            anthropic_beta_features: Vec::new(),
207            vision_supported: false,
208            preserve_thinking: false,
209            server_parser: "none".to_string(),
210            honors_chat_template_kwargs: false,
211            requires_completion_tokens: false,
212            reasoning_effort_supported: false,
213            reasoning_none_supported: false,
214            recommended_endpoint: None,
215            text_tool_wire_format_supported: true,
216            thinking_disable_directive: None,
217        }
218    }
219}
220
221/// Display-oriented row for `harn check --provider-matrix` and the generated
222/// docs page. Rows are intentionally rule-shaped: `model` is the rule's
223/// `model_match` pattern, because the shipped capability source of truth is a
224/// first-match rule table rather than an exhaustive remote model inventory.
225#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
226pub struct ProviderCapabilityMatrixRow {
227    pub provider: String,
228    pub model: String,
229    pub thinking: Vec<String>,
230    pub vision: bool,
231    pub audio: bool,
232    pub pdf: bool,
233    pub streaming: bool,
234    pub files_api_supported: bool,
235    pub json_schema: Option<String>,
236    pub tools: bool,
237    pub cache: bool,
238    pub source: String,
239}
240
241thread_local! {
242    /// Per-thread user overrides installed by the CLI at startup. Kept
243    /// thread-local (not process-static) to match the rest of the VM
244    /// state model — the VM is !Send and each VM thread owns its own
245    /// configuration.
246    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
247}
248
249/// Lazily-parsed built-in rules. The `include_str!` content is a static
250/// constant; parsing it once per process is safe and free of ordering
251/// hazards.
252static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
253
254fn builtin() -> &'static CapabilitiesFile {
255    BUILTIN.get_or_init(|| {
256        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
257            .expect("capabilities.toml must parse at build time")
258    })
259}
260
261/// Install project-level overrides for the current thread. Usually
262/// called once at CLI bootstrap after reading `harn.toml`. Passing
263/// `None` clears any prior override.
264pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
265    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
266}
267
268/// Clear any thread-local user overrides. Used between test runs.
269pub fn clear_user_overrides() {
270    set_user_overrides(None);
271}
272
273/// Parse a TOML string containing the capabilities section's own shape
274/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
275/// same layout used by the built-in `capabilities.toml`) and install as
276/// the current thread's override.
277pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
278    let parsed: CapabilitiesFile = toml::from_str(src).map_err(|e| e.to_string())?;
279    set_user_overrides(Some(parsed));
280    Ok(())
281}
282
283/// Extract the `[capabilities]` section from a full `harn.toml` source
284/// and install it as the current thread's override. The schema inside
285/// that section mirrors `CapabilitiesFile` but with every key prefixed
286/// by `capabilities.`:
287///
288/// ```toml
289/// [[capabilities.provider.my-proxy]]
290/// model_match = "*"
291/// native_tools = true
292/// tool_search = ["hosted"]
293/// ```
294pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
295    #[derive(Deserialize)]
296    struct Manifest {
297        #[serde(default)]
298        capabilities: Option<CapabilitiesFile>,
299    }
300    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
301    set_user_overrides(parsed.capabilities);
302    Ok(())
303}
304
305/// Look up effective capabilities for a `(provider, model)` pair.
306/// Walks the provider_family chain until it finds a rule list that
307/// matches. Within any one provider's rule list, user overrides are
308/// consulted before the built-in rules. The first matching rule wins —
309/// later rules (and later layers in the family chain) are ignored.
310pub fn lookup(provider: &str, model: &str) -> Capabilities {
311    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
312    lookup_with(provider, model, builtin(), user.as_ref())
313}
314
315/// Return the currently-effective provider capability rule matrix. User
316/// override rows, when installed for the current thread, are emitted before
317/// built-in rows so the display mirrors lookup precedence.
318pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
319    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
320    let mut rows = Vec::new();
321    if let Some(user) = user.as_ref() {
322        push_matrix_rows(&mut rows, user, "project");
323    }
324    push_matrix_rows(&mut rows, builtin(), "builtin");
325    rows
326}
327
328fn push_matrix_rows(
329    rows: &mut Vec<ProviderCapabilityMatrixRow>,
330    file: &CapabilitiesFile,
331    source: &str,
332) {
333    for (provider, rules) in &file.provider {
334        for rule in rules {
335            rows.push(rule_to_matrix_row(provider, rule, source));
336        }
337    }
338}
339
340fn rule_to_matrix_row(
341    provider: &str,
342    rule: &ProviderRule,
343    source: &str,
344) -> ProviderCapabilityMatrixRow {
345    ProviderCapabilityMatrixRow {
346        provider: provider.to_string(),
347        model: rule.model_match.clone(),
348        thinking: rule_thinking_modes(rule),
349        vision: rule_vision(rule),
350        audio: rule.audio.unwrap_or(false),
351        pdf: rule.pdf.unwrap_or(false),
352        streaming: true,
353        files_api_supported: rule.files_api_supported.unwrap_or(false),
354        json_schema: rule_structured_output(rule),
355        tools: rule.native_tools.unwrap_or(false),
356        cache: rule.prompt_caching.unwrap_or(false),
357        source: source.to_string(),
358    }
359}
360
361fn rule_thinking_modes(rule: &ProviderRule) -> Vec<String> {
362    rule.thinking_modes.clone().unwrap_or_else(|| {
363        if rule.thinking.unwrap_or(false) {
364            vec!["enabled".to_string()]
365        } else {
366            Vec::new()
367        }
368    })
369}
370
371fn rule_vision(rule: &ProviderRule) -> bool {
372    rule.vision.or(rule.vision_supported).unwrap_or(false)
373}
374
375fn lookup_with(
376    provider: &str,
377    model: &str,
378    builtin: &CapabilitiesFile,
379    user: Option<&CapabilitiesFile>,
380) -> Capabilities {
381    // Special case: mock spoofs either shape. Try anthropic first
382    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
383    // resolves to the Anthropic capability row — the same behaviour
384    // the hardcoded dispatch gave before this refactor.
385    if provider == "mock" {
386        if let Some(caps) = try_match_layer(user, builtin, "anthropic", model, provider) {
387            return caps;
388        }
389        if let Some(caps) = try_match_layer(user, builtin, "openai", model, provider) {
390            return caps;
391        }
392        return Capabilities::default();
393    }
394
395    // Normal chain: walk provider → family(provider) → ... with a
396    // visited-guard to avoid cycles in malformed user overrides.
397    let mut current = provider.to_string();
398    let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
399    while visited.insert(current.clone()) {
400        if let Some(caps) = try_match_layer(user, builtin, &current, model, provider) {
401            return caps;
402        }
403        let next = user
404            .and_then(|f| f.provider_family.get(&current))
405            .or_else(|| builtin.provider_family.get(&current))
406            .cloned();
407        match next {
408            Some(parent) => current = parent,
409            None => break,
410        }
411    }
412    Capabilities::default()
413}
414
415/// Try the ordered rule list for `layer_provider` (user rules first,
416/// then built-in rules). Returns `Some(caps)` on the first match, else
417/// `None`. `original_provider` is threaded through only for diagnostics.
418fn try_match_layer(
419    user: Option<&CapabilitiesFile>,
420    builtin: &CapabilitiesFile,
421    layer_provider: &str,
422    model: &str,
423    _original_provider: &str,
424) -> Option<Capabilities> {
425    if let Some(user) = user {
426        if let Some(rules) = user.provider.get(layer_provider) {
427            for rule in rules {
428                if rule_matches(rule, model) {
429                    return Some(rule_to_caps(rule));
430                }
431            }
432        }
433    }
434    if let Some(rules) = builtin.provider.get(layer_provider) {
435        for rule in rules {
436            if rule_matches(rule, model) {
437                return Some(rule_to_caps(rule));
438            }
439        }
440    }
441    None
442}
443
444fn rule_to_caps(rule: &ProviderRule) -> Capabilities {
445    let thinking_modes = rule_thinking_modes(rule);
446    Capabilities {
447        native_tools: rule.native_tools.unwrap_or(false),
448        defer_loading: rule.defer_loading.unwrap_or(false),
449        tool_search: rule.tool_search.clone().unwrap_or_default(),
450        max_tools: rule.max_tools,
451        prompt_caching: rule.prompt_caching.unwrap_or(false),
452        vision: rule_vision(rule),
453        audio: rule.audio.unwrap_or(false),
454        pdf: rule.pdf.unwrap_or(false),
455        files_api_supported: rule.files_api_supported.unwrap_or(false),
456        structured_output: rule_structured_output(rule),
457        json_schema: rule_structured_output(rule),
458        thinking_modes,
459        interleaved_thinking_supported: rule.interleaved_thinking_supported.unwrap_or(false),
460        anthropic_beta_features: rule.anthropic_beta_features.clone().unwrap_or_default(),
461        vision_supported: rule.vision_supported.unwrap_or(false),
462        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
463        server_parser: rule
464            .server_parser
465            .clone()
466            .unwrap_or_else(|| "none".to_string()),
467        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
468        requires_completion_tokens: rule.requires_completion_tokens.unwrap_or(false),
469        reasoning_effort_supported: rule.reasoning_effort_supported.unwrap_or(false),
470        reasoning_none_supported: rule.reasoning_none_supported.unwrap_or(false),
471        recommended_endpoint: rule.recommended_endpoint.clone(),
472        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
473        thinking_disable_directive: rule.thinking_disable_directive.clone(),
474    }
475}
476
477fn rule_structured_output(rule: &ProviderRule) -> Option<String> {
478    rule.structured_output
479        .clone()
480        .or_else(|| rule.json_schema.clone())
481        .filter(|value| value != "none")
482}
483
484fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
485    let lower = model.to_lowercase();
486    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
487        return false;
488    }
489    if let Some(version_min) = &rule.version_min {
490        if version_min.len() != 2 {
491            return false;
492        }
493        let want = (version_min[0], version_min[1]);
494        let have = match extract_version(model) {
495            Some(v) => v,
496            // `version_min` was set but the model ID can't be parsed.
497            // Fail closed: skip this rule so more permissive catch-all
498            // rules below can still match.
499            None => return false,
500        };
501        if have < want {
502            return false;
503        }
504    }
505    true
506}
507
508/// Extract `(major, minor)` from a model ID by trying the Anthropic
509/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
510/// Both parsers return `None` for shapes they don't recognise so this
511/// never mis-parses across families.
512fn extract_version(model: &str) -> Option<(u32, u32)> {
513    claude_generation(model).or_else(|| gpt_generation(model))
514}
515
516/// Simple glob matching with `*` wildcards. Mirrors the helper in
517/// `llm_config.rs` — keep them in sync if either ever grows regex or
518/// character-class support.
519fn glob_match(pattern: &str, input: &str) -> bool {
520    if let Some(prefix) = pattern.strip_suffix('*') {
521        if let Some(rest) = prefix.strip_prefix('*') {
522            // `*foo*` — substring match.
523            return input.contains(rest);
524        }
525        return input.starts_with(prefix);
526    }
527    if let Some(suffix) = pattern.strip_prefix('*') {
528        return input.ends_with(suffix);
529    }
530    if pattern.contains('*') {
531        let parts: Vec<&str> = pattern.split('*').collect();
532        if parts.len() == 2 {
533            return input.starts_with(parts[0]) && input.ends_with(parts[1]);
534        }
535        return input == pattern;
536    }
537    input == pattern
538}
539
540#[cfg(test)]
541mod tests {
542    use super::*;
543
544    fn reset() {
545        clear_user_overrides();
546    }
547
548    #[test]
549    fn anthropic_opus_47_gets_full_capabilities() {
550        reset();
551        let caps = lookup("anthropic", "claude-opus-4-7");
552        assert!(caps.native_tools);
553        assert!(caps.defer_loading);
554        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
555        assert!(caps.prompt_caching);
556        assert_eq!(caps.thinking_modes, vec!["adaptive"]);
557        assert!(caps.vision_supported);
558        assert!(caps.audio);
559        assert!(caps.pdf);
560        assert!(caps.files_api_supported);
561        assert_eq!(caps.max_tools, Some(10000));
562    }
563
564    #[test]
565    fn anthropic_opus_46_uses_budgeted_thinking() {
566        reset();
567        let caps = lookup("anthropic", "claude-opus-4-6");
568        assert_eq!(caps.thinking_modes, vec!["enabled"]);
569        assert!(caps.interleaved_thinking_supported);
570    }
571
572    #[test]
573    fn anthropic_opus_45_does_not_support_interleaved_thinking() {
574        reset();
575        let caps = lookup("anthropic", "claude-opus-4-5");
576        assert_eq!(caps.thinking_modes, vec!["enabled"]);
577        assert!(!caps.interleaved_thinking_supported);
578    }
579
580    #[test]
581    fn override_can_supply_anthropic_beta_features() {
582        reset();
583        let toml_src = r#"
584[[provider.anthropic]]
585model_match = "claude-custom-*"
586native_tools = true
587anthropic_beta_features = ["fine-grained-tool-streaming-2025-05-14"]
588"#;
589        set_user_overrides_toml(toml_src).unwrap();
590        let caps = lookup("anthropic", "claude-custom-1");
591        assert_eq!(
592            caps.anthropic_beta_features,
593            vec!["fine-grained-tool-streaming-2025-05-14"]
594        );
595        reset();
596    }
597
598    #[test]
599    fn anthropic_haiku_44_has_no_tool_search() {
600        reset();
601        let caps = lookup("anthropic", "claude-haiku-4-4");
602        // Haiku 4.4 falls through to the `claude-*` catch-all row.
603        assert!(caps.native_tools);
604        assert!(caps.prompt_caching);
605        assert!(!caps.defer_loading);
606        assert!(caps.tool_search.is_empty());
607    }
608
609    #[test]
610    fn anthropic_haiku_45_supports_tool_search() {
611        reset();
612        let caps = lookup("anthropic", "claude-haiku-4-5");
613        assert!(caps.defer_loading);
614        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
615    }
616
617    #[test]
618    fn old_claude_gets_catchall() {
619        reset();
620        let caps = lookup("anthropic", "claude-opus-3-5");
621        assert!(caps.native_tools);
622        assert!(caps.prompt_caching);
623        assert!(!caps.defer_loading);
624        assert!(caps.tool_search.is_empty());
625    }
626
627    #[test]
628    fn openai_gpt_54_supports_tool_search() {
629        reset();
630        let caps = lookup("openai", "gpt-5.4");
631        assert!(caps.defer_loading);
632        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
633        assert_eq!(caps.json_schema.as_deref(), Some("native"));
634        assert_eq!(caps.thinking_modes, vec!["effort"]);
635        assert!(caps.reasoning_effort_supported);
636        assert!(caps.reasoning_none_supported);
637    }
638
639    #[test]
640    fn openai_gpt_53_has_reasoning_none_without_tool_search() {
641        reset();
642        let caps = lookup("openai", "gpt-5.3");
643        assert!(caps.native_tools);
644        assert!(!caps.defer_loading);
645        assert!(caps.vision_supported);
646        assert!(caps.tool_search.is_empty());
647        assert_eq!(caps.thinking_modes, vec!["effort"]);
648        assert!(caps.reasoning_effort_supported);
649        assert!(caps.reasoning_none_supported);
650    }
651
652    #[test]
653    fn openai_original_gpt_5_has_reasoning_floor_without_none() {
654        reset();
655        let caps = lookup("openai", "gpt-5");
656        assert!(caps.native_tools);
657        assert!(!caps.defer_loading);
658        assert_eq!(caps.thinking_modes, vec!["effort"]);
659        assert!(caps.reasoning_effort_supported);
660        assert!(!caps.reasoning_none_supported);
661    }
662
663    #[test]
664    fn openai_gpt_4o_matrix_fields_include_multimodal_support() {
665        reset();
666        let caps = lookup("openai", "gpt-4o");
667        assert!(caps.native_tools);
668        assert!(caps.vision);
669        assert!(caps.audio);
670        assert!(!caps.pdf);
671        assert_eq!(caps.json_schema.as_deref(), Some("native"));
672    }
673
674    #[test]
675    fn openai_reasoning_models_support_effort() {
676        reset();
677        let caps = lookup("openai", "o3");
678        assert_eq!(caps.thinking_modes, vec!["effort"]);
679        assert!(caps.requires_completion_tokens);
680        assert!(caps.reasoning_effort_supported);
681        let prefixed = lookup("openrouter", "openai/o4-mini");
682        assert!(prefixed.requires_completion_tokens);
683        assert!(prefixed.reasoning_effort_supported);
684    }
685
686    #[test]
687    fn vision_capability_gates_known_multimodal_models() {
688        reset();
689        assert!(lookup("openai", "gpt-4o").vision_supported);
690        assert!(lookup("openai", "gpt-5.4-preview").vision_supported);
691        assert!(lookup("anthropic", "claude-sonnet-4-6").vision_supported);
692        assert!(lookup("anthropic", "claude-sonnet-4-6").pdf);
693        assert!(lookup("anthropic", "claude-sonnet-4-6").files_api_supported);
694        assert!(lookup("openrouter", "google/gemini-2.5-flash").vision_supported);
695        assert!(lookup("gemini", "gemini-2.5-flash").vision_supported);
696        assert!(lookup("gemini", "gemini-2.5-flash").audio);
697        assert!(lookup("gemini", "gemini-2.5-flash").pdf);
698        assert!(lookup("ollama", "llava:latest").vision_supported);
699        assert!(!lookup("openai", "gpt-3.5-turbo").vision_supported);
700        assert!(!lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4").vision_supported);
701    }
702
703    #[test]
704    fn openrouter_inherits_openai() {
705        reset();
706        let caps = lookup("openrouter", "gpt-5.4");
707        assert!(caps.defer_loading);
708        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
709    }
710
711    #[test]
712    fn groq_inherits_openai_family_only() {
713        reset();
714        let caps = lookup("groq", "gpt-5.5-preview");
715        assert!(caps.defer_loading);
716    }
717
718    #[test]
719    fn mock_with_claude_model_routes_to_anthropic() {
720        reset();
721        let caps = lookup("mock", "claude-sonnet-4-7");
722        assert!(caps.defer_loading);
723        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
724    }
725
726    #[test]
727    fn mock_with_gpt_model_routes_to_openai() {
728        reset();
729        let caps = lookup("mock", "gpt-5.4-preview");
730        assert!(caps.defer_loading);
731        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
732    }
733
734    #[test]
735    fn qwen36_ollama_preserves_thinking() {
736        reset();
737        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
738        assert!(caps.native_tools);
739        assert_eq!(caps.json_schema.as_deref(), Some("format_kw"));
740        assert!(!caps.thinking_modes.is_empty());
741        assert!(
742            caps.preserve_thinking,
743            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
744        );
745        assert_eq!(caps.server_parser, "ollama_qwen3coder");
746        assert!(!caps.honors_chat_template_kwargs);
747        assert_eq!(
748            caps.recommended_endpoint.as_deref(),
749            Some("/api/generate-raw")
750        );
751        assert!(!caps.text_tool_wire_format_supported);
752    }
753
754    #[test]
755    fn qwen35_ollama_does_not_preserve_thinking() {
756        reset();
757        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
758        assert!(caps.native_tools);
759        assert!(!caps.thinking_modes.is_empty());
760        assert!(
761            !caps.preserve_thinking,
762            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
763        );
764        assert_eq!(caps.server_parser, "ollama_qwen3coder");
765        assert!(!caps.text_tool_wire_format_supported);
766    }
767
768    #[test]
769    fn qwen36_routed_providers_all_preserve_thinking() {
770        reset();
771        for (provider, model) in [
772            ("openrouter", "qwen/qwen3.6-plus"),
773            ("together", "Qwen/Qwen3.6-Plus"),
774            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
775            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
776            ("dashscope", "qwen3.6-plus"),
777            ("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF"),
778            ("local", "Qwen3.6-35B-A3B"),
779            ("mlx", "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
780            ("mlx", "Qwen/Qwen3.6-27B"),
781        ] {
782            let caps = lookup(provider, model);
783            assert!(
784                !caps.thinking_modes.is_empty(),
785                "{provider}/{model}: thinking"
786            );
787            assert!(
788                caps.preserve_thinking,
789                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
790            );
791            assert!(caps.native_tools, "{provider}/{model}: native_tools");
792            assert_ne!(
793                caps.server_parser, "ollama_qwen3coder",
794                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
795            );
796        }
797    }
798
799    #[test]
800    fn qwen_coder_models_do_not_claim_thinking_modes() {
801        reset();
802        for (provider, model) in [
803            ("together", "Qwen/Qwen3-Coder-Next-FP8"),
804            ("together", "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"),
805            ("openrouter", "qwen/qwen3-coder-next"),
806            ("huggingface", "Qwen/Qwen3-Coder-Next"),
807        ] {
808            let caps = lookup(provider, model);
809            assert!(caps.native_tools, "{provider}/{model}: native_tools");
810            assert!(
811                caps.thinking_modes.is_empty(),
812                "{provider}/{model}: coder models are non-thinking routes"
813            );
814            assert!(
815                !caps.preserve_thinking,
816                "{provider}/{model}: preserve_thinking must stay off"
817            );
818            assert!(
819                caps.thinking_disable_directive.is_none(),
820                "{provider}/{model}: no /no_think shim should be needed"
821            );
822        }
823    }
824
825    #[test]
826    fn llamacpp_qwen_keeps_text_tool_wire_format() {
827        reset();
828        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
829        assert_eq!(caps.server_parser, "none");
830        assert!(caps.honors_chat_template_kwargs);
831        assert!(caps.text_tool_wire_format_supported);
832        assert_eq!(
833            caps.recommended_endpoint.as_deref(),
834            Some("/v1/chat/completions")
835        );
836    }
837
838    #[test]
839    fn dashscope_and_llamacpp_resolve_capabilities() {
840        reset();
841        // New sibling providers should fall through to `openai` for
842        // gpt-*  models even without dedicated rules.
843        let caps = lookup("dashscope", "gpt-5.4-preview");
844        assert!(caps.defer_loading);
845        let caps = lookup("llamacpp", "gpt-5.4-preview");
846        assert!(caps.defer_loading);
847    }
848
849    #[test]
850    fn unknown_provider_has_no_capabilities() {
851        reset();
852        let caps = lookup("my-custom-proxy", "foo-bar-1");
853        assert!(!caps.native_tools);
854        assert!(!caps.defer_loading);
855        assert!(caps.tool_search.is_empty());
856    }
857
858    #[test]
859    fn user_override_adds_new_provider() {
860        reset();
861        let toml_src = r#"
862[[provider.my-proxy]]
863model_match = "*"
864native_tools = true
865tool_search = ["hosted"]
866"#;
867        set_user_overrides_toml(toml_src).unwrap();
868        let caps = lookup("my-proxy", "anything");
869        assert!(caps.native_tools);
870        assert_eq!(caps.tool_search, vec!["hosted"]);
871        clear_user_overrides();
872    }
873
874    #[test]
875    fn user_override_takes_precedence_over_builtin() {
876        reset();
877        let toml_src = r#"
878[[provider.anthropic]]
879model_match = "claude-opus-*"
880native_tools = true
881defer_loading = false
882tool_search = []
883"#;
884        set_user_overrides_toml(toml_src).unwrap();
885        let caps = lookup("anthropic", "claude-opus-4-7");
886        assert!(caps.native_tools);
887        assert!(!caps.defer_loading);
888        assert!(caps.tool_search.is_empty());
889        clear_user_overrides();
890    }
891
892    #[test]
893    fn user_override_from_manifest_toml() {
894        reset();
895        let manifest = r#"
896[package]
897name = "demo"
898
899[[capabilities.provider.my-proxy]]
900model_match = "*"
901native_tools = true
902tool_search = ["hosted"]
903"#;
904        set_user_overrides_from_manifest_toml(manifest).unwrap();
905        let caps = lookup("my-proxy", "foo");
906        assert!(caps.native_tools);
907        assert_eq!(caps.tool_search, vec!["hosted"]);
908        clear_user_overrides();
909    }
910
911    #[test]
912    fn version_min_requires_parseable_model() {
913        reset();
914        let toml_src = r#"
915[[provider.custom]]
916model_match = "*"
917version_min = [5, 4]
918native_tools = true
919"#;
920        set_user_overrides_toml(toml_src).unwrap();
921        // Unparseable model ID + version_min → rule doesn't match.
922        let caps = lookup("custom", "mystery-model");
923        assert!(!caps.native_tools);
924        clear_user_overrides();
925    }
926
927    #[test]
928    fn glob_match_substring() {
929        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
930        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
931        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
932    }
933
934    #[test]
935    fn openrouter_namespaced_anthropic_model() {
936        reset();
937        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
938        assert!(caps.defer_loading);
939    }
940
941    #[test]
942    fn matrix_rows_include_provider_patterns_and_sources() {
943        reset();
944        let rows = matrix_rows();
945        assert!(rows.iter().any(|row| {
946            row.provider == "openai"
947                && row.model == "gpt-4o*"
948                && row.vision
949                && row.audio
950                && row.json_schema.as_deref() == Some("native")
951                && row.source == "builtin"
952        }));
953    }
954}