// harn_vm/llm/capabilities.rs
1//! Data-driven provider capabilities.
2//!
3//! The per-(provider, model) capability matrix (native tools, deferred
4//! tool loading, tool-search variants, prompt caching, extended thinking,
5//! max tool count) lives in the shipped `capabilities.toml` and is
6//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
7//! in `harn.toml`. This module owns:
8//!
9//! - loading the built-in TOML (compiled in via `include_str!`);
10//! - merging user overrides on top;
11//! - matching a `(provider, model)` pair against the rule list with
12//!   glob + semver semantics;
13//! - exposing a stable `Capabilities` struct that the `LlmProvider`
14//!   trait delegates to as the single source of truth.
15//!
16//! Before this module the Anthropic / OpenAI gates were spread across
17//! `providers/anthropic.rs` (`claude_generation`, `claude_model_supports_tool_search`)
18//! and `providers/openai_compat.rs` (`gpt_generation`, `gpt_model_supports_tool_search`).
19//! Those parsers are still used here — they supply the version extractor —
20//! but the boolean gates that used to live alongside them are now data.
21
22use std::cell::RefCell;
23use std::collections::BTreeMap;
24use std::sync::OnceLock;
25
26use serde::{Deserialize, Serialize};
27
28use super::providers::anthropic::claude_generation;
29use super::providers::openai_compat::gpt_generation;
30
/// Shipped default rules. Compiled into the binary at build time.
/// `include_str!` resolves the path relative to this source file, so
/// `capabilities.toml` must sit next to `capabilities.rs`.
const BUILTIN_TOML: &str = include_str!("capabilities.toml");
33
/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists. First matching rule wins.
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Sibling → canonical family mapping. Providers with no rule of
    /// their own fall through to the named family (recursively).
    /// Cycles in this map are tolerated: lookup keeps a visited set and
    /// stops rather than looping forever.
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
}
46
/// One row of the capability matrix.
///
/// Every capability field is `Option` so that "not stated" is
/// distinguishable from an explicit `false`/empty; lookup collapses
/// unset fields to conservative defaults.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules with an unparseable `version_min` for the
    /// given model are skipped, not merged.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    /// Whether the provider's native (API-level) tool calling is
    /// available for matching models.
    #[serde(default)]
    pub native_tools: Option<bool>,
    /// Whether deferred tool loading is supported on this route.
    #[serde(default)]
    pub defer_loading: Option<bool>,
    /// Supported tool-search variants for this route (e.g. `bm25`,
    /// `regex`, `hosted`, `client`).
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    /// Maximum tool count accepted per request, when the route imposes one.
    #[serde(default)]
    pub max_tools: Option<u32>,
    /// Whether prompt caching is supported on this route.
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Whether this provider/model route accepts image or other visual
    /// input blocks through Harn's LLM message path.
    #[serde(default)]
    pub vision: Option<bool>,
    /// Whether this provider/model route accepts audio input blocks
    /// through Harn's LLM message path.
    #[serde(default, alias = "audio_supported")]
    pub audio: Option<bool>,
    /// Whether this provider/model route accepts PDF/document input blocks
    /// through Harn's LLM message path.
    #[serde(default, alias = "pdf_supported")]
    pub pdf: Option<bool>,
    /// Whether uploaded file references can be reused in message content.
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    /// Structured-output transport strategy. Known values are:
    /// `native`, `tool_use`, `format_kw`, and `none`.
    #[serde(default)]
    pub structured_output: Option<String>,
    /// Legacy name retained for project overrides written before
    /// `structured_output` became the canonical capability.
    #[serde(default)]
    pub json_schema: Option<String>,
    /// Supported thinking/reasoning modes for this rule. Values are
    /// script-facing mode names: `enabled`, `adaptive`, and `effort`.
    #[serde(default)]
    pub thinking_modes: Option<Vec<String>>,
    /// Whether Anthropic interleaved thinking is supported for this
    /// provider/model route.
    #[serde(default)]
    pub interleaved_thinking_supported: Option<bool>,
    /// Anthropic beta features that should be requested for this route.
    #[serde(default)]
    pub anthropic_beta_features: Option<Vec<String>>,
    /// Legacy override compatibility. New built-in rules should use
    /// `thinking_modes` so the capability matrix preserves mode detail.
    /// `thinking = true` resolves to the single `enabled` mode.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Whether the model accepts image inputs in chat content.
    #[serde(default)]
    pub vision_supported: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
    /// Name of any server-side response parser that can transform model
    /// bytes before Harn sees them. `none` means the provider returns the
    /// model text/tool channel without an implicit parser.
    #[serde(default)]
    pub server_parser: Option<String>,
    /// Whether provider-specific `chat_template_kwargs` are honored.
    /// Some OpenAI-compatible servers silently drop unknown kwargs.
    #[serde(default)]
    pub honors_chat_template_kwargs: Option<bool>,
    /// Whether this route requires OpenAI's `max_completion_tokens`
    /// request field instead of legacy `max_tokens`.
    #[serde(default)]
    pub requires_completion_tokens: Option<bool>,
    /// Whether this route accepts OpenAI's `reasoning_effort` request field.
    #[serde(default)]
    pub reasoning_effort_supported: Option<bool>,
    /// Whether this route accepts `reasoning_effort: "none"` as a true
    /// reasoning-off setting. Older GPT-5 variants support effort but only
    /// floor at `minimal`.
    #[serde(default)]
    pub reasoning_none_supported: Option<bool>,
    /// Preferred endpoint family for this provider/model route. Values
    /// are descriptive labels consumed by providers, e.g.
    /// `/api/generate-raw` for Ollama raw prompt bypass.
    #[serde(default)]
    pub recommended_endpoint: Option<String>,
    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
    /// survive the provider route and return in the visible response body.
    #[serde(default)]
    pub text_tool_wire_format_supported: Option<bool>,
    /// In-prompt directive that disables this model's "thinking" mode when
    /// the API doesn't expose a first-class field (or exposes it
    /// inconsistently across templates / quantizations). For Qwen3 family
    /// chat templates this is `/no_think`. When `thinking: false` is
    /// requested and this is set, Harn auto-prepends the directive to the
    /// system message so script authors don't need to know it exists.
    #[serde(default)]
    pub thinking_disable_directive: Option<String>,
}
157
/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to `false` / empty / `None` so callers never have to
/// unwrap an `Option<bool>` for what are really boolean gates.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    pub native_tools: bool,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub files_api_supported: bool,
    pub structured_output: Option<String>,
    /// Legacy mirror for CLI display and older callers.
    pub json_schema: Option<String>,
    pub thinking_modes: Vec<String>,
    pub interleaved_thinking_supported: bool,
    pub anthropic_beta_features: Vec<String>,
    pub vision_supported: bool,
    pub preserve_thinking: bool,
    // Always a concrete name; resolves to the literal "none" when the
    // matched rule leaves it unset.
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub requires_completion_tokens: bool,
    pub reasoning_effort_supported: bool,
    pub reasoning_none_supported: bool,
    pub recommended_endpoint: Option<String>,
    // Unlike the other gates this one defaults to `true` when unset.
    pub text_tool_wire_format_supported: bool,
    pub thinking_disable_directive: Option<String>,
}
189
190impl Default for Capabilities {
191    fn default() -> Self {
192        Self {
193            native_tools: false,
194            defer_loading: false,
195            tool_search: Vec::new(),
196            max_tools: None,
197            prompt_caching: false,
198            vision: false,
199            audio: false,
200            pdf: false,
201            files_api_supported: false,
202            structured_output: None,
203            json_schema: None,
204            thinking_modes: Vec::new(),
205            interleaved_thinking_supported: false,
206            anthropic_beta_features: Vec::new(),
207            vision_supported: false,
208            preserve_thinking: false,
209            server_parser: "none".to_string(),
210            honors_chat_template_kwargs: false,
211            requires_completion_tokens: false,
212            reasoning_effort_supported: false,
213            reasoning_none_supported: false,
214            recommended_endpoint: None,
215            text_tool_wire_format_supported: true,
216            thinking_disable_directive: None,
217        }
218    }
219}
220
/// Display-oriented row for `harn check --provider-matrix` and the generated
/// docs page. Rows are intentionally rule-shaped: `model` is the rule's
/// `model_match` pattern, because the shipped capability source of truth is a
/// first-match rule table rather than an exhaustive remote model inventory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ProviderCapabilityMatrixRow {
    pub provider: String,
    pub model: String,
    pub thinking: Vec<String>,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    // Currently reported unconditionally as `true` by `rule_to_matrix_row`.
    pub streaming: bool,
    pub files_api_supported: bool,
    // Resolved structured-output label under its legacy column name.
    pub json_schema: Option<String>,
    pub tools: bool,
    pub cache: bool,
    // Provenance of the rule: "project" (override) or "builtin".
    pub source: String,
}
240
thread_local! {
    /// Per-thread user overrides installed by the CLI at startup. Kept
    /// thread-local (not process-static) to match the rest of the VM
    /// state model — the VM is !Send and each VM thread owns its own
    /// configuration.
    ///
    /// Written via [`set_user_overrides`]; read (cloned) by [`lookup`]
    /// and [`matrix_rows`].
    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
}
248
249/// Lazily-parsed built-in rules. The `include_str!` content is a static
250/// constant; parsing it once per process is safe and free of ordering
251/// hazards.
252static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
253
254fn builtin() -> &'static CapabilitiesFile {
255    BUILTIN.get_or_init(|| {
256        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
257            .expect("capabilities.toml must parse at build time")
258    })
259}
260
261/// Install project-level overrides for the current thread. Usually
262/// called once at CLI bootstrap after reading `harn.toml`. Passing
263/// `None` clears any prior override.
264pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
265    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
266}
267
268/// Clear any thread-local user overrides. Used between test runs.
269pub fn clear_user_overrides() {
270    set_user_overrides(None);
271}
272
273/// Parse a TOML string containing the capabilities section's own shape
274/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
275/// same layout used by the built-in `capabilities.toml`) and install as
276/// the current thread's override.
277pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
278    let parsed: CapabilitiesFile = toml::from_str(src).map_err(|e| e.to_string())?;
279    set_user_overrides(Some(parsed));
280    Ok(())
281}
282
283/// Extract the `[capabilities]` section from a full `harn.toml` source
284/// and install it as the current thread's override. The schema inside
285/// that section mirrors `CapabilitiesFile` but with every key prefixed
286/// by `capabilities.`:
287///
288/// ```toml
289/// [[capabilities.provider.my-proxy]]
290/// model_match = "*"
291/// native_tools = true
292/// tool_search = ["hosted"]
293/// ```
294pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
295    #[derive(Deserialize)]
296    struct Manifest {
297        #[serde(default)]
298        capabilities: Option<CapabilitiesFile>,
299    }
300    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
301    set_user_overrides(parsed.capabilities);
302    Ok(())
303}
304
305/// Look up effective capabilities for a `(provider, model)` pair.
306/// Walks the provider_family chain until it finds a rule list that
307/// matches. Within any one provider's rule list, user overrides are
308/// consulted before the built-in rules. The first matching rule wins —
309/// later rules (and later layers in the family chain) are ignored.
310pub fn lookup(provider: &str, model: &str) -> Capabilities {
311    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
312    lookup_with(provider, model, builtin(), user.as_ref())
313}
314
315/// Return the currently-effective provider capability rule matrix. User
316/// override rows, when installed for the current thread, are emitted before
317/// built-in rows so the display mirrors lookup precedence.
318pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
319    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
320    let mut rows = Vec::new();
321    if let Some(user) = user.as_ref() {
322        push_matrix_rows(&mut rows, user, "project");
323    }
324    push_matrix_rows(&mut rows, builtin(), "builtin");
325    rows
326}
327
328fn push_matrix_rows(
329    rows: &mut Vec<ProviderCapabilityMatrixRow>,
330    file: &CapabilitiesFile,
331    source: &str,
332) {
333    for (provider, rules) in &file.provider {
334        for rule in rules {
335            rows.push(rule_to_matrix_row(provider, rule, source));
336        }
337    }
338}
339
340fn rule_to_matrix_row(
341    provider: &str,
342    rule: &ProviderRule,
343    source: &str,
344) -> ProviderCapabilityMatrixRow {
345    ProviderCapabilityMatrixRow {
346        provider: provider.to_string(),
347        model: rule.model_match.clone(),
348        thinking: rule_thinking_modes(rule),
349        vision: rule_vision(rule),
350        audio: rule.audio.unwrap_or(false),
351        pdf: rule.pdf.unwrap_or(false),
352        streaming: true,
353        files_api_supported: rule.files_api_supported.unwrap_or(false),
354        json_schema: rule_structured_output(rule),
355        tools: rule.native_tools.unwrap_or(false),
356        cache: rule.prompt_caching.unwrap_or(false),
357        source: source.to_string(),
358    }
359}
360
361fn rule_thinking_modes(rule: &ProviderRule) -> Vec<String> {
362    rule.thinking_modes.clone().unwrap_or_else(|| {
363        if rule.thinking.unwrap_or(false) {
364            vec!["enabled".to_string()]
365        } else {
366            Vec::new()
367        }
368    })
369}
370
371fn rule_vision(rule: &ProviderRule) -> bool {
372    rule.vision.or(rule.vision_supported).unwrap_or(false)
373}
374
375fn lookup_with(
376    provider: &str,
377    model: &str,
378    builtin: &CapabilitiesFile,
379    user: Option<&CapabilitiesFile>,
380) -> Capabilities {
381    // Special case: mock spoofs either shape. Try anthropic first
382    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
383    // resolves to the Anthropic capability row — the same behaviour
384    // the hardcoded dispatch gave before this refactor.
385    if provider == "mock" {
386        if let Some(caps) = try_match_layer(user, builtin, "anthropic", model, provider) {
387            return caps;
388        }
389        if let Some(caps) = try_match_layer(user, builtin, "openai", model, provider) {
390            return caps;
391        }
392        return Capabilities::default();
393    }
394
395    // Normal chain: walk provider → family(provider) → ... with a
396    // visited-guard to avoid cycles in malformed user overrides.
397    let mut current = provider.to_string();
398    let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
399    while visited.insert(current.clone()) {
400        if let Some(caps) = try_match_layer(user, builtin, &current, model, provider) {
401            return caps;
402        }
403        let next = user
404            .and_then(|f| f.provider_family.get(&current))
405            .or_else(|| builtin.provider_family.get(&current))
406            .cloned();
407        match next {
408            Some(parent) => current = parent,
409            None => break,
410        }
411    }
412    Capabilities::default()
413}
414
415/// Try the ordered rule list for `layer_provider` (user rules first,
416/// then built-in rules). Returns `Some(caps)` on the first match, else
417/// `None`. `original_provider` is threaded through only for diagnostics.
418fn try_match_layer(
419    user: Option<&CapabilitiesFile>,
420    builtin: &CapabilitiesFile,
421    layer_provider: &str,
422    model: &str,
423    _original_provider: &str,
424) -> Option<Capabilities> {
425    if let Some(user) = user {
426        if let Some(rules) = user.provider.get(layer_provider) {
427            for rule in rules {
428                if rule_matches(rule, model) {
429                    return Some(rule_to_caps(rule));
430                }
431            }
432        }
433    }
434    if let Some(rules) = builtin.provider.get(layer_provider) {
435        for rule in rules {
436            if rule_matches(rule, model) {
437                return Some(rule_to_caps(rule));
438            }
439        }
440    }
441    None
442}
443
444fn rule_to_caps(rule: &ProviderRule) -> Capabilities {
445    let thinking_modes = rule_thinking_modes(rule);
446    Capabilities {
447        native_tools: rule.native_tools.unwrap_or(false),
448        defer_loading: rule.defer_loading.unwrap_or(false),
449        tool_search: rule.tool_search.clone().unwrap_or_default(),
450        max_tools: rule.max_tools,
451        prompt_caching: rule.prompt_caching.unwrap_or(false),
452        vision: rule_vision(rule),
453        audio: rule.audio.unwrap_or(false),
454        pdf: rule.pdf.unwrap_or(false),
455        files_api_supported: rule.files_api_supported.unwrap_or(false),
456        structured_output: rule_structured_output(rule),
457        json_schema: rule_structured_output(rule),
458        thinking_modes,
459        interleaved_thinking_supported: rule.interleaved_thinking_supported.unwrap_or(false),
460        anthropic_beta_features: rule.anthropic_beta_features.clone().unwrap_or_default(),
461        vision_supported: rule.vision_supported.unwrap_or(false),
462        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
463        server_parser: rule
464            .server_parser
465            .clone()
466            .unwrap_or_else(|| "none".to_string()),
467        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
468        requires_completion_tokens: rule.requires_completion_tokens.unwrap_or(false),
469        reasoning_effort_supported: rule.reasoning_effort_supported.unwrap_or(false),
470        reasoning_none_supported: rule.reasoning_none_supported.unwrap_or(false),
471        recommended_endpoint: rule.recommended_endpoint.clone(),
472        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
473        thinking_disable_directive: rule.thinking_disable_directive.clone(),
474    }
475}
476
477fn rule_structured_output(rule: &ProviderRule) -> Option<String> {
478    rule.structured_output
479        .clone()
480        .or_else(|| rule.json_schema.clone())
481        .filter(|value| value != "none")
482}
483
484fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
485    let lower = model.to_lowercase();
486    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
487        return false;
488    }
489    if let Some(version_min) = &rule.version_min {
490        if version_min.len() != 2 {
491            return false;
492        }
493        let want = (version_min[0], version_min[1]);
494        let have = match extract_version(model) {
495            Some(v) => v,
496            // `version_min` was set but the model ID can't be parsed.
497            // Fail closed: skip this rule so more permissive catch-all
498            // rules below can still match.
499            None => return false,
500        };
501        if have < want {
502            return false;
503        }
504    }
505    true
506}
507
508/// Extract `(major, minor)` from a model ID by trying the Anthropic
509/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
510/// Both parsers return `None` for shapes they don't recognise so this
511/// never mis-parses across families.
512fn extract_version(model: &str) -> Option<(u32, u32)> {
513    claude_generation(model).or_else(|| gpt_generation(model))
514}
515
/// Simple glob matching with `*` wildcards. Mirrors the helper in
/// `llm_config.rs` — keep them in sync if either ever grows regex or
/// character-class support.
///
/// Supported shapes: exact, `prefix*`, `*suffix`, `*infix*`, and a
/// single mid-pattern `*` (`pre*post`). Any other pattern (e.g. two
/// mid-`*`s) falls back to exact string comparison.
fn glob_match(pattern: &str, input: &str) -> bool {
    if let Some(prefix) = pattern.strip_suffix('*') {
        if let Some(rest) = prefix.strip_prefix('*') {
            // `*foo*` — substring match.
            return input.contains(rest);
        }
        return input.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return input.ends_with(suffix);
    }
    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            // `pre*post` — the matched prefix and suffix must occupy
            // disjoint spans of the input. Without the length check,
            // `ab*bc` would wrongly match `abc` (the `b` doing double
            // duty for both halves).
            return input.len() >= parts[0].len() + parts[1].len()
                && input.starts_with(parts[0])
                && input.ends_with(parts[1]);
        }
        return input == pattern;
    }
    input == pattern
}
539
540#[cfg(test)]
541mod tests {
542    use super::*;
543
    // Drop any thread-local overrides so tests on the same thread
    // don't leak state into each other.
    fn reset() {
        clear_user_overrides();
    }
547
    // The Opus 4.7 built-in row: every boolean gate on, adaptive
    // thinking, bm25/regex tool search, and a 10k tool cap.
    #[test]
    fn anthropic_opus_47_gets_full_capabilities() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
        assert!(caps.prompt_caching);
        assert_eq!(caps.thinking_modes, vec!["adaptive"]);
        assert!(caps.vision_supported);
        assert!(caps.audio);
        assert!(caps.pdf);
        assert!(caps.files_api_supported);
        assert_eq!(caps.max_tools, Some(10000));
    }
563
    // Opus 4.6 keeps budgeted ("enabled") thinking and supports
    // interleaved thinking.
    #[test]
    fn anthropic_opus_46_uses_budgeted_thinking() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-6");
        assert_eq!(caps.thinking_modes, vec!["enabled"]);
        assert!(caps.interleaved_thinking_supported);
    }
571
    // Opus 4.5: enabled thinking but no interleaved-thinking support.
    #[test]
    fn anthropic_opus_45_does_not_support_interleaved_thinking() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-5");
        assert_eq!(caps.thinking_modes, vec!["enabled"]);
        assert!(!caps.interleaved_thinking_supported);
    }
579
    // A project override rule can inject Anthropic beta feature flags
    // for a custom model pattern.
    #[test]
    fn override_can_supply_anthropic_beta_features() {
        reset();
        let toml_src = r#"
[[provider.anthropic]]
model_match = "claude-custom-*"
native_tools = true
anthropic_beta_features = ["fine-grained-tool-streaming-2025-05-14"]
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("anthropic", "claude-custom-1");
        assert_eq!(
            caps.anthropic_beta_features,
            vec!["fine-grained-tool-streaming-2025-05-14"]
        );
        reset();
    }
597
    // Haiku 4.4 has no dedicated row: the claude-* catch-all grants
    // native tools + caching only.
    #[test]
    fn anthropic_haiku_44_has_no_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-4");
        // Haiku 4.4 falls through to the `claude-*` catch-all row.
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }
608
    // Haiku 4.5 gains deferred loading and the bm25/regex search variants.
    #[test]
    fn anthropic_haiku_45_supports_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-5");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }
616
    // Pre-4.x Claude IDs land on the catch-all row: tools + caching,
    // nothing newer.
    #[test]
    fn old_claude_gets_catchall() {
        reset();
        let caps = lookup("anthropic", "claude-opus-3-5");
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }
626
    // GPT-5.4: hosted/client tool search, native structured output, and
    // effort thinking including `reasoning_effort: "none"`.
    #[test]
    fn openai_gpt_54_supports_tool_search() {
        reset();
        let caps = lookup("openai", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
        assert_eq!(caps.json_schema.as_deref(), Some("native"));
        assert_eq!(caps.thinking_modes, vec!["effort"]);
        assert!(caps.reasoning_effort_supported);
        assert!(caps.reasoning_none_supported);
    }
638
    // GPT-5.3 accepts reasoning "none" but lacks deferred loading and
    // tool search.
    #[test]
    fn openai_gpt_53_has_reasoning_none_without_tool_search() {
        reset();
        let caps = lookup("openai", "gpt-5.3");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.vision_supported);
        assert!(caps.tool_search.is_empty());
        assert_eq!(caps.thinking_modes, vec!["effort"]);
        assert!(caps.reasoning_effort_supported);
        assert!(caps.reasoning_none_supported);
    }
651
    // Original GPT-5 supports effort but floors at minimal: "none" is
    // not accepted.
    #[test]
    fn openai_original_gpt_5_has_reasoning_floor_without_none() {
        reset();
        let caps = lookup("openai", "gpt-5");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert_eq!(caps.thinking_modes, vec!["effort"]);
        assert!(caps.reasoning_effort_supported);
        assert!(!caps.reasoning_none_supported);
    }
662
    // gpt-4o: vision + audio but no PDF, with native structured output.
    #[test]
    fn openai_gpt_4o_matrix_fields_include_multimodal_support() {
        reset();
        let caps = lookup("openai", "gpt-4o");
        assert!(caps.native_tools);
        assert!(caps.vision);
        assert!(caps.audio);
        assert!(!caps.pdf);
        assert_eq!(caps.json_schema.as_deref(), Some("native"));
    }
673
    // o-series reasoning models require `max_completion_tokens` and
    // accept effort — including via prefixed IDs on routed providers.
    #[test]
    fn openai_reasoning_models_support_effort() {
        reset();
        let caps = lookup("openai", "o3");
        assert_eq!(caps.thinking_modes, vec!["effort"]);
        assert!(caps.requires_completion_tokens);
        assert!(caps.reasoning_effort_supported);
        let prefixed = lookup("openrouter", "openai/o4-mini");
        assert!(prefixed.requires_completion_tokens);
        assert!(prefixed.reasoning_effort_supported);
    }
685
    // Spot-check the vision/audio/pdf/files gates across providers,
    // including negative cases for text-only models.
    #[test]
    fn vision_capability_gates_known_multimodal_models() {
        reset();
        assert!(lookup("openai", "gpt-4o").vision_supported);
        assert!(lookup("openai", "gpt-5.4-preview").vision_supported);
        assert!(lookup("anthropic", "claude-sonnet-4-6").vision_supported);
        assert!(lookup("anthropic", "claude-sonnet-4-6").pdf);
        assert!(lookup("anthropic", "claude-sonnet-4-6").files_api_supported);
        assert!(lookup("openrouter", "google/gemini-2.5-flash").vision_supported);
        assert!(lookup("gemini", "gemini-2.5-flash").vision_supported);
        assert!(lookup("gemini", "gemini-2.5-flash").audio);
        assert!(lookup("gemini", "gemini-2.5-flash").pdf);
        assert!(lookup("ollama", "llava:latest").vision_supported);
        assert!(lookup("ollama", "gemma4:26b").vision_supported);
        assert!(lookup("ollama", "gemma4-128k:latest").vision_supported);
        assert!(!lookup("openai", "gpt-3.5-turbo").vision_supported);
        assert!(!lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4").vision_supported);
    }
704
    // OpenRouter has no own gpt-5.4 rule; the provider_family chain
    // resolves it through the openai rules.
    #[test]
    fn openrouter_inherits_openai() {
        reset();
        let caps = lookup("openrouter", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }
712
    // Groq likewise falls through the family chain to openai rules.
    #[test]
    fn groq_inherits_openai_family_only() {
        reset();
        let caps = lookup("groq", "gpt-5.5-preview");
        assert!(caps.defer_loading);
    }
719
    // The `mock` provider probes anthropic first, so a Claude-shaped
    // model ID picks up the Anthropic row.
    #[test]
    fn mock_with_claude_model_routes_to_anthropic() {
        reset();
        let caps = lookup("mock", "claude-sonnet-4-7");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }
727
    // A GPT-shaped model ID under `mock` falls through to the openai layer.
    #[test]
    fn mock_with_gpt_model_routes_to_openai() {
        reset();
        let caps = lookup("mock", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }
735
    // Qwen3.6 on Ollama: preserve_thinking on, qwen3coder server parser,
    // raw-generate endpoint, and no text-tool wire format.
    #[test]
    fn qwen36_ollama_preserves_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert_eq!(caps.json_schema.as_deref(), Some("format_kw"));
        assert!(!caps.thinking_modes.is_empty());
        assert!(
            caps.preserve_thinking,
            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.honors_chat_template_kwargs);
        assert_eq!(
            caps.recommended_endpoint.as_deref(),
            Some("/api/generate-raw")
        );
        assert!(!caps.text_tool_wire_format_supported);
    }
755
    // Qwen3.5 on Ollama shares the parser but must not claim
    // preserve_thinking.
    #[test]
    fn qwen35_ollama_does_not_preserve_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert!(!caps.thinking_modes.is_empty());
        assert!(
            !caps.preserve_thinking,
            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.text_tool_wire_format_supported);
    }
769
    // Every non-Ollama Qwen3.6 route (various ID spellings and hosts)
    // preserves thinking, has native tools, and avoids the
    // Ollama-specific qwen3coder parser.
    #[test]
    fn qwen36_routed_providers_all_preserve_thinking() {
        reset();
        for (provider, model) in [
            ("openrouter", "qwen/qwen3.6-plus"),
            ("together", "Qwen/Qwen3.6-Plus"),
            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
            ("dashscope", "qwen3.6-plus"),
            ("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF"),
            ("local", "Qwen3.6-35B-A3B"),
            ("mlx", "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
            ("mlx", "Qwen/Qwen3.6-27B"),
        ] {
            let caps = lookup(provider, model);
            assert!(
                !caps.thinking_modes.is_empty(),
                "{provider}/{model}: thinking"
            );
            assert!(
                caps.preserve_thinking,
                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
            );
            assert!(caps.native_tools, "{provider}/{model}: native_tools");
            assert_ne!(
                caps.server_parser, "ollama_qwen3coder",
                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
            );
        }
    }
800
801    #[test]
802    fn qwen_coder_models_do_not_claim_thinking_modes() {
803        reset();
804        for (provider, model) in [
805            ("together", "Qwen/Qwen3-Coder-Next-FP8"),
806            ("together", "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"),
807            ("openrouter", "qwen/qwen3-coder-next"),
808            ("huggingface", "Qwen/Qwen3-Coder-Next"),
809        ] {
810            let caps = lookup(provider, model);
811            assert!(caps.native_tools, "{provider}/{model}: native_tools");
812            assert!(
813                caps.thinking_modes.is_empty(),
814                "{provider}/{model}: coder models are non-thinking routes"
815            );
816            assert!(
817                !caps.preserve_thinking,
818                "{provider}/{model}: preserve_thinking must stay off"
819            );
820            assert!(
821                caps.thinking_disable_directive.is_none(),
822                "{provider}/{model}: no /no_think shim should be needed"
823            );
824        }
825    }
826
827    #[test]
828    fn llamacpp_qwen_keeps_text_tool_wire_format() {
829        reset();
830        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
831        assert_eq!(caps.server_parser, "none");
832        assert!(caps.honors_chat_template_kwargs);
833        assert!(caps.text_tool_wire_format_supported);
834        assert_eq!(
835            caps.recommended_endpoint.as_deref(),
836            Some("/v1/chat/completions")
837        );
838    }
839
840    #[test]
841    fn dashscope_and_llamacpp_resolve_capabilities() {
842        reset();
843        // New sibling providers should fall through to `openai` for
844        // gpt-*  models even without dedicated rules.
845        let caps = lookup("dashscope", "gpt-5.4-preview");
846        assert!(caps.defer_loading);
847        let caps = lookup("llamacpp", "gpt-5.4-preview");
848        assert!(caps.defer_loading);
849    }
850
851    #[test]
852    fn unknown_provider_has_no_capabilities() {
853        reset();
854        let caps = lookup("my-custom-proxy", "foo-bar-1");
855        assert!(!caps.native_tools);
856        assert!(!caps.defer_loading);
857        assert!(caps.tool_search.is_empty());
858    }
859
860    #[test]
861    fn user_override_adds_new_provider() {
862        reset();
863        let toml_src = r#"
864[[provider.my-proxy]]
865model_match = "*"
866native_tools = true
867tool_search = ["hosted"]
868"#;
869        set_user_overrides_toml(toml_src).unwrap();
870        let caps = lookup("my-proxy", "anything");
871        assert!(caps.native_tools);
872        assert_eq!(caps.tool_search, vec!["hosted"]);
873        clear_user_overrides();
874    }
875
876    #[test]
877    fn user_override_takes_precedence_over_builtin() {
878        reset();
879        let toml_src = r#"
880[[provider.anthropic]]
881model_match = "claude-opus-*"
882native_tools = true
883defer_loading = false
884tool_search = []
885"#;
886        set_user_overrides_toml(toml_src).unwrap();
887        let caps = lookup("anthropic", "claude-opus-4-7");
888        assert!(caps.native_tools);
889        assert!(!caps.defer_loading);
890        assert!(caps.tool_search.is_empty());
891        clear_user_overrides();
892    }
893
894    #[test]
895    fn user_override_from_manifest_toml() {
896        reset();
897        let manifest = r#"
898[package]
899name = "demo"
900
901[[capabilities.provider.my-proxy]]
902model_match = "*"
903native_tools = true
904tool_search = ["hosted"]
905"#;
906        set_user_overrides_from_manifest_toml(manifest).unwrap();
907        let caps = lookup("my-proxy", "foo");
908        assert!(caps.native_tools);
909        assert_eq!(caps.tool_search, vec!["hosted"]);
910        clear_user_overrides();
911    }
912
913    #[test]
914    fn version_min_requires_parseable_model() {
915        reset();
916        let toml_src = r#"
917[[provider.custom]]
918model_match = "*"
919version_min = [5, 4]
920native_tools = true
921"#;
922        set_user_overrides_toml(toml_src).unwrap();
923        // Unparseable model ID + version_min → rule doesn't match.
924        let caps = lookup("custom", "mystery-model");
925        assert!(!caps.native_tools);
926        clear_user_overrides();
927    }
928
929    #[test]
930    fn glob_match_substring() {
931        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
932        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
933        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
934    }
935
936    #[test]
937    fn openrouter_namespaced_anthropic_model() {
938        reset();
939        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
940        assert!(caps.defer_loading);
941    }
942
943    #[test]
944    fn matrix_rows_include_provider_patterns_and_sources() {
945        reset();
946        let rows = matrix_rows();
947        assert!(rows.iter().any(|row| {
948            row.provider == "openai"
949                && row.model == "gpt-4o*"
950                && row.vision
951                && row.audio
952                && row.json_schema.as_deref() == Some("native")
953                && row.source == "builtin"
954        }));
955    }
956}