Skip to main content

harn_vm/llm/
capabilities.rs

1//! Data-driven provider capabilities.
2//!
3//! The per-(provider, model) capability matrix (native tools, deferred
4//! tool loading, tool-search variants, prompt caching, extended thinking,
5//! max tool count) lives in the shipped `capabilities.toml` and is
6//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
7//! in `harn.toml`. This module owns:
8//!
9//! - loading the built-in TOML (compiled in via `include_str!`);
10//! - merging user overrides on top;
11//! - matching a `(provider, model)` pair against the rule list with
12//!   glob + semver semantics;
13//! - exposing a stable `Capabilities` struct that the `LlmProvider`
14//!   trait delegates to as the single source of truth.
15//!
16//! Before this module the Anthropic / OpenAI gates were spread across
17//! `providers/anthropic.rs` (`claude_generation`, `claude_model_supports_tool_search`)
18//! and `providers/openai_compat.rs` (`gpt_generation`, `gpt_model_supports_tool_search`).
19//! Those parsers are still used here — they supply the version extractor —
20//! but the boolean gates that used to live alongside them are now data.
21
22use std::cell::RefCell;
23use std::collections::BTreeMap;
24use std::sync::OnceLock;
25
26use serde::{Deserialize, Serialize};
27
28use super::providers::anthropic::claude_generation;
29use super::providers::openai_compat::gpt_generation;
30
/// Shipped default rules. Compiled into the binary at build time.
/// Lives next to this module as `capabilities.toml`; the text is parsed
/// lazily (and a malformed file reported) in `builtin()`.
const BUILTIN_TOML: &str = include_str!("capabilities.toml");
33
/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists. First matching rule wins.
    /// Keyed by provider name (e.g. `anthropic`, `openai`, `ollama`).
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Sibling → canonical family mapping. Providers with no rule of
    /// their own fall through to the named family (recursively).
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
}
46
/// One row of the capability matrix.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules with an unparseable `version_min` for the
    /// given model are skipped, not merged.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    /// Whether this route supports native (first-class API) tool calls.
    #[serde(default)]
    pub native_tools: Option<bool>,
    /// Whether deferred tool loading is supported on this route.
    #[serde(default)]
    pub defer_loading: Option<bool>,
    /// Supported tool-search variants (e.g. `bm25`, `regex`, `hosted`,
    /// `client`). Absent/empty means no tool search.
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    /// Upper bound on the tool count for this route, when one exists.
    #[serde(default)]
    pub max_tools: Option<u32>,
    /// Whether prompt caching is available on this route.
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Whether this provider/model route accepts image or other visual
    /// input blocks through Harn's LLM message path.
    #[serde(default)]
    pub vision: Option<bool>,
    /// Whether this provider/model route accepts audio input blocks
    /// through Harn's LLM message path.
    #[serde(default, alias = "audio_supported")]
    pub audio: Option<bool>,
    /// Whether this provider/model route accepts PDF/document input blocks
    /// through Harn's LLM message path.
    #[serde(default, alias = "pdf_supported")]
    pub pdf: Option<bool>,
    /// Whether uploaded file references can be reused in message content.
    #[serde(default)]
    pub files_api_supported: Option<bool>,
    /// Structured-output transport strategy. Known values are:
    /// `native`, `tool_use`, `format_kw`, and `none`.
    #[serde(default)]
    pub structured_output: Option<String>,
    /// Legacy name retained for project overrides written before
    /// `structured_output` became the canonical capability.
    #[serde(default)]
    pub json_schema: Option<String>,
    /// Supported thinking/reasoning modes for this rule. Values are
    /// script-facing mode names: `enabled`, `adaptive`, and `effort`.
    #[serde(default)]
    pub thinking_modes: Option<Vec<String>>,
    /// Whether Anthropic interleaved thinking is supported for this
    /// provider/model route.
    #[serde(default)]
    pub interleaved_thinking_supported: Option<bool>,
    /// Anthropic beta features that should be requested for this route.
    #[serde(default)]
    pub anthropic_beta_features: Option<Vec<String>>,
    /// Legacy override compatibility. New built-in rules should use
    /// `thinking_modes` so the capability matrix preserves mode detail.
    /// `thinking = true` resolves to the single mode `enabled`.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Whether the model accepts image inputs in chat content.
    /// Legacy sibling of `vision`; `vision` wins when both are set.
    #[serde(default)]
    pub vision_supported: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
    /// Name of any server-side response parser that can transform model
    /// bytes before Harn sees them. `none` means the provider returns the
    /// model text/tool channel without an implicit parser.
    #[serde(default)]
    pub server_parser: Option<String>,
    /// Whether provider-specific `chat_template_kwargs` are honored.
    /// Some OpenAI-compatible servers silently drop unknown kwargs.
    #[serde(default)]
    pub honors_chat_template_kwargs: Option<bool>,
    /// Whether this route requires OpenAI's `max_completion_tokens`
    /// request field instead of legacy `max_tokens`.
    #[serde(default)]
    pub requires_completion_tokens: Option<bool>,
    /// Whether this route accepts OpenAI's `reasoning_effort` request field.
    #[serde(default)]
    pub reasoning_effort_supported: Option<bool>,
    /// Preferred endpoint family for this provider/model route. Values
    /// are descriptive labels consumed by providers, e.g.
    /// `/api/generate-raw` for Ollama raw prompt bypass.
    #[serde(default)]
    pub recommended_endpoint: Option<String>,
    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
    /// survive the provider route and return in the visible response body.
    #[serde(default)]
    pub text_tool_wire_format_supported: Option<bool>,
    /// In-prompt directive that disables this model's "thinking" mode when
    /// the API doesn't expose a first-class field (or exposes it
    /// inconsistently across templates / quantizations). For Qwen3 family
    /// chat templates this is `/no_think`. When `thinking: false` is
    /// requested and this is set, Harn auto-prepends the directive to the
    /// system message so script authors don't need to know it exists.
    #[serde(default)]
    pub thinking_disable_directive: Option<String>,
}
152
/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to `false` / empty / `None` so callers never have to
/// unwrap an `Option<bool>` for what are really boolean gates.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    /// Native (first-class API) tool calling.
    pub native_tools: bool,
    /// Deferred tool loading.
    pub defer_loading: bool,
    /// Supported tool-search variants; empty when unavailable.
    pub tool_search: Vec<String>,
    /// Maximum tool count, when the route imposes one.
    pub max_tools: Option<u32>,
    /// Prompt caching availability.
    pub prompt_caching: bool,
    /// Image / visual input blocks accepted.
    pub vision: bool,
    /// Audio input blocks accepted.
    pub audio: bool,
    /// PDF / document input blocks accepted.
    pub pdf: bool,
    /// Uploaded file references reusable in message content.
    pub files_api_supported: bool,
    /// Structured-output transport strategy, if any.
    pub structured_output: Option<String>,
    /// Legacy mirror for CLI display and older callers.
    pub json_schema: Option<String>,
    /// Script-facing thinking/reasoning mode names.
    pub thinking_modes: Vec<String>,
    /// Anthropic interleaved thinking supported.
    pub interleaved_thinking_supported: bool,
    /// Anthropic beta features to request for this route.
    pub anthropic_beta_features: Vec<String>,
    /// Legacy vision flag (resolved from the rule's `vision_supported`).
    pub vision_supported: bool,
    /// Carry `<think>` blocks in assistant history across turns.
    pub preserve_thinking: bool,
    /// Server-side response parser name; `"none"` when absent.
    pub server_parser: String,
    /// Provider honors `chat_template_kwargs`.
    pub honors_chat_template_kwargs: bool,
    /// Route requires `max_completion_tokens` instead of `max_tokens`.
    pub requires_completion_tokens: bool,
    /// Route accepts OpenAI's `reasoning_effort` field.
    pub reasoning_effort_supported: bool,
    /// Preferred endpoint label, if any.
    pub recommended_endpoint: Option<String>,
    /// Text-tool wire protocol survives the route (defaults to `true`).
    pub text_tool_wire_format_supported: bool,
    /// In-prompt directive that disables thinking, when the model has one.
    pub thinking_disable_directive: Option<String>,
}
183
184impl Default for Capabilities {
185    fn default() -> Self {
186        Self {
187            native_tools: false,
188            defer_loading: false,
189            tool_search: Vec::new(),
190            max_tools: None,
191            prompt_caching: false,
192            vision: false,
193            audio: false,
194            pdf: false,
195            files_api_supported: false,
196            structured_output: None,
197            json_schema: None,
198            thinking_modes: Vec::new(),
199            interleaved_thinking_supported: false,
200            anthropic_beta_features: Vec::new(),
201            vision_supported: false,
202            preserve_thinking: false,
203            server_parser: "none".to_string(),
204            honors_chat_template_kwargs: false,
205            requires_completion_tokens: false,
206            reasoning_effort_supported: false,
207            recommended_endpoint: None,
208            text_tool_wire_format_supported: true,
209            thinking_disable_directive: None,
210        }
211    }
212}
213
/// Display-oriented row for `harn check --provider-matrix` and the generated
/// docs page. Rows are intentionally rule-shaped: `model` is the rule's
/// `model_match` pattern, because the shipped capability source of truth is a
/// first-match rule table rather than an exhaustive remote model inventory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ProviderCapabilityMatrixRow {
    /// Provider name the rule belongs to.
    pub provider: String,
    /// The rule's `model_match` glob pattern, not a concrete model ID.
    pub model: String,
    /// Effective thinking-mode list for the rule.
    pub thinking: Vec<String>,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    /// Currently always `true`: rules carry no per-route streaming flag.
    pub streaming: bool,
    pub files_api_supported: bool,
    /// Structured-output strategy (legacy column name kept for display).
    pub json_schema: Option<String>,
    /// Native tool calling.
    pub tools: bool,
    /// Prompt caching.
    pub cache: bool,
    /// `"project"` for user-override rows, `"builtin"` for shipped rows.
    pub source: String,
}
233
thread_local! {
    /// Per-thread user overrides installed by the CLI at startup. Kept
    /// thread-local (not process-static) to match the rest of the VM
    /// state model — the VM is !Send and each VM thread owns its own
    /// configuration. Written via `set_user_overrides`; read (cloned)
    /// by `lookup` and `matrix_rows`.
    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
}
241
/// Lazily-parsed built-in rules. The `include_str!` content is a static
/// constant; parsing it once per process is safe and free of ordering
/// hazards. Initialised on first access through `builtin()`.
static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
246
247fn builtin() -> &'static CapabilitiesFile {
248    BUILTIN.get_or_init(|| {
249        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
250            .expect("capabilities.toml must parse at build time")
251    })
252}
253
254/// Install project-level overrides for the current thread. Usually
255/// called once at CLI bootstrap after reading `harn.toml`. Passing
256/// `None` clears any prior override.
257pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
258    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
259}
260
261/// Clear any thread-local user overrides. Used between test runs.
262pub fn clear_user_overrides() {
263    set_user_overrides(None);
264}
265
266/// Parse a TOML string containing the capabilities section's own shape
267/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
268/// same layout used by the built-in `capabilities.toml`) and install as
269/// the current thread's override.
270pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
271    let parsed: CapabilitiesFile = toml::from_str(src).map_err(|e| e.to_string())?;
272    set_user_overrides(Some(parsed));
273    Ok(())
274}
275
276/// Extract the `[capabilities]` section from a full `harn.toml` source
277/// and install it as the current thread's override. The schema inside
278/// that section mirrors `CapabilitiesFile` but with every key prefixed
279/// by `capabilities.`:
280///
281/// ```toml
282/// [[capabilities.provider.my-proxy]]
283/// model_match = "*"
284/// native_tools = true
285/// tool_search = ["hosted"]
286/// ```
287pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
288    #[derive(Deserialize)]
289    struct Manifest {
290        #[serde(default)]
291        capabilities: Option<CapabilitiesFile>,
292    }
293    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
294    set_user_overrides(parsed.capabilities);
295    Ok(())
296}
297
298/// Look up effective capabilities for a `(provider, model)` pair.
299/// Walks the provider_family chain until it finds a rule list that
300/// matches. Within any one provider's rule list, user overrides are
301/// consulted before the built-in rules. The first matching rule wins —
302/// later rules (and later layers in the family chain) are ignored.
303pub fn lookup(provider: &str, model: &str) -> Capabilities {
304    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
305    lookup_with(provider, model, builtin(), user.as_ref())
306}
307
308/// Return the currently-effective provider capability rule matrix. User
309/// override rows, when installed for the current thread, are emitted before
310/// built-in rows so the display mirrors lookup precedence.
311pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
312    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
313    let mut rows = Vec::new();
314    if let Some(user) = user.as_ref() {
315        push_matrix_rows(&mut rows, user, "project");
316    }
317    push_matrix_rows(&mut rows, builtin(), "builtin");
318    rows
319}
320
321fn push_matrix_rows(
322    rows: &mut Vec<ProviderCapabilityMatrixRow>,
323    file: &CapabilitiesFile,
324    source: &str,
325) {
326    for (provider, rules) in &file.provider {
327        for rule in rules {
328            rows.push(rule_to_matrix_row(provider, rule, source));
329        }
330    }
331}
332
333fn rule_to_matrix_row(
334    provider: &str,
335    rule: &ProviderRule,
336    source: &str,
337) -> ProviderCapabilityMatrixRow {
338    ProviderCapabilityMatrixRow {
339        provider: provider.to_string(),
340        model: rule.model_match.clone(),
341        thinking: rule_thinking_modes(rule),
342        vision: rule_vision(rule),
343        audio: rule.audio.unwrap_or(false),
344        pdf: rule.pdf.unwrap_or(false),
345        streaming: true,
346        files_api_supported: rule.files_api_supported.unwrap_or(false),
347        json_schema: rule_structured_output(rule),
348        tools: rule.native_tools.unwrap_or(false),
349        cache: rule.prompt_caching.unwrap_or(false),
350        source: source.to_string(),
351    }
352}
353
354fn rule_thinking_modes(rule: &ProviderRule) -> Vec<String> {
355    rule.thinking_modes.clone().unwrap_or_else(|| {
356        if rule.thinking.unwrap_or(false) {
357            vec!["enabled".to_string()]
358        } else {
359            Vec::new()
360        }
361    })
362}
363
364fn rule_vision(rule: &ProviderRule) -> bool {
365    rule.vision.or(rule.vision_supported).unwrap_or(false)
366}
367
368fn lookup_with(
369    provider: &str,
370    model: &str,
371    builtin: &CapabilitiesFile,
372    user: Option<&CapabilitiesFile>,
373) -> Capabilities {
374    // Special case: mock spoofs either shape. Try anthropic first
375    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
376    // resolves to the Anthropic capability row — the same behaviour
377    // the hardcoded dispatch gave before this refactor.
378    if provider == "mock" {
379        if let Some(caps) = try_match_layer(user, builtin, "anthropic", model, provider) {
380            return caps;
381        }
382        if let Some(caps) = try_match_layer(user, builtin, "openai", model, provider) {
383            return caps;
384        }
385        return Capabilities::default();
386    }
387
388    // Normal chain: walk provider → family(provider) → ... with a
389    // visited-guard to avoid cycles in malformed user overrides.
390    let mut current = provider.to_string();
391    let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
392    while visited.insert(current.clone()) {
393        if let Some(caps) = try_match_layer(user, builtin, &current, model, provider) {
394            return caps;
395        }
396        let next = user
397            .and_then(|f| f.provider_family.get(&current))
398            .or_else(|| builtin.provider_family.get(&current))
399            .cloned();
400        match next {
401            Some(parent) => current = parent,
402            None => break,
403        }
404    }
405    Capabilities::default()
406}
407
408/// Try the ordered rule list for `layer_provider` (user rules first,
409/// then built-in rules). Returns `Some(caps)` on the first match, else
410/// `None`. `original_provider` is threaded through only for diagnostics.
411fn try_match_layer(
412    user: Option<&CapabilitiesFile>,
413    builtin: &CapabilitiesFile,
414    layer_provider: &str,
415    model: &str,
416    _original_provider: &str,
417) -> Option<Capabilities> {
418    if let Some(user) = user {
419        if let Some(rules) = user.provider.get(layer_provider) {
420            for rule in rules {
421                if rule_matches(rule, model) {
422                    return Some(rule_to_caps(rule));
423                }
424            }
425        }
426    }
427    if let Some(rules) = builtin.provider.get(layer_provider) {
428        for rule in rules {
429            if rule_matches(rule, model) {
430                return Some(rule_to_caps(rule));
431            }
432        }
433    }
434    None
435}
436
437fn rule_to_caps(rule: &ProviderRule) -> Capabilities {
438    let thinking_modes = rule_thinking_modes(rule);
439    Capabilities {
440        native_tools: rule.native_tools.unwrap_or(false),
441        defer_loading: rule.defer_loading.unwrap_or(false),
442        tool_search: rule.tool_search.clone().unwrap_or_default(),
443        max_tools: rule.max_tools,
444        prompt_caching: rule.prompt_caching.unwrap_or(false),
445        vision: rule_vision(rule),
446        audio: rule.audio.unwrap_or(false),
447        pdf: rule.pdf.unwrap_or(false),
448        files_api_supported: rule.files_api_supported.unwrap_or(false),
449        structured_output: rule_structured_output(rule),
450        json_schema: rule_structured_output(rule),
451        thinking_modes,
452        interleaved_thinking_supported: rule.interleaved_thinking_supported.unwrap_or(false),
453        anthropic_beta_features: rule.anthropic_beta_features.clone().unwrap_or_default(),
454        vision_supported: rule.vision_supported.unwrap_or(false),
455        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
456        server_parser: rule
457            .server_parser
458            .clone()
459            .unwrap_or_else(|| "none".to_string()),
460        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
461        requires_completion_tokens: rule.requires_completion_tokens.unwrap_or(false),
462        reasoning_effort_supported: rule.reasoning_effort_supported.unwrap_or(false),
463        recommended_endpoint: rule.recommended_endpoint.clone(),
464        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
465        thinking_disable_directive: rule.thinking_disable_directive.clone(),
466    }
467}
468
469fn rule_structured_output(rule: &ProviderRule) -> Option<String> {
470    rule.structured_output
471        .clone()
472        .or_else(|| rule.json_schema.clone())
473        .filter(|value| value != "none")
474}
475
476fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
477    let lower = model.to_lowercase();
478    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
479        return false;
480    }
481    if let Some(version_min) = &rule.version_min {
482        if version_min.len() != 2 {
483            return false;
484        }
485        let want = (version_min[0], version_min[1]);
486        let have = match extract_version(model) {
487            Some(v) => v,
488            // `version_min` was set but the model ID can't be parsed.
489            // Fail closed: skip this rule so more permissive catch-all
490            // rules below can still match.
491            None => return false,
492        };
493        if have < want {
494            return false;
495        }
496    }
497    true
498}
499
500/// Extract `(major, minor)` from a model ID by trying the Anthropic
501/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
502/// Both parsers return `None` for shapes they don't recognise so this
503/// never mis-parses across families.
504fn extract_version(model: &str) -> Option<(u32, u32)> {
505    claude_generation(model).or_else(|| gpt_generation(model))
506}
507
/// Simple glob matching with `*` wildcards. Mirrors the helper in
/// `llm_config.rs` — keep them in sync if either ever grows regex or
/// character-class support.
///
/// Supported shapes: `foo*` (prefix), `*foo` (suffix), `*foo*`
/// (substring), `foo*bar` (prefix + suffix), and literal equality.
/// Patterns with more than one mid-`*` are unsupported and only match
/// a byte-identical input.
fn glob_match(pattern: &str, input: &str) -> bool {
    if let Some(prefix) = pattern.strip_suffix('*') {
        if let Some(rest) = prefix.strip_prefix('*') {
            // `*foo*` — substring match.
            return input.contains(rest);
        }
        // `foo*` — prefix match (also handles the bare `*` catch-all,
        // since every input starts with the empty prefix).
        return input.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        // `*foo` — suffix match.
        return input.ends_with(suffix);
    }
    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            // `foo*bar` — prefix + suffix. The length guard stops the
            // two anchors from overlapping: without it `gpt-4*4` would
            // wrongly match `gpt-4` (prefix and suffix sharing the
            // trailing `4`).
            return input.len() >= parts[0].len() + parts[1].len()
                && input.starts_with(parts[0])
                && input.ends_with(parts[1]);
        }
        // Multiple mid-`*`s unsupported: fall through to equality.
        return input == pattern;
    }
    input == pattern
}
531
#[cfg(test)]
mod tests {
    use super::*;

    // Drop any thread-local overrides so each test sees only the
    // built-in rule table.
    fn reset() {
        clear_user_overrides();
    }

    // --- Anthropic built-in rows ---

    #[test]
    fn anthropic_opus_47_gets_full_capabilities() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
        assert!(caps.prompt_caching);
        assert_eq!(caps.thinking_modes, vec!["adaptive"]);
        assert!(caps.vision_supported);
        assert!(caps.audio);
        assert!(caps.pdf);
        assert!(caps.files_api_supported);
        assert_eq!(caps.max_tools, Some(10000));
    }

    #[test]
    fn anthropic_opus_46_uses_budgeted_thinking() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-6");
        assert_eq!(caps.thinking_modes, vec!["enabled"]);
        assert!(caps.interleaved_thinking_supported);
    }

    #[test]
    fn anthropic_opus_45_does_not_support_interleaved_thinking() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-5");
        assert_eq!(caps.thinking_modes, vec!["enabled"]);
        assert!(!caps.interleaved_thinking_supported);
    }

    #[test]
    fn override_can_supply_anthropic_beta_features() {
        reset();
        let toml_src = r#"
[[provider.anthropic]]
model_match = "claude-custom-*"
native_tools = true
anthropic_beta_features = ["fine-grained-tool-streaming-2025-05-14"]
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("anthropic", "claude-custom-1");
        assert_eq!(
            caps.anthropic_beta_features,
            vec!["fine-grained-tool-streaming-2025-05-14"]
        );
        reset();
    }

    #[test]
    fn anthropic_haiku_44_has_no_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-4");
        // Haiku 4.4 falls through to the `claude-*` catch-all row.
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn anthropic_haiku_45_supports_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-5");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn old_claude_gets_catchall() {
        reset();
        let caps = lookup("anthropic", "claude-opus-3-5");
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    // --- OpenAI built-in rows ---

    #[test]
    fn openai_gpt_54_supports_tool_search() {
        reset();
        let caps = lookup("openai", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
        assert_eq!(caps.json_schema.as_deref(), Some("native"));
    }

    #[test]
    fn openai_gpt_53_has_native_tools_only() {
        reset();
        let caps = lookup("openai", "gpt-5.3");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(!caps.vision_supported);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn openai_gpt_4o_matrix_fields_include_multimodal_support() {
        reset();
        let caps = lookup("openai", "gpt-4o");
        assert!(caps.native_tools);
        assert!(caps.vision);
        assert!(caps.audio);
        assert!(!caps.pdf);
        assert_eq!(caps.json_schema.as_deref(), Some("native"));
    }

    #[test]
    fn openai_reasoning_models_support_effort() {
        reset();
        let caps = lookup("openai", "o3");
        assert_eq!(caps.thinking_modes, vec!["effort"]);
        assert!(caps.requires_completion_tokens);
        assert!(caps.reasoning_effort_supported);
        let prefixed = lookup("openrouter", "openai/o4-mini");
        assert!(prefixed.requires_completion_tokens);
        assert!(prefixed.reasoning_effort_supported);
    }

    // --- Multimodal gates across providers ---

    #[test]
    fn vision_capability_gates_known_multimodal_models() {
        reset();
        assert!(lookup("openai", "gpt-4o").vision_supported);
        assert!(lookup("openai", "gpt-5.4-preview").vision_supported);
        assert!(lookup("anthropic", "claude-sonnet-4-6").vision_supported);
        assert!(lookup("anthropic", "claude-sonnet-4-6").pdf);
        assert!(lookup("anthropic", "claude-sonnet-4-6").files_api_supported);
        assert!(lookup("openrouter", "google/gemini-2.5-flash").vision_supported);
        assert!(lookup("gemini", "gemini-2.5-flash").vision_supported);
        assert!(lookup("gemini", "gemini-2.5-flash").audio);
        assert!(lookup("gemini", "gemini-2.5-flash").pdf);
        assert!(lookup("ollama", "llava:latest").vision_supported);
        assert!(!lookup("openai", "gpt-3.5-turbo").vision_supported);
        assert!(!lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4").vision_supported);
    }

    // --- provider_family fallthrough ---

    #[test]
    fn openrouter_inherits_openai() {
        reset();
        let caps = lookup("openrouter", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn groq_inherits_openai_family_only() {
        reset();
        let caps = lookup("groq", "gpt-5.5-preview");
        assert!(caps.defer_loading);
    }

    // --- `mock` provider dual-shape dispatch ---

    #[test]
    fn mock_with_claude_model_routes_to_anthropic() {
        reset();
        let caps = lookup("mock", "claude-sonnet-4-7");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn mock_with_gpt_model_routes_to_openai() {
        reset();
        let caps = lookup("mock", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    // --- Qwen local / routed rows ---

    #[test]
    fn qwen36_ollama_preserves_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert_eq!(caps.json_schema.as_deref(), Some("format_kw"));
        assert!(!caps.thinking_modes.is_empty());
        assert!(
            caps.preserve_thinking,
            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.honors_chat_template_kwargs);
        assert_eq!(
            caps.recommended_endpoint.as_deref(),
            Some("/api/generate-raw")
        );
        assert!(!caps.text_tool_wire_format_supported);
    }

    #[test]
    fn qwen35_ollama_does_not_preserve_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert!(!caps.thinking_modes.is_empty());
        assert!(
            !caps.preserve_thinking,
            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.text_tool_wire_format_supported);
    }

    #[test]
    fn qwen36_routed_providers_all_preserve_thinking() {
        reset();
        for (provider, model) in [
            ("openrouter", "qwen/qwen3.6-plus"),
            ("together", "Qwen/Qwen3.6-35B-A3B"),
            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
            ("dashscope", "qwen3.6-plus"),
            ("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF"),
            ("local", "Qwen3.6-35B-A3B"),
            ("mlx", "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
            ("mlx", "Qwen/Qwen3.6-27B"),
        ] {
            let caps = lookup(provider, model);
            assert!(
                !caps.thinking_modes.is_empty(),
                "{provider}/{model}: thinking"
            );
            assert!(
                caps.preserve_thinking,
                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
            );
            assert!(caps.native_tools, "{provider}/{model}: native_tools");
            assert_ne!(
                caps.server_parser, "ollama_qwen3coder",
                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
            );
        }
    }

    #[test]
    fn llamacpp_qwen_keeps_text_tool_wire_format() {
        reset();
        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
        assert_eq!(caps.server_parser, "none");
        assert!(caps.honors_chat_template_kwargs);
        assert!(caps.text_tool_wire_format_supported);
        assert_eq!(
            caps.recommended_endpoint.as_deref(),
            Some("/v1/chat/completions")
        );
    }

    #[test]
    fn dashscope_and_llamacpp_resolve_capabilities() {
        reset();
        // New sibling providers should fall through to `openai` for
        // gpt-*  models even without dedicated rules.
        let caps = lookup("dashscope", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        let caps = lookup("llamacpp", "gpt-5.4-preview");
        assert!(caps.defer_loading);
    }

    #[test]
    fn unknown_provider_has_no_capabilities() {
        reset();
        let caps = lookup("my-custom-proxy", "foo-bar-1");
        assert!(!caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    // --- User override installation paths ---

    #[test]
    fn user_override_adds_new_provider() {
        reset();
        let toml_src = r#"
[[provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("my-proxy", "anything");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn user_override_takes_precedence_over_builtin() {
        reset();
        let toml_src = r#"
[[provider.anthropic]]
model_match = "claude-opus-*"
native_tools = true
defer_loading = false
tool_search = []
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
        clear_user_overrides();
    }

    #[test]
    fn user_override_from_manifest_toml() {
        reset();
        let manifest = r#"
[package]
name = "demo"

[[capabilities.provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_from_manifest_toml(manifest).unwrap();
        let caps = lookup("my-proxy", "foo");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn version_min_requires_parseable_model() {
        reset();
        let toml_src = r#"
[[provider.custom]]
model_match = "*"
version_min = [5, 4]
native_tools = true
"#;
        set_user_overrides_toml(toml_src).unwrap();
        // Unparseable model ID + version_min → rule doesn't match.
        let caps = lookup("custom", "mystery-model");
        assert!(!caps.native_tools);
        clear_user_overrides();
    }

    // --- Glob helper and matrix display ---

    #[test]
    fn glob_match_substring() {
        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
    }

    #[test]
    fn openrouter_namespaced_anthropic_model() {
        reset();
        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
        assert!(caps.defer_loading);
    }

    #[test]
    fn matrix_rows_include_provider_patterns_and_sources() {
        reset();
        let rows = matrix_rows();
        assert!(rows.iter().any(|row| {
            row.provider == "openai"
                && row.model == "gpt-4o*"
                && row.vision
                && row.audio
                && row.json_schema.as_deref() == Some("native")
                && row.source == "builtin"
        }));
    }
}
899                && row.json_schema.as_deref() == Some("native")
900                && row.source == "builtin"
901        }));
902    }
903}