
harn_vm/llm/capabilities.rs

//! Data-driven provider capabilities.
//!
//! The per-(provider, model) capability matrix (native tools, deferred
//! tool loading, tool-search variants, prompt caching, extended thinking,
//! max tool count) lives in the shipped `capabilities.toml` and is
//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
//! in `harn.toml`. This module owns:
//!
//! - loading the built-in TOML (compiled in via `include_str!`);
//! - merging user overrides on top;
//! - matching a `(provider, model)` pair against the rule list with
//!   glob + semver semantics;
//! - exposing a stable `Capabilities` struct that the `LlmProvider`
//!   trait delegates to as the single source of truth.
//!
//! Before this module the Anthropic / OpenAI gates were spread across
//! `providers/anthropic.rs` (`claude_generation`, `claude_model_supports_tool_search`)
//! and `providers/openai_compat.rs` (`gpt_generation`, `gpt_model_supports_tool_search`).
//! Those parsers are still used here — they supply the version extractor —
//! but the boolean gates that used to live alongside them are now data.
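//!
//! For orientation, a rule in that TOML is shaped like this (an
//! illustrative rule, not a row copied from the shipped file):
//!
//! ```toml
//! [[provider.anthropic]]
//! model_match = "claude-opus-*"   # glob, matched case-insensitively
//! native_tools = true
//! prompt_caching = true
//! ```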

use std::cell::RefCell;
use std::collections::BTreeMap;
use std::sync::OnceLock;

use serde::{Deserialize, Serialize};

use super::providers::anthropic::claude_generation;
use super::providers::openai_compat::gpt_generation;

/// Shipped default rules. Compiled into the binary at build time.
const BUILTIN_TOML: &str = include_str!("capabilities.toml");

/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists. First matching rule wins.
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Sibling → canonical family mapping. Providers with no rule of
    /// their own fall through to the named family (recursively).
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
}

/// One row of the capability matrix.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules whose `version_min` cannot be checked because
    /// the model ID fails to parse are skipped, not merged; see the
    /// illustrative rule after this struct.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    #[serde(default)]
    pub native_tools: Option<bool>,
    #[serde(default)]
    pub defer_loading: Option<bool>,
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    #[serde(default)]
    pub max_tools: Option<u32>,
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Whether this provider/model route accepts image or other visual
    /// input blocks through Harn's LLM message path.
    #[serde(default)]
    pub vision: Option<bool>,
    /// Whether this provider/model route accepts audio input blocks
    /// through Harn's LLM message path.
    #[serde(default)]
    pub audio: Option<bool>,
    /// Whether this provider/model route accepts PDF/document input blocks
    /// through Harn's LLM message path.
    #[serde(default)]
    pub pdf: Option<bool>,
    /// Structured-output transport strategy. Known values are:
    /// `native`, `tool_use`, `format_kw`, and `none`.
    #[serde(default)]
    pub structured_output: Option<String>,
    /// Legacy name retained for project overrides written before
    /// `structured_output` became the canonical capability.
    #[serde(default)]
    pub json_schema: Option<String>,
    /// Supported thinking/reasoning modes for this rule. Values are
    /// script-facing mode names: `enabled`, `adaptive`, and `effort`.
    #[serde(default)]
    pub thinking_modes: Option<Vec<String>>,
    /// Whether Anthropic interleaved thinking is supported for this
    /// provider/model route.
    #[serde(default)]
    pub interleaved_thinking_supported: Option<bool>,
    /// Anthropic beta features that should be requested for this route.
    #[serde(default)]
    pub anthropic_beta_features: Option<Vec<String>>,
    /// Legacy override compatibility. New built-in rules should use
    /// `thinking_modes` so the capability matrix preserves mode detail.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Whether the model accepts image inputs in chat content.
    #[serde(default)]
    pub vision_supported: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
    /// Name of any server-side response parser that can transform model
    /// bytes before Harn sees them. `none` means the provider returns the
    /// model text/tool channel without an implicit parser.
    #[serde(default)]
    pub server_parser: Option<String>,
    /// Whether provider-specific `chat_template_kwargs` are honored.
    /// Some OpenAI-compatible servers silently drop unknown kwargs.
    #[serde(default)]
    pub honors_chat_template_kwargs: Option<bool>,
    /// Whether this route requires OpenAI's `max_completion_tokens`
    /// request field instead of legacy `max_tokens`.
    #[serde(default)]
    pub requires_completion_tokens: Option<bool>,
    /// Whether this route accepts OpenAI's `reasoning_effort` request field.
    #[serde(default)]
    pub reasoning_effort_supported: Option<bool>,
    /// Preferred endpoint family for this provider/model route. Values
    /// are descriptive labels consumed by providers, e.g.
    /// `/api/generate-raw` for Ollama raw prompt bypass.
    #[serde(default)]
    pub recommended_endpoint: Option<String>,
    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
    /// survive the provider route and return in the visible response body.
    #[serde(default)]
    pub text_tool_wire_format_supported: Option<bool>,
}
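
// Illustrative `version_min` semantics (a hypothetical rule; the shipped
// `capabilities.toml` remains the source of truth for real gates):
//
//   [[provider.openai]]
//   model_match = "gpt-*"
//   version_min = [5, 4]   # `gpt-5.4` parses to (5, 4) and matches;
//                          # `gpt-5.3` compares lower, so the rule is skipped
//   defer_loading = true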

/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to conservative defaults (`false` / empty / `None`,
/// with `server_parser` falling back to `"none"` and
/// `text_tool_wire_format_supported` to `true`) so callers never have
/// to unwrap an `Option<bool>` for what are really boolean gates.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    pub native_tools: bool,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub structured_output: Option<String>,
    /// Legacy mirror for CLI display and older callers.
    pub json_schema: Option<String>,
    pub thinking_modes: Vec<String>,
    pub interleaved_thinking_supported: bool,
    pub anthropic_beta_features: Vec<String>,
    pub vision_supported: bool,
    pub preserve_thinking: bool,
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub requires_completion_tokens: bool,
    pub reasoning_effort_supported: bool,
    pub recommended_endpoint: Option<String>,
    pub text_tool_wire_format_supported: bool,
}

impl Default for Capabilities {
    fn default() -> Self {
        Self {
            native_tools: false,
            defer_loading: false,
            tool_search: Vec::new(),
            max_tools: None,
            prompt_caching: false,
            vision: false,
            audio: false,
            pdf: false,
            structured_output: None,
            json_schema: None,
            thinking_modes: Vec::new(),
            interleaved_thinking_supported: false,
            anthropic_beta_features: Vec::new(),
            vision_supported: false,
            preserve_thinking: false,
            server_parser: "none".to_string(),
            honors_chat_template_kwargs: false,
            requires_completion_tokens: false,
            reasoning_effort_supported: false,
            recommended_endpoint: None,
            text_tool_wire_format_supported: true,
        }
    }
}

/// Display-oriented row for `harn check --provider-matrix` and the generated
/// docs page. Rows are intentionally rule-shaped: `model` is the rule's
/// `model_match` pattern, because the shipped capability source of truth is a
/// first-match rule table rather than an exhaustive remote model inventory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ProviderCapabilityMatrixRow {
    pub provider: String,
    pub model: String,
    pub thinking: Vec<String>,
    pub vision: bool,
    pub audio: bool,
    pub pdf: bool,
    pub streaming: bool,
    pub json_schema: Option<String>,
    pub tools: bool,
    pub cache: bool,
    pub source: String,
}

thread_local! {
    /// Per-thread user overrides installed by the CLI at startup. Kept
    /// thread-local (not process-static) to match the rest of the VM
    /// state model — the VM is !Send and each VM thread owns its own
    /// configuration.
    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
}

/// Lazily-parsed built-in rules. The `include_str!` content is a static
/// constant; parsing it once per process is safe and free of ordering
/// hazards.
static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();

fn builtin() -> &'static CapabilitiesFile {
    BUILTIN.get_or_init(|| {
        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
            .expect("built-in capabilities.toml must always parse")
    })
}

/// Install project-level overrides for the current thread. Usually
/// called once at CLI bootstrap after reading `harn.toml`. Passing
/// `None` clears any prior override.
pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
}

/// Clear any thread-local user overrides. Used between test runs.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}

/// Parse a TOML string containing the capabilities section's own shape
/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
/// same layout used by the built-in `capabilities.toml`) and install as
/// the current thread's override.
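///
/// A minimal override in that shape (the same shape this module's tests
/// install) might be:
///
/// ```toml
/// [[provider.my-proxy]]
/// model_match = "*"
/// native_tools = true
/// ```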
pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
    let parsed: CapabilitiesFile = toml::from_str(src).map_err(|e| e.to_string())?;
    set_user_overrides(Some(parsed));
    Ok(())
}

/// Extract the `[capabilities]` section from a full `harn.toml` source
/// and install it as the current thread's override. The schema inside
/// that section mirrors `CapabilitiesFile` but with every key prefixed
/// by `capabilities.`:
///
/// ```toml
/// [[capabilities.provider.my-proxy]]
/// model_match = "*"
/// native_tools = true
/// tool_search = ["hosted"]
/// ```
pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
    #[derive(Deserialize)]
    struct Manifest {
        #[serde(default)]
        capabilities: Option<CapabilitiesFile>,
    }
    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
    set_user_overrides(parsed.capabilities);
    Ok(())
}

/// Look up effective capabilities for a `(provider, model)` pair.
/// Walks the provider_family chain until it finds a rule list that
/// matches. Within any one provider's rule list, user overrides are
/// consulted before the built-in rules. The first matching rule wins —
/// later rules (and later layers in the family chain) are ignored.
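///
/// Illustrative call (`ignore`d because the resolved values depend on the
/// shipped `capabilities.toml` and any installed overrides):
///
/// ```ignore
/// let caps = lookup("anthropic", "claude-opus-4-7");
/// if caps.native_tools {
///     // send tool definitions through the provider's native tool API
/// }
/// ```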
pub fn lookup(provider: &str, model: &str) -> Capabilities {
    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
    lookup_with(provider, model, builtin(), user.as_ref())
}

/// Return the currently-effective provider capability rule matrix. User
/// override rows, when installed for the current thread, are emitted before
/// built-in rows so the display mirrors lookup precedence.
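///
/// Sketch of a display loop over the rows (illustrative only):
///
/// ```ignore
/// for row in matrix_rows() {
///     println!("{:<12} {:<24} tools={} cache={} [{}]",
///         row.provider, row.model, row.tools, row.cache, row.source);
/// }
/// ```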
pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
    let mut rows = Vec::new();
    if let Some(user) = user.as_ref() {
        push_matrix_rows(&mut rows, user, "project");
    }
    push_matrix_rows(&mut rows, builtin(), "builtin");
    rows
}

fn push_matrix_rows(
    rows: &mut Vec<ProviderCapabilityMatrixRow>,
    file: &CapabilitiesFile,
    source: &str,
) {
    for (provider, rules) in &file.provider {
        for rule in rules {
            rows.push(rule_to_matrix_row(provider, rule, source));
        }
    }
}

fn rule_to_matrix_row(
    provider: &str,
    rule: &ProviderRule,
    source: &str,
) -> ProviderCapabilityMatrixRow {
    ProviderCapabilityMatrixRow {
        provider: provider.to_string(),
        model: rule.model_match.clone(),
        thinking: rule_thinking_modes(rule),
        vision: rule_vision(rule),
        audio: rule.audio.unwrap_or(false),
        pdf: rule.pdf.unwrap_or(false),
        // No rule field governs streaming yet; every route is displayed
        // as streaming.
        streaming: true,
        json_schema: rule_structured_output(rule),
        tools: rule.native_tools.unwrap_or(false),
        cache: rule.prompt_caching.unwrap_or(false),
        source: source.to_string(),
    }
}

fn rule_thinking_modes(rule: &ProviderRule) -> Vec<String> {
    rule.thinking_modes.clone().unwrap_or_else(|| {
        if rule.thinking.unwrap_or(false) {
            vec!["enabled".to_string()]
        } else {
            Vec::new()
        }
    })
}

fn rule_vision(rule: &ProviderRule) -> bool {
    rule.vision.or(rule.vision_supported).unwrap_or(false)
}

fn lookup_with(
    provider: &str,
    model: &str,
    builtin: &CapabilitiesFile,
    user: Option<&CapabilitiesFile>,
) -> Capabilities {
    // Special case: mock spoofs either shape. Try anthropic first
    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
    // resolves to the Anthropic capability row — the same behaviour
    // the hardcoded dispatch gave before this refactor.
    if provider == "mock" {
        if let Some(caps) = try_match_layer(user, builtin, "anthropic", model, provider) {
            return caps;
        }
        if let Some(caps) = try_match_layer(user, builtin, "openai", model, provider) {
            return caps;
        }
        return Capabilities::default();
    }

    // Normal chain: walk provider → family(provider) → ... with a
    // visited-guard to avoid cycles in malformed user overrides.
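    // For example, the built-in family table routes `groq` through the
    // `openai` family, so a groq lookup with no groq-specific rule falls
    // through to the openai rule list (exercised by
    // `groq_inherits_openai_family_only` below).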
    let mut current = provider.to_string();
    let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
    while visited.insert(current.clone()) {
        if let Some(caps) = try_match_layer(user, builtin, &current, model, provider) {
            return caps;
        }
        let next = user
            .and_then(|f| f.provider_family.get(&current))
            .or_else(|| builtin.provider_family.get(&current))
            .cloned();
        match next {
            Some(parent) => current = parent,
            None => break,
        }
    }
    Capabilities::default()
}

/// Try the ordered rule list for `layer_provider` (user rules first,
/// then built-in rules). Returns `Some(caps)` on the first match, else
/// `None`. `original_provider` is threaded through for diagnostics only
/// and is currently unused.
fn try_match_layer(
    user: Option<&CapabilitiesFile>,
    builtin: &CapabilitiesFile,
    layer_provider: &str,
    model: &str,
    _original_provider: &str,
) -> Option<Capabilities> {
    if let Some(user) = user {
        if let Some(rules) = user.provider.get(layer_provider) {
            for rule in rules {
                if rule_matches(rule, model) {
                    return Some(rule_to_caps(rule));
                }
            }
        }
    }
    if let Some(rules) = builtin.provider.get(layer_provider) {
        for rule in rules {
            if rule_matches(rule, model) {
                return Some(rule_to_caps(rule));
            }
        }
    }
    None
}

fn rule_to_caps(rule: &ProviderRule) -> Capabilities {
    let thinking_modes = rule_thinking_modes(rule);
    Capabilities {
        native_tools: rule.native_tools.unwrap_or(false),
        defer_loading: rule.defer_loading.unwrap_or(false),
        tool_search: rule.tool_search.clone().unwrap_or_default(),
        max_tools: rule.max_tools,
        prompt_caching: rule.prompt_caching.unwrap_or(false),
        vision: rule_vision(rule),
        audio: rule.audio.unwrap_or(false),
        pdf: rule.pdf.unwrap_or(false),
        structured_output: rule_structured_output(rule),
        json_schema: rule_structured_output(rule),
        thinking_modes,
        interleaved_thinking_supported: rule.interleaved_thinking_supported.unwrap_or(false),
        anthropic_beta_features: rule.anthropic_beta_features.clone().unwrap_or_default(),
        vision_supported: rule.vision_supported.unwrap_or(false),
        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
        server_parser: rule
            .server_parser
            .clone()
            .unwrap_or_else(|| "none".to_string()),
        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
        requires_completion_tokens: rule.requires_completion_tokens.unwrap_or(false),
        reasoning_effort_supported: rule.reasoning_effort_supported.unwrap_or(false),
        recommended_endpoint: rule.recommended_endpoint.clone(),
        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
    }
}

fn rule_structured_output(rule: &ProviderRule) -> Option<String> {
    rule.structured_output
        .clone()
        .or_else(|| rule.json_schema.clone())
        .filter(|value| value != "none")
}

fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
    let lower = model.to_lowercase();
    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
        return false;
    }
    if let Some(version_min) = &rule.version_min {
        if version_min.len() != 2 {
            return false;
        }
        let want = (version_min[0], version_min[1]);
        let have = match extract_version(model) {
            Some(v) => v,
            // `version_min` was set but the model ID can't be parsed.
            // Fail closed: skip this rule so more permissive catch-all
            // rules below can still match.
            None => return false,
        };
        if have < want {
            return false;
        }
    }
    true
}

/// Extract `(major, minor)` from a model ID by trying the Anthropic
/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
/// Both parsers return `None` for shapes they don't recognise so this
/// never mis-parses across families.
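///
/// Illustrative outputs (the shapes the tests below exercise):
/// `claude-opus-4-7` parses to `(4, 7)`, `gpt-5.4` to `(5, 4)`, and an
/// unrecognised `mystery-model` yields `None`.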
fn extract_version(model: &str) -> Option<(u32, u32)> {
    claude_generation(model).or_else(|| gpt_generation(model))
}

/// Simple glob matching with `*` wildcards. Mirrors the helper in
/// `llm_config.rs` — keep them in sync if either ever grows regex or
/// character-class support.
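///
/// Illustrative matches:
///
/// ```text
/// claude-*   matches  claude-opus-4-7   (prefix)
/// *gpt*      matches  openai/gpt-5.4    (substring)
/// gpt-*o     matches  gpt-4o            (single mid-`*`)
/// ```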
fn glob_match(pattern: &str, input: &str) -> bool {
    if let Some(prefix) = pattern.strip_suffix('*') {
        if let Some(rest) = prefix.strip_prefix('*') {
            // `*foo*` — substring match.
            return input.contains(rest);
        }
        return input.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return input.ends_with(suffix);
    }
    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            return input.starts_with(parts[0]) && input.ends_with(parts[1]);
        }
        // More than one mid-`*` is unsupported; fall back to a literal
        // comparison, which only matches an input containing `*` itself.
        return input == pattern;
    }
    input == pattern
}

#[cfg(test)]
mod tests {
    use super::*;

    fn reset() {
        clear_user_overrides();
    }

    #[test]
    fn anthropic_opus_47_gets_full_capabilities() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
        assert!(caps.prompt_caching);
        assert_eq!(caps.thinking_modes, vec!["adaptive"]);
        assert!(caps.vision_supported);
        assert_eq!(caps.max_tools, Some(10000));
    }

    #[test]
    fn anthropic_opus_46_uses_budgeted_thinking() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-6");
        assert_eq!(caps.thinking_modes, vec!["enabled"]);
        assert!(caps.interleaved_thinking_supported);
    }

    #[test]
    fn anthropic_opus_45_does_not_support_interleaved_thinking() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-5");
        assert_eq!(caps.thinking_modes, vec!["enabled"]);
        assert!(!caps.interleaved_thinking_supported);
    }

    #[test]
    fn override_can_supply_anthropic_beta_features() {
        reset();
        let toml_src = r#"
[[provider.anthropic]]
model_match = "claude-custom-*"
native_tools = true
anthropic_beta_features = ["fine-grained-tool-streaming-2025-05-14"]
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("anthropic", "claude-custom-1");
        assert_eq!(
            caps.anthropic_beta_features,
            vec!["fine-grained-tool-streaming-2025-05-14"]
        );
        reset();
    }

    #[test]
    fn anthropic_haiku_44_has_no_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-4");
        // Haiku 4.4 falls through to the `claude-*` catch-all row.
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn anthropic_haiku_45_supports_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-5");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn old_claude_gets_catchall() {
        reset();
        let caps = lookup("anthropic", "claude-opus-3-5");
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn openai_gpt_54_supports_tool_search() {
        reset();
        let caps = lookup("openai", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
        assert_eq!(caps.json_schema.as_deref(), Some("native"));
    }

    #[test]
    fn openai_gpt_53_has_native_tools_only() {
        reset();
        let caps = lookup("openai", "gpt-5.3");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(!caps.vision_supported);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn openai_gpt_4o_matrix_fields_include_multimodal_support() {
        reset();
        let caps = lookup("openai", "gpt-4o");
        assert!(caps.native_tools);
        assert!(caps.vision);
        assert!(caps.audio);
        assert_eq!(caps.json_schema.as_deref(), Some("native"));
    }

    #[test]
    fn openai_reasoning_models_support_effort() {
        reset();
        let caps = lookup("openai", "o3");
        assert_eq!(caps.thinking_modes, vec!["effort"]);
        assert!(caps.requires_completion_tokens);
        assert!(caps.reasoning_effort_supported);
        let prefixed = lookup("openrouter", "openai/o4-mini");
        assert!(prefixed.requires_completion_tokens);
        assert!(prefixed.reasoning_effort_supported);
    }

    #[test]
    fn vision_capability_gates_known_multimodal_models() {
        reset();
        assert!(lookup("openai", "gpt-4o").vision_supported);
        assert!(lookup("openai", "gpt-5.4-preview").vision_supported);
        assert!(lookup("anthropic", "claude-sonnet-4-6").vision_supported);
        assert!(lookup("openrouter", "google/gemini-2.5-flash").vision_supported);
        assert!(lookup("gemini", "gemini-2.5-flash").vision_supported);
        assert!(lookup("ollama", "llava:latest").vision_supported);
        assert!(!lookup("openai", "gpt-3.5-turbo").vision_supported);
        assert!(!lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4").vision_supported);
    }

    #[test]
    fn openrouter_inherits_openai() {
        reset();
        let caps = lookup("openrouter", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn groq_inherits_openai_family_only() {
        reset();
        let caps = lookup("groq", "gpt-5.5-preview");
        assert!(caps.defer_loading);
    }

    #[test]
    fn mock_with_claude_model_routes_to_anthropic() {
        reset();
        let caps = lookup("mock", "claude-sonnet-4-7");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn mock_with_gpt_model_routes_to_openai() {
        reset();
        let caps = lookup("mock", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn qwen36_ollama_preserves_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert_eq!(caps.json_schema.as_deref(), Some("format_kw"));
        assert!(!caps.thinking_modes.is_empty());
        assert!(
            caps.preserve_thinking,
            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.honors_chat_template_kwargs);
        assert_eq!(
            caps.recommended_endpoint.as_deref(),
            Some("/api/generate-raw")
        );
        assert!(!caps.text_tool_wire_format_supported);
    }

    #[test]
    fn qwen35_ollama_does_not_preserve_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert!(!caps.thinking_modes.is_empty());
        assert!(
            !caps.preserve_thinking,
            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.text_tool_wire_format_supported);
    }

    #[test]
    fn qwen36_routed_providers_all_preserve_thinking() {
        reset();
        for (provider, model) in [
            ("openrouter", "qwen/qwen3.6-plus"),
            ("together", "Qwen/Qwen3.6-35B-A3B"),
            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
            ("dashscope", "qwen3.6-plus"),
            ("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF"),
            ("local", "Qwen3.6-35B-A3B"),
            ("mlx", "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
            ("mlx", "Qwen/Qwen3.6-27B"),
        ] {
            let caps = lookup(provider, model);
            assert!(
                !caps.thinking_modes.is_empty(),
                "{provider}/{model}: thinking"
            );
            assert!(
                caps.preserve_thinking,
                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
            );
            assert!(caps.native_tools, "{provider}/{model}: native_tools");
            assert_ne!(
                caps.server_parser, "ollama_qwen3coder",
                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
            );
        }
    }

    #[test]
    fn llamacpp_qwen_keeps_text_tool_wire_format() {
        reset();
        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
        assert_eq!(caps.server_parser, "none");
        assert!(caps.honors_chat_template_kwargs);
        assert!(caps.text_tool_wire_format_supported);
        assert_eq!(
            caps.recommended_endpoint.as_deref(),
            Some("/v1/chat/completions")
        );
    }

    #[test]
    fn dashscope_and_llamacpp_resolve_capabilities() {
        reset();
        // New sibling providers should fall through to `openai` for
        // gpt-* models even without dedicated rules.
        let caps = lookup("dashscope", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        let caps = lookup("llamacpp", "gpt-5.4-preview");
        assert!(caps.defer_loading);
    }

    #[test]
    fn unknown_provider_has_no_capabilities() {
        reset();
        let caps = lookup("my-custom-proxy", "foo-bar-1");
        assert!(!caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn user_override_adds_new_provider() {
        reset();
        let toml_src = r#"
[[provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("my-proxy", "anything");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn user_override_takes_precedence_over_builtin() {
        reset();
        let toml_src = r#"
[[provider.anthropic]]
model_match = "claude-opus-*"
native_tools = true
defer_loading = false
tool_search = []
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
        clear_user_overrides();
    }

    #[test]
    fn user_override_from_manifest_toml() {
        reset();
        let manifest = r#"
[package]
name = "demo"

[[capabilities.provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_from_manifest_toml(manifest).unwrap();
        let caps = lookup("my-proxy", "foo");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn version_min_requires_parseable_model() {
        reset();
        let toml_src = r#"
[[provider.custom]]
model_match = "*"
version_min = [5, 4]
native_tools = true
"#;
        set_user_overrides_toml(toml_src).unwrap();
        // Unparseable model ID + version_min → rule doesn't match.
        let caps = lookup("custom", "mystery-model");
        assert!(!caps.native_tools);
        clear_user_overrides();
    }

    #[test]
    fn glob_match_substring() {
        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
    }

    #[test]
    fn openrouter_namespaced_anthropic_model() {
        reset();
        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
        assert!(caps.defer_loading);
    }

    #[test]
    fn matrix_rows_include_provider_patterns_and_sources() {
        reset();
        let rows = matrix_rows();
        assert!(rows.iter().any(|row| {
            row.provider == "openai"
                && row.model == "gpt-4o*"
                && row.vision
                && row.audio
                && row.json_schema.as_deref() == Some("native")
                && row.source == "builtin"
        }));
    }
}