Skip to main content

harn_vm/llm/
capabilities.rs

1//! Data-driven provider capabilities.
2//!
3//! The per-(provider, model) capability matrix (native tools, deferred
4//! tool loading, tool-search variants, prompt caching, extended thinking,
5//! max tool count) lives in the shipped `capabilities.toml` and is
6//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
7//! in `harn.toml`. This module owns:
8//!
9//! - loading the built-in TOML (compiled in via `include_str!`);
10//! - merging user overrides on top;
11//! - matching a `(provider, model)` pair against the rule list with
12//!   glob + semver semantics;
13//! - exposing a stable `Capabilities` struct that the `LlmProvider`
14//!   trait delegates to as the single source of truth.
15//!
16//! Before this module the Anthropic / OpenAI gates were spread across
17//! `providers/anthropic.rs` (`claude_generation`, `claude_model_supports_tool_search`)
18//! and `providers/openai_compat.rs` (`gpt_generation`, `gpt_model_supports_tool_search`).
19//! Those parsers are still used here — they supply the version extractor —
20//! but the boolean gates that used to live alongside them are now data.
21
22use std::cell::RefCell;
23use std::collections::BTreeMap;
24use std::sync::OnceLock;
25
26use serde::Deserialize;
27
28use super::providers::anthropic::claude_generation;
29use super::providers::openai_compat::gpt_generation;
30
/// Shipped default rules. Compiled into the binary at build time.
/// `include_str!` resolves the path relative to this source file, so
/// `capabilities.toml` must sit next to `capabilities.rs`.
const BUILTIN_TOML: &str = include_str!("capabilities.toml");
33
/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
///
/// Both maps are `BTreeMap`, so iteration order (and any debug dump)
/// is deterministic across runs.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists. First matching rule wins.
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Sibling → canonical family mapping. Providers with no rule of
    /// their own fall through to the named family (recursively).
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
}
46
/// One row of the capability matrix.
///
/// Every capability field is `Option` so a rule only asserts what it
/// explicitly sets; unset fields resolve to the defaults described on
/// [`Capabilities`] when the rule is converted via `rule_to_caps`.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules with an unparseable `version_min` for the
    /// given model are skipped, not merged.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    /// Provider exposes native (structured) tool calling for this model.
    #[serde(default)]
    pub native_tools: Option<bool>,
    /// Deferred tool loading is supported on this route.
    #[serde(default)]
    pub defer_loading: Option<bool>,
    /// Supported tool-search variants (e.g. `"bm25"`, `"regex"`,
    /// `"hosted"`, `"client"`). Empty / unset means no tool search.
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    /// Maximum tool count for this provider/model, when bounded.
    #[serde(default)]
    pub max_tools: Option<u32>,
    /// Prompt caching is available on this route.
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Extended thinking is available on this route.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
    /// Name of any server-side response parser that can transform model
    /// bytes before Harn sees them. `none` means the provider returns the
    /// model text/tool channel without an implicit parser.
    #[serde(default)]
    pub server_parser: Option<String>,
    /// Whether provider-specific `chat_template_kwargs` are honored.
    /// Some OpenAI-compatible servers silently drop unknown kwargs.
    #[serde(default)]
    pub honors_chat_template_kwargs: Option<bool>,
    /// Preferred endpoint family for this provider/model route. Values
    /// are descriptive labels consumed by providers, e.g.
    /// `/api/generate-raw` for Ollama raw prompt bypass.
    #[serde(default)]
    pub recommended_endpoint: Option<String>,
    /// Whether Harn's text-tool protocol (`<tool_call>name({...})`) can
    /// survive the provider route and return in the visible response body.
    #[serde(default)]
    pub text_tool_wire_format_supported: Option<bool>,
}
98
/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to `false` / empty / `None` so callers never have to
/// unwrap an `Option<bool>` for what are really boolean gates.
///
/// The two non-obvious defaults (see the `Default` impl below):
/// `server_parser` resolves to the sentinel string `"none"`, and
/// `text_tool_wire_format_supported` defaults to `true`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Capabilities {
    pub native_tools: bool,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    pub thinking: bool,
    pub preserve_thinking: bool,
    pub server_parser: String,
    pub honors_chat_template_kwargs: bool,
    pub recommended_endpoint: Option<String>,
    pub text_tool_wire_format_supported: bool,
}
116
117impl Default for Capabilities {
118    fn default() -> Self {
119        Self {
120            native_tools: false,
121            defer_loading: false,
122            tool_search: Vec::new(),
123            max_tools: None,
124            prompt_caching: false,
125            thinking: false,
126            preserve_thinking: false,
127            server_parser: "none".to_string(),
128            honors_chat_template_kwargs: false,
129            recommended_endpoint: None,
130            text_tool_wire_format_supported: true,
131        }
132    }
133}
134
thread_local! {
    /// Per-thread user overrides installed by the CLI at startup. Kept
    /// thread-local (not process-static) to match the rest of the VM
    /// state model — the VM is !Send and each VM thread owns its own
    /// configuration. `None` means "no overrides installed"; lookups
    /// then consult only the built-in rules.
    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
}
142
/// Lazily-parsed built-in rules. The `include_str!` content is a static
/// constant; parsing it once per process is safe and free of ordering
/// hazards. Access goes through `builtin()` below.
static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
147
148fn builtin() -> &'static CapabilitiesFile {
149    BUILTIN.get_or_init(|| {
150        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
151            .expect("capabilities.toml must parse at build time")
152    })
153}
154
155/// Install project-level overrides for the current thread. Usually
156/// called once at CLI bootstrap after reading `harn.toml`. Passing
157/// `None` clears any prior override.
158pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
159    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
160}
161
162/// Clear any thread-local user overrides. Used between test runs.
163pub fn clear_user_overrides() {
164    set_user_overrides(None);
165}
166
167/// Parse a TOML string containing the capabilities section's own shape
168/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
169/// same layout used by the built-in `capabilities.toml`) and install as
170/// the current thread's override.
171pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
172    let parsed: CapabilitiesFile = toml::from_str(src).map_err(|e| e.to_string())?;
173    set_user_overrides(Some(parsed));
174    Ok(())
175}
176
177/// Extract the `[capabilities]` section from a full `harn.toml` source
178/// and install it as the current thread's override. The schema inside
179/// that section mirrors `CapabilitiesFile` but with every key prefixed
180/// by `capabilities.`:
181///
182/// ```toml
183/// [[capabilities.provider.my-proxy]]
184/// model_match = "*"
185/// native_tools = true
186/// tool_search = ["hosted"]
187/// ```
188pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
189    #[derive(Deserialize)]
190    struct Manifest {
191        #[serde(default)]
192        capabilities: Option<CapabilitiesFile>,
193    }
194    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
195    set_user_overrides(parsed.capabilities);
196    Ok(())
197}
198
199/// Look up effective capabilities for a `(provider, model)` pair.
200/// Walks the provider_family chain until it finds a rule list that
201/// matches. Within any one provider's rule list, user overrides are
202/// consulted before the built-in rules. The first matching rule wins —
203/// later rules (and later layers in the family chain) are ignored.
204pub fn lookup(provider: &str, model: &str) -> Capabilities {
205    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
206    lookup_with(provider, model, builtin(), user.as_ref())
207}
208
209fn lookup_with(
210    provider: &str,
211    model: &str,
212    builtin: &CapabilitiesFile,
213    user: Option<&CapabilitiesFile>,
214) -> Capabilities {
215    // Special case: mock spoofs either shape. Try anthropic first
216    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
217    // resolves to the Anthropic capability row — the same behaviour
218    // the hardcoded dispatch gave before this refactor.
219    if provider == "mock" {
220        if let Some(caps) = try_match_layer(user, builtin, "anthropic", model, provider) {
221            return caps;
222        }
223        if let Some(caps) = try_match_layer(user, builtin, "openai", model, provider) {
224            return caps;
225        }
226        return Capabilities::default();
227    }
228
229    // Normal chain: walk provider → family(provider) → ... with a
230    // visited-guard to avoid cycles in malformed user overrides.
231    let mut current = provider.to_string();
232    let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
233    while visited.insert(current.clone()) {
234        if let Some(caps) = try_match_layer(user, builtin, &current, model, provider) {
235            return caps;
236        }
237        let next = user
238            .and_then(|f| f.provider_family.get(&current))
239            .or_else(|| builtin.provider_family.get(&current))
240            .cloned();
241        match next {
242            Some(parent) => current = parent,
243            None => break,
244        }
245    }
246    Capabilities::default()
247}
248
249/// Try the ordered rule list for `layer_provider` (user rules first,
250/// then built-in rules). Returns `Some(caps)` on the first match, else
251/// `None`. `original_provider` is threaded through only for diagnostics.
252fn try_match_layer(
253    user: Option<&CapabilitiesFile>,
254    builtin: &CapabilitiesFile,
255    layer_provider: &str,
256    model: &str,
257    _original_provider: &str,
258) -> Option<Capabilities> {
259    if let Some(user) = user {
260        if let Some(rules) = user.provider.get(layer_provider) {
261            for rule in rules {
262                if rule_matches(rule, model) {
263                    return Some(rule_to_caps(rule));
264                }
265            }
266        }
267    }
268    if let Some(rules) = builtin.provider.get(layer_provider) {
269        for rule in rules {
270            if rule_matches(rule, model) {
271                return Some(rule_to_caps(rule));
272            }
273        }
274    }
275    None
276}
277
278fn rule_to_caps(rule: &ProviderRule) -> Capabilities {
279    Capabilities {
280        native_tools: rule.native_tools.unwrap_or(false),
281        defer_loading: rule.defer_loading.unwrap_or(false),
282        tool_search: rule.tool_search.clone().unwrap_or_default(),
283        max_tools: rule.max_tools,
284        prompt_caching: rule.prompt_caching.unwrap_or(false),
285        thinking: rule.thinking.unwrap_or(false),
286        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
287        server_parser: rule
288            .server_parser
289            .clone()
290            .unwrap_or_else(|| "none".to_string()),
291        honors_chat_template_kwargs: rule.honors_chat_template_kwargs.unwrap_or(false),
292        recommended_endpoint: rule.recommended_endpoint.clone(),
293        text_tool_wire_format_supported: rule.text_tool_wire_format_supported.unwrap_or(true),
294    }
295}
296
297fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
298    let lower = model.to_lowercase();
299    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
300        return false;
301    }
302    if let Some(version_min) = &rule.version_min {
303        if version_min.len() != 2 {
304            return false;
305        }
306        let want = (version_min[0], version_min[1]);
307        let have = match extract_version(model) {
308            Some(v) => v,
309            // `version_min` was set but the model ID can't be parsed.
310            // Fail closed: skip this rule so more permissive catch-all
311            // rules below can still match.
312            None => return false,
313        };
314        if have < want {
315            return false;
316        }
317    }
318    true
319}
320
321/// Extract `(major, minor)` from a model ID by trying the Anthropic
322/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
323/// Both parsers return `None` for shapes they don't recognise so this
324/// never mis-parses across families.
325fn extract_version(model: &str) -> Option<(u32, u32)> {
326    claude_generation(model).or_else(|| gpt_generation(model))
327}
328
/// Simple glob matching with `*` wildcards. Mirrors the helper in
/// `llm_config.rs` — keep them in sync if either ever grows regex or
/// character-class support.
///
/// Supported shapes: exact match (no `*`), `foo*` (prefix), `*foo`
/// (suffix), `*foo*` (substring), and a single mid-pattern `*` as in
/// `a*b`. Patterns with two or more interior `*`s fall back to a
/// literal comparison (which effectively never matches a model ID).
fn glob_match(pattern: &str, input: &str) -> bool {
    if let Some(prefix) = pattern.strip_suffix('*') {
        if let Some(rest) = prefix.strip_prefix('*') {
            // `*foo*` — substring match.
            return input.contains(rest);
        }
        // `foo*` — prefix match (a bare `*` lands here and matches all).
        return input.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        // `*foo` — suffix match.
        return input.ends_with(suffix);
    }
    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            // `a*b` — prefix + suffix. The input must be long enough
            // for both halves to fit without overlapping: previously
            // `ab*ba` wrongly matched `aba` because starts_with and
            // ends_with were allowed to share characters.
            return input.len() >= parts[0].len() + parts[1].len()
                && input.starts_with(parts[0])
                && input.ends_with(parts[1]);
        }
        return input == pattern;
    }
    input == pattern
}
352
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: expected values in these tests mirror rows in the shipped
    // `capabilities.toml` (not visible in this file) — update both
    // together when the matrix changes.

    // Tests share the thread-local override slot, so each test clears
    // any override a previous test on the same thread installed.
    fn reset() {
        clear_user_overrides();
    }

    #[test]
    fn anthropic_opus_47_gets_full_capabilities() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
        assert!(caps.prompt_caching);
        assert!(caps.thinking);
        assert_eq!(caps.max_tools, Some(10000));
    }

    #[test]
    fn anthropic_haiku_44_has_no_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-4");
        // Haiku 4.4 falls through to the `claude-*` catch-all row.
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn anthropic_haiku_45_supports_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-5");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn old_claude_gets_catchall() {
        reset();
        let caps = lookup("anthropic", "claude-opus-3-5");
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn openai_gpt_54_supports_tool_search() {
        reset();
        let caps = lookup("openai", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn openai_gpt_53_has_native_tools_only() {
        reset();
        let caps = lookup("openai", "gpt-5.3");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    // Family-chain resolution: providers with no rules of their own
    // inherit via `provider_family`.
    #[test]
    fn openrouter_inherits_openai() {
        reset();
        let caps = lookup("openrouter", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn groq_inherits_openai_family_only() {
        reset();
        let caps = lookup("groq", "gpt-5.5-preview");
        assert!(caps.defer_loading);
    }

    // The `mock` provider probes anthropic first, then openai, based
    // on the shape of the model string (see `lookup_with`).
    #[test]
    fn mock_with_claude_model_routes_to_anthropic() {
        reset();
        let caps = lookup("mock", "claude-sonnet-4-7");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn mock_with_gpt_model_routes_to_openai() {
        reset();
        let caps = lookup("mock", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn qwen36_ollama_preserves_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert!(caps.thinking);
        assert!(
            caps.preserve_thinking,
            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.honors_chat_template_kwargs);
        assert_eq!(
            caps.recommended_endpoint.as_deref(),
            Some("/api/generate-raw")
        );
        assert!(!caps.text_tool_wire_format_supported);
    }

    #[test]
    fn qwen35_ollama_does_not_preserve_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert!(caps.thinking);
        assert!(
            !caps.preserve_thinking,
            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
        );
        assert_eq!(caps.server_parser, "ollama_qwen3coder");
        assert!(!caps.text_tool_wire_format_supported);
    }

    #[test]
    fn qwen36_routed_providers_all_preserve_thinking() {
        reset();
        // Same model family routed through many providers — each must
        // resolve to a Qwen3.6 row with preserve_thinking on.
        for (provider, model) in [
            ("openrouter", "qwen/qwen3.6-plus"),
            ("together", "Qwen/Qwen3.6-35B-A3B"),
            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
            ("dashscope", "qwen3.6-plus"),
            ("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF"),
            ("local", "Qwen3.6-35B-A3B"),
            ("mlx", "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
            ("mlx", "Qwen/Qwen3.6-27B"),
        ] {
            let caps = lookup(provider, model);
            assert!(caps.thinking, "{provider}/{model}: thinking");
            assert!(
                caps.preserve_thinking,
                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
            );
            assert!(caps.native_tools, "{provider}/{model}: native_tools");
            assert_ne!(
                caps.server_parser, "ollama_qwen3coder",
                "{provider}/{model}: only Ollama routes through the qwen3coder response parser"
            );
        }
    }

    #[test]
    fn llamacpp_qwen_keeps_text_tool_wire_format() {
        reset();
        let caps = lookup("llamacpp", "unsloth/Qwen3.5-Coder-GGUF");
        assert_eq!(caps.server_parser, "none");
        assert!(caps.honors_chat_template_kwargs);
        assert!(caps.text_tool_wire_format_supported);
        assert_eq!(
            caps.recommended_endpoint.as_deref(),
            Some("/v1/chat/completions")
        );
    }

    #[test]
    fn dashscope_and_llamacpp_resolve_capabilities() {
        reset();
        // New sibling providers should fall through to `openai` for
        // gpt-*  models even without dedicated rules.
        let caps = lookup("dashscope", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        let caps = lookup("llamacpp", "gpt-5.4-preview");
        assert!(caps.defer_loading);
    }

    #[test]
    fn unknown_provider_has_no_capabilities() {
        reset();
        // No rules, no family link → all-default Capabilities.
        let caps = lookup("my-custom-proxy", "foo-bar-1");
        assert!(!caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn user_override_adds_new_provider() {
        reset();
        let toml_src = r#"
[[provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("my-proxy", "anything");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn user_override_takes_precedence_over_builtin() {
        reset();
        let toml_src = r#"
[[provider.anthropic]]
model_match = "claude-opus-*"
native_tools = true
defer_loading = false
tool_search = []
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
        clear_user_overrides();
    }

    #[test]
    fn user_override_from_manifest_toml() {
        reset();
        let manifest = r#"
[package]
name = "demo"

[[capabilities.provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_from_manifest_toml(manifest).unwrap();
        let caps = lookup("my-proxy", "foo");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn version_min_requires_parseable_model() {
        reset();
        let toml_src = r#"
[[provider.custom]]
model_match = "*"
version_min = [5, 4]
native_tools = true
"#;
        set_user_overrides_toml(toml_src).unwrap();
        // Unparseable model ID + version_min → rule doesn't match.
        let caps = lookup("custom", "mystery-model");
        assert!(!caps.native_tools);
        clear_user_overrides();
    }

    #[test]
    fn glob_match_substring() {
        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
    }

    #[test]
    fn openrouter_namespaced_anthropic_model() {
        reset();
        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
        assert!(caps.defer_loading);
    }
}