// harn_vm/llm/capabilities.rs

//! Data-driven provider capabilities.
//!
//! The per-(provider, model) capability matrix (native tools, deferred
//! tool loading, tool-search variants, prompt caching, extended thinking,
//! max tool count) lives in the shipped `capabilities.toml` and is
//! overridable per-project via `[[capabilities.provider.<name>]]` blocks
//! in `harn.toml`. This module owns:
//!
//! - loading the built-in TOML (compiled in via `include_str!`);
//! - merging user overrides on top;
//! - matching a `(provider, model)` pair against the rule list with
//!   glob + semver semantics;
//! - exposing a stable `Capabilities` struct that the `LlmProvider`
//!   trait delegates to as the single source of truth.
//!
//! Before this module the Anthropic / OpenAI gates were spread across
//! `providers/anthropic.rs` (`claude_generation`, `claude_model_supports_tool_search`)
//! and `providers/openai_compat.rs` (`gpt_generation`, `gpt_model_supports_tool_search`).
//! Those parsers are still used here — they supply the version extractor —
//! but the boolean gates that used to live alongside them are now data.
22use std::cell::RefCell;
23use std::collections::BTreeMap;
24use std::sync::OnceLock;
25
26use serde::Deserialize;
27
28use super::providers::anthropic::claude_generation;
29use super::providers::openai_compat::gpt_generation;
30
/// Shipped default rules. Compiled into the binary at build time.
/// (`include_str!` resolves relative to this source file, so the TOML
/// must live next to `capabilities.rs`.)
const BUILTIN_TOML: &str = include_str!("capabilities.toml");
33
/// Parsed on-disk capabilities schema. Public so harn-cli can
/// construct one directly when wiring harn.toml overrides.
///
/// Both tables carry `#[serde(default)]`, so an override file may supply
/// only one of them (or be empty) and still deserialize.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CapabilitiesFile {
    /// Per-provider ordered rule lists. First matching rule wins.
    #[serde(default)]
    pub provider: BTreeMap<String, Vec<ProviderRule>>,
    /// Sibling → canonical family mapping. Providers with no rule of
    /// their own fall through to the named family (recursively).
    #[serde(default)]
    pub provider_family: BTreeMap<String, String>,
}
46
/// One row of the capability matrix.
///
/// All capability fields are `Option` so TOML rows only state what they
/// know; unset fields resolve to defaults when a matched rule is
/// flattened into `Capabilities` (see `rule_to_caps`).
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderRule {
    /// Glob pattern (supports leading / trailing `*` and a single mid-`*`).
    /// Matched case-insensitively against the model ID.
    pub model_match: String,
    /// Optional `[major, minor]` lower bound. When set, the model ID
    /// must parse via the provider's version extractor AND compare ≥
    /// this tuple. Rules with an unparseable `version_min` for the
    /// given model are skipped, not merged.
    #[serde(default)]
    pub version_min: Option<Vec<u32>>,
    /// Native tool-calling gate. `None` resolves to `false`.
    #[serde(default)]
    pub native_tools: Option<bool>,
    /// Deferred tool-loading gate. `None` resolves to `false`.
    #[serde(default)]
    pub defer_loading: Option<bool>,
    /// Supported tool-search variant names (e.g. `"bm25"`, `"hosted"`).
    /// `None` resolves to an empty list.
    #[serde(default)]
    pub tool_search: Option<Vec<String>>,
    /// Maximum tool count; `None` means no declared limit.
    #[serde(default)]
    pub max_tools: Option<u32>,
    /// Prompt-caching gate. `None` resolves to `false`.
    #[serde(default)]
    pub prompt_caching: Option<bool>,
    /// Extended-thinking gate. `None` resolves to `false`.
    #[serde(default)]
    pub thinking: Option<bool>,
    /// Carry `<think>...</think>` blocks in assistant history across turns.
    /// Qwen3.6 exposes this as `chat_template_kwargs.preserve_thinking`;
    /// Alibaba recommends enabling it for long-horizon agent loops so the
    /// model doesn't re-derive context it already worked out in prior turns.
    /// Anthropic's adaptive-thinking signature contract is stricter but plays
    /// the same role there.
    #[serde(default)]
    pub preserve_thinking: Option<bool>,
}
80
/// Resolved capabilities for a `(provider, model)` pair. Unset rule
/// fields resolve to `false` / empty / `None` so callers never have to
/// unwrap an `Option<bool>` for what are really boolean gates.
///
/// `Default` doubles as the "no capabilities at all" value that `lookup`
/// returns when no rule matches anywhere in the family chain.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Capabilities {
    // Field meanings mirror `ProviderRule`; see that struct for the
    // per-field documentation.
    pub native_tools: bool,
    pub defer_loading: bool,
    pub tool_search: Vec<String>,
    pub max_tools: Option<u32>,
    pub prompt_caching: bool,
    pub thinking: bool,
    pub preserve_thinking: bool,
}
94
thread_local! {
    /// Per-thread user overrides installed by the CLI at startup. Kept
    /// thread-local (not process-static) to match the rest of the VM
    /// state model — the VM is !Send and each VM thread owns its own
    /// configuration.
    ///
    /// `lookup` clones the current value on every call, so a newly
    /// installed override takes effect on the next lookup without locking.
    static USER_OVERRIDES: RefCell<Option<CapabilitiesFile>> = const { RefCell::new(None) };
}
102
/// Lazily-parsed built-in rules. The `include_str!` content is a static
/// constant; parsing it once per process is safe and free of ordering
/// hazards. Shared across threads — unlike `USER_OVERRIDES`, the
/// built-in layer is identical everywhere.
static BUILTIN: OnceLock<CapabilitiesFile> = OnceLock::new();
107
108fn builtin() -> &'static CapabilitiesFile {
109    BUILTIN.get_or_init(|| {
110        toml::from_str::<CapabilitiesFile>(BUILTIN_TOML)
111            .expect("capabilities.toml must parse at build time")
112    })
113}
114
115/// Install project-level overrides for the current thread. Usually
116/// called once at CLI bootstrap after reading `harn.toml`. Passing
117/// `None` clears any prior override.
118pub fn set_user_overrides(file: Option<CapabilitiesFile>) {
119    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = file);
120}
121
122/// Clear any thread-local user overrides. Used between test runs.
123pub fn clear_user_overrides() {
124    set_user_overrides(None);
125}
126
127/// Parse a TOML string containing the capabilities section's own shape
128/// (i.e. top-level `[[provider.X]]` + optional `[provider_family]`, the
129/// same layout used by the built-in `capabilities.toml`) and install as
130/// the current thread's override.
131pub fn set_user_overrides_toml(src: &str) -> Result<(), String> {
132    let parsed: CapabilitiesFile = toml::from_str(src).map_err(|e| e.to_string())?;
133    set_user_overrides(Some(parsed));
134    Ok(())
135}
136
137/// Extract the `[capabilities]` section from a full `harn.toml` source
138/// and install it as the current thread's override. The schema inside
139/// that section mirrors `CapabilitiesFile` but with every key prefixed
140/// by `capabilities.`:
141///
142/// ```toml
143/// [[capabilities.provider.my-proxy]]
144/// model_match = "*"
145/// native_tools = true
146/// tool_search = ["hosted"]
147/// ```
148pub fn set_user_overrides_from_manifest_toml(src: &str) -> Result<(), String> {
149    #[derive(Deserialize)]
150    struct Manifest {
151        #[serde(default)]
152        capabilities: Option<CapabilitiesFile>,
153    }
154    let parsed: Manifest = toml::from_str(src).map_err(|e| e.to_string())?;
155    set_user_overrides(parsed.capabilities);
156    Ok(())
157}
158
159/// Look up effective capabilities for a `(provider, model)` pair.
160/// Walks the provider_family chain until it finds a rule list that
161/// matches. Within any one provider's rule list, user overrides are
162/// consulted before the built-in rules. The first matching rule wins —
163/// later rules (and later layers in the family chain) are ignored.
164pub fn lookup(provider: &str, model: &str) -> Capabilities {
165    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
166    lookup_with(provider, model, builtin(), user.as_ref())
167}
168
169fn lookup_with(
170    provider: &str,
171    model: &str,
172    builtin: &CapabilitiesFile,
173    user: Option<&CapabilitiesFile>,
174) -> Capabilities {
175    // Special case: mock spoofs either shape. Try anthropic first
176    // (Claude-shape model strings) so `mock` + `claude-opus-4-7`
177    // resolves to the Anthropic capability row — the same behaviour
178    // the hardcoded dispatch gave before this refactor.
179    if provider == "mock" {
180        if let Some(caps) = try_match_layer(user, builtin, "anthropic", model, provider) {
181            return caps;
182        }
183        if let Some(caps) = try_match_layer(user, builtin, "openai", model, provider) {
184            return caps;
185        }
186        return Capabilities::default();
187    }
188
189    // Normal chain: walk provider → family(provider) → ... with a
190    // visited-guard to avoid cycles in malformed user overrides.
191    let mut current = provider.to_string();
192    let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
193    while visited.insert(current.clone()) {
194        if let Some(caps) = try_match_layer(user, builtin, &current, model, provider) {
195            return caps;
196        }
197        let next = user
198            .and_then(|f| f.provider_family.get(&current))
199            .or_else(|| builtin.provider_family.get(&current))
200            .cloned();
201        match next {
202            Some(parent) => current = parent,
203            None => break,
204        }
205    }
206    Capabilities::default()
207}
208
209/// Try the ordered rule list for `layer_provider` (user rules first,
210/// then built-in rules). Returns `Some(caps)` on the first match, else
211/// `None`. `original_provider` is threaded through only for diagnostics.
212fn try_match_layer(
213    user: Option<&CapabilitiesFile>,
214    builtin: &CapabilitiesFile,
215    layer_provider: &str,
216    model: &str,
217    _original_provider: &str,
218) -> Option<Capabilities> {
219    if let Some(user) = user {
220        if let Some(rules) = user.provider.get(layer_provider) {
221            for rule in rules {
222                if rule_matches(rule, model) {
223                    return Some(rule_to_caps(rule));
224                }
225            }
226        }
227    }
228    if let Some(rules) = builtin.provider.get(layer_provider) {
229        for rule in rules {
230            if rule_matches(rule, model) {
231                return Some(rule_to_caps(rule));
232            }
233        }
234    }
235    None
236}
237
238fn rule_to_caps(rule: &ProviderRule) -> Capabilities {
239    Capabilities {
240        native_tools: rule.native_tools.unwrap_or(false),
241        defer_loading: rule.defer_loading.unwrap_or(false),
242        tool_search: rule.tool_search.clone().unwrap_or_default(),
243        max_tools: rule.max_tools,
244        prompt_caching: rule.prompt_caching.unwrap_or(false),
245        thinking: rule.thinking.unwrap_or(false),
246        preserve_thinking: rule.preserve_thinking.unwrap_or(false),
247    }
248}
249
250fn rule_matches(rule: &ProviderRule, model: &str) -> bool {
251    let lower = model.to_lowercase();
252    if !glob_match(&rule.model_match.to_lowercase(), &lower) {
253        return false;
254    }
255    if let Some(version_min) = &rule.version_min {
256        if version_min.len() != 2 {
257            return false;
258        }
259        let want = (version_min[0], version_min[1]);
260        let have = match extract_version(model) {
261            Some(v) => v,
262            // `version_min` was set but the model ID can't be parsed.
263            // Fail closed: skip this rule so more permissive catch-all
264            // rules below can still match.
265            None => return false,
266        };
267        if have < want {
268            return false;
269        }
270    }
271    true
272}
273
274/// Extract `(major, minor)` from a model ID by trying the Anthropic
275/// parser first (for `claude-*` shapes) then the OpenAI parser (`gpt-*`).
276/// Both parsers return `None` for shapes they don't recognise so this
277/// never mis-parses across families.
278fn extract_version(model: &str) -> Option<(u32, u32)> {
279    claude_generation(model).or_else(|| gpt_generation(model))
280}
281
/// Simple glob matching with `*` wildcards. Mirrors the helper in
/// `llm_config.rs` — keep them in sync if either ever grows regex or
/// character-class support.
///
/// Supported shapes: exact literal, `foo*`, `*foo`, `*foo*`, and a single
/// mid-pattern star (`pre*post`). Patterns with two or more interior
/// stars are unsupported and degrade to a literal comparison.
fn glob_match(pattern: &str, input: &str) -> bool {
    if let Some(prefix) = pattern.strip_suffix('*') {
        if let Some(rest) = prefix.strip_prefix('*') {
            // `*foo*` — substring match.
            return input.contains(rest);
        }
        // `foo*` — prefix match (also covers the bare `*` catch-all).
        return input.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        // `*foo` — suffix match.
        return input.ends_with(suffix);
    }
    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            // `pre*post` — the input must be long enough to contain both
            // halves without overlapping; otherwise `ab*ba` would wrongly
            // match `"aba"` (prefix and suffix sharing the middle byte).
            return input.len() >= parts[0].len() + parts[1].len()
                && input.starts_with(parts[0])
                && input.ends_with(parts[1]);
        }
        // Multiple interior stars: unsupported, degrade to literal.
        return input == pattern;
    }
    input == pattern
}
305
#[cfg(test)]
mod tests {
    //! The lookup tests double as a regression pin on the shipped
    //! `capabilities.toml` matrix: the expected values encode what the
    //! built-in rows say for each provider/model pair.
    use super::*;

    // Drop any overrides a previous test installed. Tests on the same
    // thread share the thread-local override slot, so every lookup test
    // starts by resetting it.
    fn reset() {
        clear_user_overrides();
    }

    #[test]
    fn anthropic_opus_47_gets_full_capabilities() {
        reset();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
        assert!(caps.prompt_caching);
        assert!(caps.thinking);
        assert_eq!(caps.max_tools, Some(10000));
    }

    #[test]
    fn anthropic_haiku_44_has_no_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-4");
        // Haiku 4.4 falls through to the `claude-*` catch-all row.
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn anthropic_haiku_45_supports_tool_search() {
        reset();
        let caps = lookup("anthropic", "claude-haiku-4-5");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn old_claude_gets_catchall() {
        reset();
        let caps = lookup("anthropic", "claude-opus-3-5");
        assert!(caps.native_tools);
        assert!(caps.prompt_caching);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn openai_gpt_54_supports_tool_search() {
        reset();
        let caps = lookup("openai", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn openai_gpt_53_has_native_tools_only() {
        reset();
        let caps = lookup("openai", "gpt-5.3");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    // Family-chain fallthrough: providers with no rules of their own
    // inherit from the family named in `provider_family`.
    #[test]
    fn openrouter_inherits_openai() {
        reset();
        let caps = lookup("openrouter", "gpt-5.4");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn groq_inherits_openai_family_only() {
        reset();
        let caps = lookup("groq", "gpt-5.5-preview");
        assert!(caps.defer_loading);
    }

    // `mock` routes by model shape: anthropic layer first, then openai
    // (the special case at the top of `lookup_with`).
    #[test]
    fn mock_with_claude_model_routes_to_anthropic() {
        reset();
        let caps = lookup("mock", "claude-sonnet-4-7");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["bm25", "regex"]);
    }

    #[test]
    fn mock_with_gpt_model_routes_to_openai() {
        reset();
        let caps = lookup("mock", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        assert_eq!(caps.tool_search, vec!["hosted", "client"]);
    }

    #[test]
    fn qwen36_ollama_preserves_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.6:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert!(caps.thinking);
        assert!(
            caps.preserve_thinking,
            "Qwen3.6 should enable preserve_thinking by default for long-horizon loops"
        );
    }

    #[test]
    fn qwen35_ollama_does_not_preserve_thinking() {
        reset();
        let caps = lookup("ollama", "qwen3.5:35b-a3b-coding-nvfp4");
        assert!(caps.native_tools);
        assert!(caps.thinking);
        assert!(
            !caps.preserve_thinking,
            "Qwen3.5 lacks the preserve_thinking kwarg — rely on the chat template's rolling checkpoint instead"
        );
    }

    // Every routed/rehosted spelling of Qwen3.6 — different providers,
    // namespaces, and capitalisations — must resolve to the same row.
    #[test]
    fn qwen36_routed_providers_all_preserve_thinking() {
        reset();
        for (provider, model) in [
            ("openrouter", "qwen/qwen3.6-plus"),
            ("together", "Qwen/Qwen3.6-35B-A3B"),
            ("huggingface", "Qwen/Qwen3.6-35B-A3B"),
            ("fireworks", "accounts/fireworks/models/qwen3p6-plus"),
            ("dashscope", "qwen3.6-plus"),
            ("llamacpp", "unsloth/Qwen3.6-35B-A3B-GGUF"),
            ("local", "Qwen3.6-35B-A3B"),
            ("mlx", "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
            ("mlx", "Qwen/Qwen3.6-27B"),
        ] {
            let caps = lookup(provider, model);
            assert!(caps.thinking, "{provider}/{model}: thinking");
            assert!(
                caps.preserve_thinking,
                "{provider}/{model}: preserve_thinking must be on for Qwen3.6"
            );
            assert!(caps.native_tools, "{provider}/{model}: native_tools");
        }
    }

    #[test]
    fn dashscope_and_llamacpp_resolve_capabilities() {
        reset();
        // New sibling providers should fall through to `openai` for
        // gpt-*  models even without dedicated rules.
        let caps = lookup("dashscope", "gpt-5.4-preview");
        assert!(caps.defer_loading);
        let caps = lookup("llamacpp", "gpt-5.4-preview");
        assert!(caps.defer_loading);
    }

    #[test]
    fn unknown_provider_has_no_capabilities() {
        reset();
        let caps = lookup("my-custom-proxy", "foo-bar-1");
        assert!(!caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
    }

    #[test]
    fn user_override_adds_new_provider() {
        reset();
        let toml_src = r#"
[[provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("my-proxy", "anything");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn user_override_takes_precedence_over_builtin() {
        reset();
        let toml_src = r#"
[[provider.anthropic]]
model_match = "claude-opus-*"
native_tools = true
defer_loading = false
tool_search = []
"#;
        set_user_overrides_toml(toml_src).unwrap();
        let caps = lookup("anthropic", "claude-opus-4-7");
        assert!(caps.native_tools);
        assert!(!caps.defer_loading);
        assert!(caps.tool_search.is_empty());
        clear_user_overrides();
    }

    #[test]
    fn user_override_from_manifest_toml() {
        reset();
        let manifest = r#"
[package]
name = "demo"

[[capabilities.provider.my-proxy]]
model_match = "*"
native_tools = true
tool_search = ["hosted"]
"#;
        set_user_overrides_from_manifest_toml(manifest).unwrap();
        let caps = lookup("my-proxy", "foo");
        assert!(caps.native_tools);
        assert_eq!(caps.tool_search, vec!["hosted"]);
        clear_user_overrides();
    }

    #[test]
    fn version_min_requires_parseable_model() {
        reset();
        let toml_src = r#"
[[provider.custom]]
model_match = "*"
version_min = [5, 4]
native_tools = true
"#;
        set_user_overrides_toml(toml_src).unwrap();
        // Unparseable model ID + version_min → rule doesn't match.
        let caps = lookup("custom", "mystery-model");
        assert!(!caps.native_tools);
        clear_user_overrides();
    }

    #[test]
    fn glob_match_substring() {
        assert!(glob_match("*gpt*", "openai/gpt-5.4"));
        assert!(glob_match("*claude*", "anthropic/claude-opus-4-7"));
        assert!(!glob_match("*xyz*", "openai/gpt-5.4"));
    }

    #[test]
    fn openrouter_namespaced_anthropic_model() {
        reset();
        let caps = lookup("anthropic", "anthropic/claude-opus-4-7");
        assert!(caps.defer_loading);
    }
}