Skip to main content

harn_vm/llm/capabilities/
audit.rs

1//! Capability audit and the display/JSON provider matrix.
2//!
3//! Owns the [`ProviderCapabilityMatrixRow`] projection used by the CLI matrix
4//! surfaces (`matrix_rows`, `push_matrix_rows`, `rule_to_matrix_row`) and the
5//! tool-capability coverage audit that flags priced catalog models missing an
6//! explicit `native_tools` / `preferred_tool_format` rule
7//! (`audit_tool_capability_coverage` and the suggested-default helpers).
8
9use serde::Serialize;
10
11use super::lookup::{builtin, USER_OVERRIDES};
12use super::model::CapabilitiesFile;
13use super::rule::{
14    first_matching_rule, rule_preferred_tool_format, rule_structured_output,
15    rule_structured_output_mode, rule_thinking_block_style, rule_thinking_modes,
16    rule_tool_mode_parity, rule_vision, MatchedCapabilityRule, ProviderRule,
17};
18use super::BUILTIN_PROVIDERS_TOML;
19
/// Display-oriented row for `harn provider catalog matrix`, the legacy
/// `harn check --provider-matrix` surface, and the generated docs page. Rows
/// are intentionally rule-shaped: `model` is the rule's `model_match` pattern,
/// because the shipped capability source of truth is a first-match rule table
/// rather than an exhaustive remote model inventory.
///
/// Rows are produced by `rule_to_matrix_row`; optional rule fields that are
/// unset are projected to the defaults noted on the fields below.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ProviderCapabilityMatrixRow {
    /// Provider key the rule is listed under in the capabilities file.
    pub provider: String,
    /// The rule's `model_match` pattern (a pattern, not a concrete model id).
    pub model: String,
    /// Copy of the rule's `version_min`, when the rule sets one.
    pub version_min: Option<Vec<u32>>,
    /// Whether this rule opts into field-wise fall-through
    /// ([`ProviderRule::extends`]). Rows in this matrix are rule-shaped, so
    /// an `extends` row honestly reports its OWN fields only — for a
    /// matching model, unset fields resolve from later matching rows and
    /// provider defaults rather than the printed per-rule values.
    pub extends: bool,
    /// Thinking modes as reported by `rule_thinking_modes` for the rule.
    pub thinking: Vec<String>,
    /// Vision support as reported by `rule_vision` for the rule.
    pub vision: bool,
    /// Rule's `audio` flag; `false` when unset.
    pub audio: bool,
    /// Rule's `pdf` flag; `false` when unset.
    pub pdf: bool,
    /// Rule's `video` flag; `false` when unset.
    pub video: bool,
    /// Not read from the rule: `rule_to_matrix_row` always emits `true`.
    pub streaming: bool,
    /// Rule's `files_api_supported` flag; `false` when unset.
    pub files_api_supported: bool,
    /// Structured-output value as reported by `rule_structured_output`.
    pub json_schema: Option<String>,
    /// Rule's `prefers_xml_scaffolding` flag; `false` when unset.
    pub prefers_xml_scaffolding: bool,
    /// Rule's `reserved_tool_call_token` flag; `false` when unset.
    pub reserved_tool_call_token: bool,
    /// Rule's `prefers_markdown_scaffolding` flag; `false` when unset.
    pub prefers_markdown_scaffolding: bool,
    /// Value of `rule_structured_output_mode` for the rule.
    pub structured_output_mode: String,
    /// Rule's `supports_assistant_prefill` flag; `false` when unset.
    pub supports_assistant_prefill: bool,
    /// Rule's `prefers_role_developer` flag; when unset it falls back to the
    /// rule's `requires_completion_tokens`, and to `false` if that is unset too.
    pub prefers_role_developer: bool,
    /// Rule's `prefers_xml_tools` flag; `false` when unset.
    pub prefers_xml_tools: bool,
    /// Value of `rule_thinking_block_style` for the rule.
    pub thinking_block_style: String,
    /// Rule's `native_tools` flag; `false` when unset.
    pub native_tools: bool,
    /// Rule's `text_tool_wire_format_supported` flag; `true` when unset.
    pub text_tools: bool,
    /// Value of `rule_preferred_tool_format` for the rule.
    pub preferred_tool_format: String,
    /// Value of `rule_tool_mode_parity` for the rule.
    pub tool_mode_parity: String,
    /// `true` when either `native_tools` or `text_tools` is `true`.
    pub tools: bool,
    /// Rule's `prompt_caching` flag; `false` when unset.
    pub cache: bool,
    /// Serving-quality / precision trust verdict for this route. See
    /// [`ProviderRule::serving_precision`]. `"unverified"` when unset.
    pub serving_precision: String,
    /// Label supplied by the caller for the file the rule came from;
    /// `matrix_rows` uses `"project"` for user overrides and `"builtin"` for
    /// the built-in table.
    pub source: String,
}
63
/// Result of a tool-capability coverage audit over catalog models.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ToolCapabilityAuditReport {
    /// Number of catalog models that were audited; models without pricing
    /// are skipped and not counted.
    pub audited_models: usize,
    /// One entry per audited model whose matching rule (if any) leaves
    /// `native_tools` and/or `preferred_tool_format` unset. Sorted by
    /// provider, then model.
    pub gaps: Vec<ToolCapabilityAuditGap>,
}
69
70impl ToolCapabilityAuditReport {
71    pub fn ok(&self) -> bool {
72        self.gaps.is_empty()
73    }
74
75    pub fn render_human(&self) -> String {
76        if self.gaps.is_empty() {
77            return format!(
78                "provider capability audit OK: {} priced chat models have explicit native_tools and preferred_tool_format rules",
79                self.audited_models
80            );
81        }
82
83        let mut out = format!(
84            "provider capability audit found {} catalog gaps among {} priced chat models:",
85            self.gaps.len(),
86            self.audited_models
87        );
88        for gap in &self.gaps {
89            let matched = match (&gap.rule_provider, &gap.rule_model_match) {
90                (Some(provider), Some(model_match)) => {
91                    format!("provider.{provider} model_match=\"{model_match}\"")
92                }
93                _ => "no matching rule".to_string(),
94            };
95            out.push_str(&format!(
96                "\n- {}:{} ({matched}) missing {}; suggest native_tools = {}, preferred_tool_format = \"{}\"",
97                gap.provider,
98                gap.model,
99                gap.missing_fields.join(", "),
100                gap.suggested_native_tools,
101                gap.suggested_preferred_tool_format,
102            ));
103        }
104        out
105    }
106}
107
/// A single audited catalog model whose capability rule coverage is
/// incomplete, together with suggested values for the missing fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ToolCapabilityAuditGap {
    /// Provider of the catalog model.
    pub provider: String,
    /// Catalog model id.
    pub model: String,
    /// Provider of the matched capability rule; `None` when no rule matched.
    pub rule_provider: Option<String>,
    /// Pattern(s) of the matched rule, joined with `" -> "`; `None` when no
    /// rule matched.
    pub rule_model_match: Option<String>,
    /// Names of the unset fields: `"native_tools"` and/or
    /// `"preferred_tool_format"`.
    pub missing_fields: Vec<String>,
    /// Suggested `native_tools` value from `suggested_tool_capability_defaults`.
    pub suggested_native_tools: bool,
    /// Suggested `preferred_tool_format` value from
    /// `suggested_tool_capability_defaults`.
    pub suggested_preferred_tool_format: String,
}
118
119/// Return the currently-effective provider capability rule matrix. User
120/// override rows, when installed for the current thread, are emitted before
121/// built-in rows so the display mirrors lookup precedence.
122pub fn matrix_rows() -> Vec<ProviderCapabilityMatrixRow> {
123    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
124    let mut rows = Vec::new();
125    if let Some(user) = user.as_ref() {
126        push_matrix_rows(&mut rows, user, "project");
127    }
128    push_matrix_rows(&mut rows, builtin(), "builtin");
129    rows
130}
131
132/// Audit the currently effective provider/model catalog against the currently
133/// effective capability rules. This is the user-facing path used by the CLI
134/// when authors are adding provider catalog or capability override rows.
135pub fn audit_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
136    let user = USER_OVERRIDES.with(|cell| cell.borrow().clone());
137    audit_tool_capability_coverage(
138        crate::llm_config::model_catalog_entries(),
139        builtin(),
140        user.as_ref(),
141    )
142}
143
144/// Audit the built-in catalog only. The CI test uses this path so external
145/// provider config cannot hide a gap in the shipped TOML assets.
146pub fn audit_builtin_catalogued_chat_model_tool_capabilities() -> ToolCapabilityAuditReport {
147    let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
148        .expect("providers.toml must parse at build time");
149    audit_tool_capability_coverage(catalog.models, builtin(), None)
150}
151
152fn audit_tool_capability_coverage<I>(
153    models: I,
154    builtin: &CapabilitiesFile,
155    user: Option<&CapabilitiesFile>,
156) -> ToolCapabilityAuditReport
157where
158    I: IntoIterator<Item = (String, crate::llm_config::ModelDef)>,
159{
160    let mut gaps = Vec::new();
161    let mut audited_models = 0;
162
163    for (model_id, model) in models {
164        if model.pricing.is_none() {
165            continue;
166        }
167        audited_models += 1;
168        let matched = first_matching_rule(user, builtin, &model.provider, &model_id);
169        let mut missing_fields = Vec::new();
170        match matched.as_ref().map(|matched| &matched.rule) {
171            Some(rule) => {
172                if rule.native_tools.is_none() {
173                    missing_fields.push("native_tools".to_string());
174                }
175                if rule.preferred_tool_format.is_none() {
176                    missing_fields.push("preferred_tool_format".to_string());
177                }
178            }
179            None => {
180                missing_fields.push("native_tools".to_string());
181                missing_fields.push("preferred_tool_format".to_string());
182            }
183        }
184        if missing_fields.is_empty() {
185            continue;
186        }
187
188        let (suggested_native_tools, suggested_preferred_tool_format) =
189            suggested_tool_capability_defaults(
190                &model.provider,
191                &model_id,
192                &model,
193                matched.as_ref(),
194            );
195        gaps.push(ToolCapabilityAuditGap {
196            provider: model.provider,
197            model: model_id,
198            rule_provider: matched.as_ref().map(|matched| matched.provider.clone()),
199            // Honest per-rule provenance: an `extends` fall-through chain
200            // reports every absorbed rule pattern in precedence order, not a
201            // fake single source row.
202            rule_model_match: matched.map(|matched| matched.matched_patterns.join(" -> ")),
203            missing_fields,
204            suggested_native_tools,
205            suggested_preferred_tool_format,
206        });
207    }
208
209    gaps.sort_by(|left, right| {
210        left.provider
211            .cmp(&right.provider)
212            .then_with(|| left.model.cmp(&right.model))
213    });
214    ToolCapabilityAuditReport {
215        audited_models,
216        gaps,
217    }
218}
219
220fn suggested_tool_capability_defaults(
221    provider: &str,
222    model_id: &str,
223    model: &crate::llm_config::ModelDef,
224    matched: Option<&MatchedCapabilityRule>,
225) -> (bool, String) {
226    if let Some(rule) = matched.map(|matched| &matched.rule) {
227        let native_tools = rule.native_tools.unwrap_or_else(|| {
228            // Resolve native_tools from the pinned tool_format via its channel
229            // so `json` (a TEXT-channel format) correctly implies
230            // native_tools = false, identically to `text`. Falling through to
231            // the provider heuristic for `json` would wrongly mark a gemini /
232            // cerebras row native. Unknown formats keep the heuristic.
233            match rule
234                .preferred_tool_format
235                .as_deref()
236                .and_then(crate::llm_config::tool_format_channel)
237            {
238                Some(crate::llm_config::ToolFormatChannel::Native) => true,
239                Some(crate::llm_config::ToolFormatChannel::Text) => false,
240                None => suggested_native_tools(provider, model_id, model),
241            }
242        });
243        let preferred_tool_format = rule
244            .preferred_tool_format
245            .clone()
246            .unwrap_or_else(|| tool_format_for_native(native_tools));
247        return (native_tools, preferred_tool_format);
248    }
249
250    let native_tools = suggested_native_tools(provider, model_id, model);
251    (native_tools, tool_format_for_native(native_tools))
252}
253
254fn suggested_native_tools(
255    provider: &str,
256    model_id: &str,
257    model: &crate::llm_config::ModelDef,
258) -> bool {
259    if provider == "anthropic" || model_id.contains("claude") {
260        return true;
261    }
262    if matches!(
263        provider,
264        "openai" | "gemini" | "cerebras" | "bedrock" | "azure_openai" | "vertex"
265    ) {
266        return true;
267    }
268    model
269        .capabilities
270        .iter()
271        .any(|capability| capability == "tools")
272}
273
/// Derive the `preferred_tool_format` for a capability row (or unmatched
/// model) that does not pin one: `native` for native-capable models, and
/// `json` (fenced-JSON), the GLOBAL text-channel default, for text-channel
/// models. Heredoc (`text`) is never auto-derived — it is reachable only via
/// an explicit `preferred_tool_format = "text"` pin or an explicit request
/// (the reverse safety valve). This is the primary default site: it fires for
/// every model that matches a capability row without an explicit format pin.
fn tool_format_for_native(native_tools: bool) -> String {
    let derived = match native_tools {
        true => "native",
        false => "json",
    };
    String::from(derived)
}
288
289fn push_matrix_rows(
290    rows: &mut Vec<ProviderCapabilityMatrixRow>,
291    file: &CapabilitiesFile,
292    source: &str,
293) {
294    for (provider, rules) in &file.provider {
295        for rule in rules {
296            rows.push(rule_to_matrix_row(provider, rule, source));
297        }
298    }
299}
300
301fn rule_to_matrix_row(
302    provider: &str,
303    rule: &ProviderRule,
304    source: &str,
305) -> ProviderCapabilityMatrixRow {
306    ProviderCapabilityMatrixRow {
307        provider: provider.to_string(),
308        model: rule.model_match.clone(),
309        version_min: rule.version_min.clone(),
310        extends: rule.extends,
311        thinking: rule_thinking_modes(rule),
312        vision: rule_vision(rule),
313        audio: rule.audio.unwrap_or(false),
314        pdf: rule.pdf.unwrap_or(false),
315        video: rule.video.unwrap_or(false),
316        streaming: true,
317        files_api_supported: rule.files_api_supported.unwrap_or(false),
318        json_schema: rule_structured_output(rule),
319        prefers_xml_scaffolding: rule.prefers_xml_scaffolding.unwrap_or(false),
320        reserved_tool_call_token: rule.reserved_tool_call_token.unwrap_or(false),
321        prefers_markdown_scaffolding: rule.prefers_markdown_scaffolding.unwrap_or(false),
322        structured_output_mode: rule_structured_output_mode(rule),
323        supports_assistant_prefill: rule.supports_assistant_prefill.unwrap_or(false),
324        prefers_role_developer: rule
325            .prefers_role_developer
326            .unwrap_or_else(|| rule.requires_completion_tokens.unwrap_or(false)),
327        prefers_xml_tools: rule.prefers_xml_tools.unwrap_or(false),
328        thinking_block_style: rule_thinking_block_style(rule),
329        native_tools: rule.native_tools.unwrap_or(false),
330        text_tools: rule.text_tool_wire_format_supported.unwrap_or(true),
331        preferred_tool_format: rule_preferred_tool_format(rule),
332        tool_mode_parity: rule_tool_mode_parity(rule),
333        tools: rule.native_tools.unwrap_or(false)
334            || rule.text_tool_wire_format_supported.unwrap_or(true),
335        cache: rule.prompt_caching.unwrap_or(false),
336        serving_precision: rule
337            .serving_precision
338            .clone()
339            .unwrap_or_else(|| "unverified".to_string()),
340        source: source.to_string(),
341    }
342}
343
344#[cfg(test)]
345mod tests {
346    use super::super::lookup::clear_user_overrides;
347    use super::*;
348
    /// Clear user overrides via `clear_user_overrides` before a test body
    /// runs — presumably so overrides installed elsewhere on the same thread
    /// cannot leak into the test (TODO confirm against `lookup`).
    fn reset() {
        clear_user_overrides();
    }
352
    /// The built-in catalog audit must report no gaps. On failure the
    /// human-readable report is used as the assertion message.
    #[test]
    fn every_catalogued_chat_model_has_explicit_tool_capabilities() {
        reset();
        let report = audit_builtin_catalogued_chat_model_tool_capabilities();
        assert!(report.ok(), "{}", report.render_human());
    }
359
360    #[test]
361    fn every_catalogued_alias_has_explicit_tool_capabilities() {
362        // The model-level audit only covers priced catalog `models`, so a
363        // `[[provider.local]]` / Ollama alias (e.g. the local gemma-4 route in
364        // Fix A) could omit native_tools/preferred_tool_format and silently
365        // degrade to text tools without tripping a test. Walk every alias's
366        // (provider, id) through the same matcher and require explicit fields.
367        reset();
368        let catalog = crate::llm_config::parse_config_toml(BUILTIN_PROVIDERS_TOML)
369            .expect("providers.toml must parse at build time");
370        let builtin = builtin();
371        let mut gaps = Vec::new();
372        for (alias, def) in &catalog.aliases {
373            let matched = first_matching_rule(None, builtin, &def.provider, &def.id);
374            let explicit = matched
375                .as_ref()
376                .map(|matched| {
377                    matched.rule.native_tools.is_some()
378                        && matched.rule.preferred_tool_format.is_some()
379                })
380                .unwrap_or(false);
381            if !explicit {
382                gaps.push(format!(
383                    "{alias} -> {}:{} (rule={})",
384                    def.provider,
385                    def.id,
386                    matched
387                        .as_ref()
388                        .map(|matched| matched.rule.model_match.as_str())
389                        .unwrap_or("<none>")
390                ));
391            }
392        }
393        assert!(
394            gaps.is_empty(),
395            "aliases missing explicit native_tools/preferred_tool_format:\n- {}",
396            gaps.join("\n- ")
397        );
398    }
399
    /// Audits one priced model against a rule that pins
    /// `preferred_tool_format` but leaves `native_tools` unset, and checks
    /// the reported gap, its suggested values, and the rendered bullet line.
    #[test]
    fn tool_capability_audit_reports_suggested_defaults() {
        reset();
        // The rule table under audit: a single `acme` rule with no
        // `native_tools` entry.
        let capabilities: CapabilitiesFile = toml::from_str(
            r#"
[[provider.acme]]
model_match = "acme-good-*"
preferred_tool_format = "native"
"#,
        )
        .unwrap();
        let report = audit_tool_capability_coverage(
            vec![(
                "acme-good-1".to_string(),
                crate::llm_config::ModelDef {
                    name: "Acme Good".to_string(),
                    provider: "acme".to_string(),
                    context_window: 128_000,
                    logical_model: None,
                    equivalence_group: None,
                    served_variant: None,
                    wire_model: None,
                    api_dialect: None,
                    rate_limits: None,
                    performance: None,
                    architecture: None,
                    local_memory: None,
                    runtime_context_window: None,
                    stream_timeout: None,
                    capabilities: Vec::new(),
                    // Pricing must be present: unpriced models are skipped by
                    // the audit.
                    pricing: Some(crate::llm_config::ModelPricing {
                        input_per_mtok: 1.0,
                        output_per_mtok: 2.0,
                        cache_read_per_mtok: None,
                        cache_write_per_mtok: None,
                    }),
                    deprecated: false,
                    deprecation_note: None,
                    superseded_by: None,
                    fast_mode: None,
                    quality_tags: Vec::new(),
                    availability: crate::llm_config::ModelAvailability::Serverless,
                    tier: None,
                    open_weight: None,
                    strengths: Vec::new(),
                    benchmarks: std::collections::BTreeMap::new(),
                    family: None,
                    lineage: None,
                    complementary_with: Vec::new(),
                    avoid_as_reviewer_for: Vec::new(),
                },
            )],
            &capabilities,
            None,
        );

        // Only `native_tools` is missing; the suggestion keeps the pinned
        // format and proposes native_tools = true.
        assert!(!report.ok());
        assert_eq!(report.audited_models, 1);
        assert_eq!(report.gaps.len(), 1);
        assert_eq!(report.gaps[0].missing_fields, ["native_tools"]);
        assert!(report.gaps[0].suggested_native_tools);
        assert_eq!(report.gaps[0].suggested_preferred_tool_format, "native");
        assert!(report.render_human().contains(
            "acme:acme-good-1 (provider.acme model_match=\"acme-good-*\") missing native_tools; suggest native_tools = true, preferred_tool_format = \"native\""
        ));
    }
466
467    #[test]
468    fn matrix_rows_include_provider_patterns_and_sources() {
469        reset();
470        let rows = matrix_rows();
471        assert!(rows.iter().any(|row| {
472            row.provider == "openai"
473                && row.model == "gpt-4o*"
474                && row.vision
475                && row.audio
476                && row.json_schema.as_deref() == Some("native")
477                && row.source == "builtin"
478        }));
479    }
480}