Skip to main content

harn_vm/
llm_config.rs

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide provider config. Populated exactly once, by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file that `load_config` merged into `CONFIG`.
/// Stays unset when no external file was loaded.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();

thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    /// `None` means no overlay is installed on this thread.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Provider/model catalog in the shape of `providers.toml`.
///
/// Every field carries `#[serde(default)]`, so any subset of sections may be
/// present in the input. The same type is used for the base catalog and for
/// overlays applied with `merge_from`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Configured default provider name. Blank and `auto` values are ignored
    /// when the default provider is resolved.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions keyed by provider name.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Model aliases keyed by alias name.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Tool-calling records keyed by alias name.
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model catalog entries keyed by model id.
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// QC model per provider; looked up with the lowercased provider name.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Provider inference rules, scanned in order; the first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Tier rules, scanned in order; the first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier returned when no tier rule or built-in heuristic applies.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Default request parameters keyed by a model-id glob pattern.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
40
41impl ProvidersConfig {
42    pub fn is_empty(&self) -> bool {
43        self.default_provider.is_none()
44            && self.providers.is_empty()
45            && self.aliases.is_empty()
46            && self.alias_tool_calling.is_empty()
47            && self.models.is_empty()
48            && self.qc_defaults.is_empty()
49            && self.inference_rules.is_empty()
50            && self.tier_rules.is_empty()
51            && self.model_defaults.is_empty()
52            && self.tier_defaults.default == default_mid()
53    }
54
55    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
56        self.providers.extend(overlay.providers.clone());
57        self.aliases.extend(overlay.aliases.clone());
58        self.alias_tool_calling
59            .extend(overlay.alias_tool_calling.clone());
60        self.models.extend(overlay.models.clone());
61        self.qc_defaults.extend(overlay.qc_defaults.clone());
62
63        if overlay.default_provider.is_some() {
64            self.default_provider = overlay.default_provider.clone();
65        }
66
67        if !overlay.inference_rules.is_empty() {
68            let mut merged = overlay.inference_rules.clone();
69            merged.extend(self.inference_rules.clone());
70            self.inference_rules = merged;
71        }
72
73        if !overlay.tier_rules.is_empty() {
74            let mut merged = overlay.tier_rules.clone();
75            merged.extend(self.tier_rules.clone());
76            self.tier_rules = merged;
77        }
78
79        if overlay.tier_defaults.default != default_mid() {
80            self.tier_defaults = overlay.tier_defaults.clone();
81        }
82
83        for (pattern, defaults) in &overlay.model_defaults {
84            self.model_defaults
85                .entry(pattern.clone())
86                .or_default()
87                .extend(defaults.clone());
88        }
89    }
90}
91
/// Definition of a single LLM provider: where to reach it, how to
/// authenticate, and optional routing metadata.
///
/// `base_url` is the only field without a serde default, so it must be
/// present when a provider is deserialized.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    #[serde(default)]
    pub display_name: Option<String>,
    #[serde(default)]
    pub icon: Option<String>,
    /// Configured base URL; used when `base_url_env` is unset or resolves to
    /// an empty value.
    pub base_url: String,
    /// Name of an environment variable that overrides `base_url`.
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth style; defaults to `"bearer"`. `"none"` means no key is required.
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Environment variable name(s) holding the provider credential.
    #[serde(default)]
    pub auth_env: AuthEnv,
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    pub chat_endpoint: String,
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Legacy provider-level feature flags, e.g. `"native_tools"`.
    #[serde(default)]
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
139
/// Hand-written `Default` so that `auth_style` starts as `"bearer"`, matching
/// the serde default used during deserialization.
impl Default for ProviderDef {
    /// Build a provider with empty URLs/endpoints, bearer auth, and every
    /// optional setting unset.
    fn default() -> Self {
        Self {
            display_name: None,
            icon: None,
            base_url: String::new(),
            base_url_env: None,
            auth_style: default_bearer(),
            auth_header: None,
            auth_env: AuthEnv::None,
            extra_headers: BTreeMap::new(),
            chat_endpoint: String::new(),
            completion_endpoint: None,
            healthcheck: None,
            features: Vec::new(),
            fallback: None,
            retry_count: None,
            retry_delay_ms: None,
            rpm: None,
            cost_per_1k_in: None,
            cost_per_1k_out: None,
            latency_p50_ms: None,
        }
    }
}
165
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
169
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
///
/// Deserialized untagged: the shape of the value selects the variant.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No env var configured; such a provider is treated as needing no key.
    #[default]
    None,
    /// Exactly one env var name.
    Single(String),
    /// Several candidate env var names.
    Multiple(Vec<String>),
}
180
/// Healthcheck request description attached to a provider.
///
/// Only `method` is required; `path`, `url` and `body` are optional.
// NOTE(review): presumably `path` is relative to the provider base URL and
// `url` is absolute — confirm against the healthcheck caller.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    pub method: String,
    #[serde(default)]
    pub path: Option<String>,
    #[serde(default)]
    pub url: Option<String>,
    #[serde(default)]
    pub body: Option<String>,
}
191
/// A named alias pointing at a concrete model id on a specific provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id the alias resolves to.
    pub id: String,
    /// Provider name the alias resolves to.
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
203
204#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
205pub struct AliasToolCallingDef {
206    #[serde(default)]
207    #[serde(skip_serializing_if = "Option::is_none")]
208    pub native: Option<String>,
209    #[serde(default)]
210    #[serde(skip_serializing_if = "Option::is_none")]
211    pub text: Option<String>,
212    #[serde(default)]
213    #[serde(skip_serializing_if = "Option::is_none")]
214    pub streaming_native: Option<String>,
215    #[serde(default)]
216    #[serde(skip_serializing_if = "Option::is_none")]
217    pub fallback_mode: Option<String>,
218    #[serde(default)]
219    #[serde(skip_serializing_if = "Option::is_none")]
220    pub failure_reason: Option<String>,
221    #[serde(default)]
222    #[serde(skip_serializing_if = "Option::is_none")]
223    pub last_probe_at: Option<String>,
224}
225
/// Model-specific catalog pricing, expressed per million tokens ("mtok").
///
/// `pricing_per_1k_for` divides the input/output figures by 1000 to obtain
/// per-1k-token prices.
// NOTE(review): currency is presumably USD, as for the provider-level
// `cost_per_1k_*` fields — confirm against the catalog.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
235
/// One entry of the model catalog (`models.<id>` in the config).
///
/// `name`, `provider` and `context_window` are required; the rest default
/// when absent.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    pub name: String,
    /// Provider name this model belongs to.
    pub provider: String,
    pub context_window: u64,
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags. The catalog accessors replace this with tags derived
    /// from the provider/model capability matrix, so the configured value is
    /// parsed but not what callers of those accessors see.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Model-specific pricing; takes precedence over provider-level pricing.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    pub deprecated: bool,
    #[serde(default)]
    pub deprecation_note: Option<String>,
    #[serde(default)]
    pub quality_tags: Vec<String>,
    #[serde(default)]
    pub prefer_prefill_done: Option<bool>,
}
258
/// Fully resolved identity of a model selector, as produced by
/// `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Model id: the alias target, or the selector with its transport prefix
    /// stripped.
    pub id: String,
    /// Provider name, taken from the alias or inferred from the selector.
    pub provider: String,
    /// The selector itself when it matched a configured alias.
    pub alias: Option<String>,
    /// Tool format chosen for this model/provider pair.
    pub tool_format: String,
    /// Capability tier of the model.
    pub tier: String,
}
267
/// Rule mapping model ids to a provider.
///
/// A rule matches when any matcher that is set matches: `exact` (string
/// equality), `pattern` (glob), or `contains` (substring).
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern matched against the model id.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring that must occur in the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Model id that must match exactly.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider selected when the rule matches.
    pub provider: String,
}
278
/// Rule mapping model ids to a capability tier.
///
/// A rule matches when any matcher that is set matches: `exact` (string
/// equality), `pattern` (glob), or `contains` (substring).
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern matched against the model id.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring that must occur in the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Model id that must match exactly.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier selected when the rule matches.
    pub tier: String,
}
289
/// Fallback tier settings used when no rule or heuristic classifies a model.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Fallback tier name; `"mid"` when not configured.
    #[serde(default = "default_mid")]
    pub default: String,
}
295
/// Hand-written `Default` so that an absent `tier_defaults` section yields the
/// same `"mid"` fallback as an empty one.
impl Default for TierDefaults {
    /// Return tier defaults whose fallback tier is `"mid"`.
    fn default() -> Self {
        Self {
            default: default_mid(),
        }
    }
}
303
/// Stock fallback tier name; also the serde default for
/// `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
307
308/// Load and cache the providers config. Called once at VM startup.
309pub fn load_config() -> &'static ProvidersConfig {
310    CONFIG.get_or_init(|| {
311        let mut config = default_config();
312        let verbose_config_logging = matches!(
313            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
314            Some("1" | "true" | "TRUE" | "yes" | "YES")
315        ) || matches!(
316            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
317            Some("1" | "true" | "TRUE" | "yes" | "YES")
318        );
319        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
320            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
321                config.merge_from(&overlay);
322                let _ = CONFIG_PATH.set(path);
323                return config;
324            }
325        }
326        if let Some(home) = dirs_or_home() {
327            let path = format!("{home}/.config/harn/providers.toml");
328            if let Some(overlay) = read_external_config(&path, false) {
329                config.merge_from(&overlay);
330                let _ = CONFIG_PATH.set(path);
331                return config;
332            }
333        }
334        config
335    })
336}
337
338fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
339    match std::fs::read_to_string(path) {
340        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
341            Ok(config) => {
342                if verbose {
343                    eprintln!(
344                        "[llm_config] Loaded {} providers, {} aliases from {}",
345                        config.providers.len(),
346                        config.aliases.len(),
347                        path
348                    );
349                }
350                Some(config)
351            }
352            Err(error) => {
353                eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
354                None
355            }
356        },
357        Err(error) => {
358            if verbose {
359                eprintln!("[llm_config] Cannot read {}: {}", path, error);
360            }
361            None
362        }
363    }
364}
365
366/// Parse a provider/model catalog overlay in the same shape as
367/// `providers.toml` or `[llm]` package-manifest sections.
368pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
369    toml::from_str::<ProvidersConfig>(src)
370}
371
372/// Returns the filesystem path of the currently-loaded providers config, if
373/// any. Returns `None` when built-in defaults are active.
374pub fn loaded_config_path() -> Option<std::path::PathBuf> {
375    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
376    let _ = load_config();
377    CONFIG_PATH.get().map(std::path::PathBuf::from)
378}
379
380/// Install per-run provider config overlays. The overlay uses the same shape as
381/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
382/// manifests. Passing `None` clears the overlay.
383pub fn set_user_overrides(config: Option<ProvidersConfig>) {
384    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
385}
386
/// Clear per-run provider config overlays.
///
/// Equivalent to `set_user_overrides(None)`; affects only the calling thread
/// because the overlay is stored in a thread-local.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
391
392fn effective_config() -> ProvidersConfig {
393    let mut merged = load_config().clone();
394    USER_OVERRIDES.with(|cell| {
395        if let Some(overlay) = cell.borrow().as_ref() {
396            merged.merge_from(overlay);
397        }
398    });
399    merged
400}
401
402/// Resolve a model alias to (model_id, provider_name).
403pub fn resolve_model(alias: &str) -> (String, Option<String>) {
404    let config = effective_config();
405    if let Some(a) = config.aliases.get(alias) {
406        return (a.id.clone(), Some(a.provider.clone()));
407    }
408    (normalize_model_id(alias), None)
409}
410
/// Remove a leading transport selector (`ollama:`, `local:`, `huggingface:`,
/// `hf:`) from a model selector, leaving the provider-native model id.
///
/// This mirrors Burin's existing normalization so `ollama:qwen3:30b` reaches
/// Ollama as `qwen3:30b` instead of an invalid model named `ollama`. At most
/// one prefix is removed; selectors without a known prefix come back
/// unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    const SELECTOR_PREFIXES: [&str; 4] = ["ollama:", "local:", "huggingface:", "hf:"];
    SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}
423
424/// Resolve an alias or selector into the complete catalog identity hosts need:
425/// provider inference, prefix-normalized model id, default tool format, and tier.
426pub fn resolve_model_info(selector: &str) -> ResolvedModel {
427    let config = effective_config();
428    if let Some(alias) = config.aliases.get(selector) {
429        let id = alias.id.clone();
430        let provider = alias.provider.clone();
431        let tool_format = alias
432            .tool_format
433            .clone()
434            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
435        return ResolvedModel {
436            tier: model_tier_with_config(&config, &id),
437            id,
438            provider,
439            alias: Some(selector.to_string()),
440            tool_format,
441        };
442    }
443
444    let provider = infer_provider_with_config(&config, selector).provider;
445    let id = normalize_model_id(selector);
446    let tool_format = default_tool_format_with_config(&config, &id, &provider);
447    let tier = model_tier_with_config(&config, &id);
448    ResolvedModel {
449        id,
450        provider,
451        alias: None,
452        tool_format,
453        tier,
454    }
455}
456
457/// Infer provider from a model ID using inference rules.
458pub fn infer_provider(model_id: &str) -> String {
459    infer_provider_detail(model_id).provider
460}
461
462/// Infer provider from a model ID and retain whether the configured default was used.
463pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
464    let config = effective_config();
465    infer_provider_with_config(&config, model_id)
466}
467
468fn infer_provider_with_config(
469    config: &ProvidersConfig,
470    model_id: &str,
471) -> crate::llm::provider::ProviderInference {
472    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
473        return crate::llm::provider::ProviderInference::builtin("ollama");
474    }
475    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
476        return crate::llm::provider::ProviderInference::builtin("huggingface");
477    }
478    for rule in &config.inference_rules {
479        if let Some(exact) = &rule.exact {
480            if model_id == exact {
481                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
482            }
483        }
484        if let Some(pattern) = &rule.pattern {
485            if glob_match(pattern, model_id) {
486                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
487            }
488        }
489        if let Some(substr) = &rule.contains {
490            if model_id.contains(substr.as_str()) {
491                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
492            }
493        }
494    }
495    crate::llm::provider::infer_provider_from_model_id(
496        model_id,
497        &default_provider_with_config(config),
498    )
499}
500
501pub fn default_provider() -> String {
502    let config = effective_config();
503    default_provider_with_config(&config)
504}
505
506fn default_provider_with_config(config: &ProvidersConfig) -> String {
507    std::env::var("HARN_DEFAULT_PROVIDER")
508        .ok()
509        .map(|value| value.trim().to_string())
510        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
511        .or_else(|| {
512            config
513                .default_provider
514                .as_deref()
515                .map(str::trim)
516                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
517                .map(str::to_string)
518        })
519        .unwrap_or_else(|| "anthropic".to_string())
520}
521
522/// Get model tier ("small", "mid", "frontier").
523pub fn model_tier(model_id: &str) -> String {
524    let config = effective_config();
525    model_tier_with_config(&config, model_id)
526}
527
528fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
529    for rule in &config.tier_rules {
530        if let Some(exact) = &rule.exact {
531            if model_id == exact {
532                return rule.tier.clone();
533            }
534        }
535        if let Some(pattern) = &rule.pattern {
536            if glob_match(pattern, model_id) {
537                return rule.tier.clone();
538            }
539        }
540        if let Some(substr) = &rule.contains {
541            if model_id.contains(substr.as_str()) {
542                return rule.tier.clone();
543            }
544        }
545    }
546    let lower = model_id.to_lowercase();
547    if lower.contains("9b") || lower.contains("a3b") {
548        return "small".to_string();
549    }
550    if lower.starts_with("claude-") || lower == "gpt-4o" {
551        return "frontier".to_string();
552    }
553    config.tier_defaults.default.clone()
554}
555
556/// Get provider config for resolving base_url, auth, etc.
557pub fn provider_config(name: &str) -> Option<ProviderDef> {
558    effective_config().providers.get(name).cloned()
559}
560
561/// Get model-specific default parameters (temperature, etc.).
562/// Matches glob patterns in model_defaults keys.
563pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
564    let config = effective_config();
565    let mut params = BTreeMap::new();
566    for (pattern, defaults) in &config.model_defaults {
567        if glob_match(pattern, model_id) {
568            for (k, v) in defaults {
569                params.insert(k.clone(), v.clone());
570            }
571        }
572    }
573    params
574}
575
576/// Get list of configured provider names.
577pub fn provider_names() -> Vec<String> {
578    effective_config().providers.keys().cloned().collect()
579}
580
581/// Return every configured alias name, sorted deterministically.
582pub fn known_model_names() -> Vec<String> {
583    effective_config().aliases.keys().cloned().collect()
584}
585
586pub fn alias_entries() -> Vec<(String, AliasDef)> {
587    effective_config().aliases.into_iter().collect()
588}
589
590pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
591    effective_config().alias_tool_calling.get(alias).cloned()
592}
593
594/// Return every configured model-catalog entry, sorted by provider then id.
595pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
596    let mut entries: Vec<_> = effective_config()
597        .models
598        .into_iter()
599        .map(|(id, model)| {
600            let provider = model.provider.clone();
601            (
602                id.clone(),
603                with_effective_capability_tags(id, provider, model),
604            )
605        })
606        .collect();
607    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
608        model_a
609            .provider
610            .cmp(&model_b.provider)
611            .then_with(|| id_a.cmp(id_b))
612    });
613    entries
614}
615
616pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
617    effective_config()
618        .models
619        .get(model_id)
620        .cloned()
621        .map(|model| {
622            let provider = model.provider.clone();
623            with_effective_capability_tags(model_id.to_string(), provider, model)
624        })
625}
626
627pub fn qc_default_model(provider: &str) -> Option<String> {
628    std::env::var("BURIN_QC_MODEL")
629        .ok()
630        .filter(|value| !value.trim().is_empty())
631        .or_else(|| {
632            effective_config()
633                .qc_defaults
634                .get(&provider.to_lowercase())
635                .cloned()
636        })
637}
638
/// Built-in default model id for `provider`.
///
/// `local` honors `LOCAL_LLM_MODEL` and then `HARN_LLM_MODEL`; `mlx` honors
/// `MLX_MODEL_ID`. Unrecognized providers get the Claude Sonnet default.
pub fn default_model_for_provider(provider: &str) -> String {
    // First readable env var from `names`, else the hard-coded fallback.
    let env_or = |names: &[&str], fallback: &str| -> String {
        names
            .iter()
            .find_map(|name| std::env::var(*name).ok())
            .unwrap_or_else(|| fallback.to_string())
    };

    match provider {
        "local" => env_or(&["LOCAL_LLM_MODEL", "HARN_LLM_MODEL"], "gemma-4-26b-a4b-it"),
        "mlx" => env_or(&["MLX_MODEL_ID"], "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
        "openai" => String::from("gpt-4o-mini"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-6"),
    }
}
652
653pub fn qc_defaults() -> BTreeMap<String, String> {
654    effective_config().qc_defaults
655}
656
657pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
658    effective_config()
659        .models
660        .get(model_id)
661        .and_then(|model| model.pricing.clone())
662}
663
664pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
665    model_pricing_per_mtok(model_id)
666        .map(|pricing| {
667            (
668                pricing.input_per_mtok / 1000.0,
669                pricing.output_per_mtok / 1000.0,
670            )
671        })
672        .or_else(|| {
673            let (input, output, _) = provider_economics(provider);
674            match (input, output) {
675                (Some(input), Some(output)) => Some((input, output)),
676                _ => None,
677            }
678        })
679}
680
681pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
682    match auth_env {
683        AuthEnv::None => Vec::new(),
684        AuthEnv::Single(name) => vec![name.clone()],
685        AuthEnv::Multiple(names) => names.clone(),
686    }
687}
688
689pub fn provider_key_available(provider: &str) -> bool {
690    let Some(pdef) = provider_config(provider) else {
691        return provider == "ollama";
692    };
693    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
694        return true;
695    }
696    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
697        std::env::var(env_name)
698            .ok()
699            .is_some_and(|value| !value.trim().is_empty())
700    })
701}
702
703pub fn available_provider_names() -> Vec<String> {
704    provider_names()
705        .into_iter()
706        .filter(|provider| provider_key_available(provider))
707        .collect()
708}
709
710/// Check if a provider advertises a legacy provider-level feature.
711pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
712    provider_config(provider)
713        .map(|p| p.features.iter().any(|f| f == feature))
714        .unwrap_or(false)
715}
716
717/// Provider-level catalog pricing/latency. Model-specific catalog pricing
718/// wins when available; this is the adapter-level fallback used by routing
719/// and portal summaries when a model has no explicit catalog entry.
720pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
721    provider_config(provider)
722        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
723        .unwrap_or((None, None, None))
724}
725
726/// Resolve the default tool format for a model+provider combination.
727/// Priority: alias `tool_format` (matched by model ID) > provider/model
728/// capability matrix > legacy provider feature > "text".
729pub fn default_tool_format(model: &str, provider: &str) -> String {
730    let config = effective_config();
731    default_tool_format_with_config(&config, model, provider)
732}
733
734fn default_tool_format_with_config(
735    config: &ProvidersConfig,
736    model: &str,
737    provider: &str,
738) -> String {
739    // Aliases match by model ID + provider, or by alias name.
740    for (name, alias) in &config.aliases {
741        let matches = (alias.id == model && alias.provider == provider) || name == model;
742        if matches {
743            if let Some(ref fmt) = alias.tool_format {
744                return fmt.clone();
745            }
746        }
747    }
748    let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
749    let legacy_provider_native = config
750        .providers
751        .get(provider)
752        .map(|p| p.features.iter().any(|f| f == "native_tools"))
753        .unwrap_or(false);
754    if capability_matrix_native || legacy_provider_native {
755        "native".to_string()
756    } else {
757        "text".to_string()
758    }
759}
760
761fn with_effective_capability_tags(
762    model_id: String,
763    provider: String,
764    mut model: ModelDef,
765) -> ModelDef {
766    model.capabilities = effective_model_capability_tags(&provider, &model_id);
767    model
768}
769
770/// Legacy display tags derived from the canonical provider/model capability
771/// matrix. The matrix is the source of truth; `models.*.capabilities` in
772/// providers.toml is accepted only for backwards-compatible parsing.
773pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
774    let caps = crate::llm::capabilities::lookup(provider, model_id);
775    let mut tags = Vec::new();
776    // Today all Harn chat providers expose streaming. Keep this as a
777    // transport baseline rather than a duplicated per-model declaration.
778    tags.push("streaming".to_string());
779    if caps.native_tools || caps.text_tool_wire_format_supported {
780        tags.push("tools".to_string());
781    }
782    if !caps.tool_search.is_empty() {
783        tags.push("tool_search".to_string());
784    }
785    if caps.vision || caps.vision_supported {
786        tags.push("vision".to_string());
787    }
788    if caps.audio {
789        tags.push("audio".to_string());
790    }
791    if caps.pdf {
792        tags.push("pdf".to_string());
793    }
794    if caps.files_api_supported {
795        tags.push("files".to_string());
796    }
797    if caps.prompt_caching {
798        tags.push("prompt_caching".to_string());
799    }
800    if !caps.thinking_modes.is_empty() {
801        tags.push("thinking".to_string());
802    }
803    if caps.interleaved_thinking_supported
804        || caps
805            .thinking_modes
806            .iter()
807            .any(|mode| mode == "adaptive" || mode == "effort")
808    {
809        tags.push("extended_thinking".to_string());
810    }
811    if caps.json_schema.is_some() {
812        tags.push("structured_output".to_string());
813    }
814    tags
815}
816
817/// Resolve a tier or alias into a concrete model/provider pair.
818pub fn resolve_tier_model(
819    target: &str,
820    preferred_provider: Option<&str>,
821) -> Option<(String, String)> {
822    let config = effective_config();
823
824    if let Some(alias) = config.aliases.get(target) {
825        return Some((alias.id.clone(), alias.provider.clone()));
826    }
827
828    let candidate_aliases = if let Some(provider) = preferred_provider {
829        vec![
830            format!("{provider}/{target}"),
831            format!("{provider}:{target}"),
832            format!("tier/{target}"),
833            target.to_string(),
834        ]
835    } else {
836        vec![format!("tier/{target}"), target.to_string()]
837    };
838
839    for alias_name in candidate_aliases {
840        if let Some(alias) = config.aliases.get(&alias_name) {
841            return Some((alias.id.clone(), alias.provider.clone()));
842        }
843    }
844
845    None
846}
847
848/// Return all configured alias-backed model/provider pairs whose resolved
849/// model falls into the requested capability tier. The result is de-duplicated
850/// and sorted deterministically by provider then model id.
851pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
852    let config = effective_config();
853    let mut seen = std::collections::BTreeSet::new();
854    let mut candidates = Vec::new();
855
856    for alias in config.aliases.values() {
857        let pair = (alias.id.clone(), alias.provider.clone());
858        if seen.contains(&pair) {
859            continue;
860        }
861        if model_tier(&alias.id) == target {
862            seen.insert(pair.clone());
863            candidates.push(pair);
864        }
865    }
866
867    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
868        provider_a
869            .cmp(provider_b)
870            .then_with(|| model_a.cmp(model_b))
871    });
872    candidates
873}
874
875/// Return all configured alias-backed model/provider pairs. Used by routing
876/// policies that need to compare alternatives across tiers.
877pub fn all_model_candidates() -> Vec<(String, String)> {
878    let config = effective_config();
879    let mut seen = std::collections::BTreeSet::new();
880    let mut candidates = Vec::new();
881
882    for alias in config.aliases.values() {
883        let pair = (alias.id.clone(), alias.provider.clone());
884        if seen.insert(pair.clone()) {
885            candidates.push(pair);
886        }
887    }
888
889    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
890        provider_a
891            .cmp(provider_b)
892            .then_with(|| model_a.cmp(model_b))
893    });
894    candidates
895}
896
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
///
/// `*` matches any run of characters, including none, and may appear any
/// number of times (`*qwen*`, `a*b*c`). All other characters match literally.
/// A pattern without `*` must equal `input` exactly.
///
/// The text before the first `*` must be a prefix of `input` and the text
/// after the last `*` must be a suffix of what remains, so the two can never
/// overlap (`ab*bc` does not match `abc`).
fn glob_match(pattern: &str, input: &str) -> bool {
    let mut segments = pattern.split('*');
    // `split` always yields at least one item: the text before the first `*`.
    let head = segments.next().unwrap_or("");
    let tail = match segments.next_back() {
        Some(tail) => tail,
        // Only one segment means the pattern has no wildcard.
        None => return input == pattern,
    };

    let after_head = match input.strip_prefix(head) {
        Some(rest) => rest,
        None => return false,
    };
    let mut remaining = match after_head.strip_suffix(tail) {
        Some(rest) => rest,
        None => return false,
    };

    // Interior segments must occur in order; taking the leftmost occurrence
    // of each leaves the most room for the segments that follow.
    for segment in segments {
        match remaining.find(segment) {
            Some(at) => remaining = &remaining[at + segment.len()..],
            None => return false,
        }
    }
    true
}
914
/// Read the `HOME` environment variable, yielding `None` when it is unset or
/// is not valid Unicode.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
918
919/// Resolve the effective base URL for a provider, checking the `base_url_env`
920/// override first, then falling back to the configured `base_url`.
921pub fn resolve_base_url(pdef: &ProviderDef) -> String {
922    if let Some(env_name) = &pdef.base_url_env {
923        if let Ok(val) = std::env::var(env_name) {
924            // Strip surrounding quotes that some .env parsers leave intact.
925            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
926            if !trimmed.is_empty() {
927                return trimmed.to_string();
928            }
929        }
930    }
931    pdef.base_url.clone()
932}
933
934/// Embedded copy of `llm/providers.toml`, the single source of truth for
935/// Harn's bundled provider/model catalog. Edit the TOML, not this string.
936const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
937
938/// Parse the embedded `providers.toml` into the runtime `ProvidersConfig`.
939///
940/// Hosts overlay this base via `HARN_PROVIDERS_CONFIG`,
941/// `~/.config/harn/providers.toml`, `harn.toml`, package-manifest
942/// `[llm]` sections, and per-run `set_user_overrides(...)`. The same
943/// Serde shape applies at every layer, so there is exactly one schema to
944/// keep coherent — no parallel Rust-literal catalog.
945///
946/// We `expect` on parse failure because the file is bundled into the
947/// binary at compile time; a malformed embedded catalog is a build-time
948/// invariant violation that should fail every test, not silently
949/// degrade in production.
950fn default_config() -> ProvidersConfig {
951    parse_config_toml(EMBEDDED_PROVIDERS_TOML)
952        .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
953}
954
/// Test-only helper: start from the embedded default catalog and layer
/// `overlay` on top of it with `merge_from`, returning the combined config.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
961
#[cfg(test)]
mod tests {
    use super::*;

    /// Environment variable that supplies the fallback provider for model ids
    /// no inference rule recognises.
    const DEFAULT_PROVIDER_ENV: &str = "HARN_DEFAULT_PROVIDER";

    /// Scoped mutation of a single environment variable.
    ///
    /// The previous value is captured on construction and put back on drop,
    /// so the process environment is restored even when an assertion in the
    /// test body panics. Bind it to a named variable (`_env`, not `_`) so it
    /// lives until the end of the test.
    ///
    /// Callers must hold the `crate::llm::env_lock()` guard for as long as
    /// this value is alive.
    struct EnvVarGuard {
        key: &'static str,
        previous: Option<String>,
    }

    impl EnvVarGuard {
        /// Set `key` to `value`, remembering whatever was there before.
        fn set(key: &'static str, value: &str) -> Self {
            let previous = std::env::var(key).ok();
            // SAFETY: the caller holds `env_lock()`, so no other test that
            // honours that lock reads or writes the environment concurrently.
            unsafe {
                std::env::set_var(key, value);
            }
            Self { key, previous }
        }

        /// Remove `key` from the environment, remembering whatever was there
        /// before.
        fn unset(key: &'static str) -> Self {
            let previous = std::env::var(key).ok();
            // SAFETY: the caller holds `env_lock()`, so no other test that
            // honours that lock reads or writes the environment concurrently.
            unsafe {
                std::env::remove_var(key);
            }
            Self { key, previous }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: this guard is declared after the `env_lock()` guard in
            // every test, so it is dropped first and the lock is still held.
            unsafe {
                match self.previous.take() {
                    Some(value) => std::env::set_var(self.key, value),
                    None => std::env::remove_var(self.key),
                }
            }
        }
    }

    /// Drop any user overrides installed on the current thread.
    fn reset_overrides() {
        clear_user_overrides();
    }

    /// Installs a user-override overlay for the current thread and clears it
    /// again on drop, so a failing assertion cannot leak the overlay into a
    /// later test that runs on the same thread. Bind it to a named variable
    /// (`_overrides`, not `_`).
    struct ScopedOverrides;

    impl ScopedOverrides {
        fn install(overlay: ProvidersConfig) -> Self {
            set_user_overrides(Some(overlay));
            Self
        }
    }

    impl Drop for ScopedOverrides {
        fn drop(&mut self) {
            clear_user_overrides();
        }
    }

    #[test]
    fn test_glob_match_prefix() {
        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
        assert!(glob_match("gpt-*", "gpt-4o"));
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    #[test]
    fn test_glob_match_suffix() {
        assert!(glob_match("*-latest", "llama3.2-latest"));
        assert!(!glob_match("*-latest", "llama3.2"));
    }

    #[test]
    fn test_glob_match_middle() {
        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
    }

    #[test]
    fn test_glob_match_exact() {
        assert!(glob_match("gpt-4o", "gpt-4o"));
        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
    }

    #[test]
    fn test_infer_provider_from_defaults() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        // Make sure the fallback comes from the config, not the environment.
        let _env = EnvVarGuard::unset(DEFAULT_PROVIDER_ENV);

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        assert_eq!(infer_provider("unknown-model"), "anthropic");
    }

    #[test]
    fn test_infer_provider_prefix_rules() {
        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
        // Even when the id also contains `/`, the local transport prefix wins.
        assert_eq!(infer_provider("local:owner/model"), "ollama");
        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
    }

    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        // Make sure the fallback comes from the config, not the environment.
        let _env = EnvVarGuard::unset(DEFAULT_PROVIDER_ENV);

        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");
    }

    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        let local = resolve_model_info("local:gemma-4-e4b-it");
        assert_eq!(local.id, "gemma-4-e4b-it");
        assert_eq!(local.provider, "ollama");

        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
        assert_eq!(ollama.id, "qwen3:30b-a3b");
        assert_eq!(ollama.provider, "ollama");

        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.provider, "huggingface");
    }

    #[test]
    fn test_model_tier_from_defaults() {
        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
        assert_eq!(model_tier("gpt-4o"), "frontier");
        assert_eq!(model_tier("Qwen3.5-9B"), "small");
        assert_eq!(model_tier("deepseek-v3"), "mid");
    }

    #[test]
    fn test_resolve_model_unknown_alias() {
        // A name that is not an alias passes through with no provider attached.
        let (id, provider) = resolve_model("gpt-4o");
        assert_eq!(id, "gpt-4o");
        assert!(provider.is_none());
    }

    #[test]
    fn test_provider_names() {
        let names = provider_names();
        assert!(names.len() >= 7);
        assert!(names.contains(&"anthropic".to_string()));
        assert!(names.contains(&"together".to_string()));
        assert!(names.contains(&"local".to_string()));
        assert!(names.contains(&"mlx".to_string()));
        assert!(names.contains(&"openai".to_string()));
        assert!(names.contains(&"ollama".to_string()));
        assert!(names.contains(&"bedrock".to_string()));
        assert!(names.contains(&"azure_openai".to_string()));
        assert!(names.contains(&"vertex".to_string()));
    }

    #[test]
    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.aliases.insert(
            "quickstart".to_string(),
            AliasDef {
                id: "llama3.2".to_string(),
                provider: "ollama".to_string(),
                tool_format: None,
            },
        );

        let merged = merge_global_config(overlay);

        // The overlay's values win, and the built-in providers are still there.
        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
        assert!(merged.providers.contains_key("anthropic"));
        assert!(merged.providers.contains_key("ollama"));
        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
    }

    #[test]
    fn test_resolve_tier_model_default_aliases() {
        // Exercise the alias-resolution machinery, not the specific catalog
        // value: the model under each tier alias evolves as the embedded
        // providers.toml is updated. The invariants worth pinning are the
        // provider routing + catalog-registration of the resolved model.
        let (model, provider) = resolve_tier_model("frontier", None)
            .expect("frontier alias must resolve from the embedded catalog");
        assert_eq!(provider, "anthropic");
        assert!(
            model_catalog_entry(&model)
                .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
        );

        let (model, provider) = resolve_tier_model("small", None)
            .expect("small alias must resolve from the embedded catalog");
        assert!(
            [
                "openrouter",
                "huggingface",
                "local",
                "llamacpp",
                "mlx",
                "ollama"
            ]
            .contains(&provider.as_str()),
            "small tier should resolve to an open-weight provider (got {provider} / {model})"
        );
    }

    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        // tier/<provider> takes precedence over generic tier when the
        // caller scopes by provider. Don't pin the specific model — the
        // catalog evolves.
        let (model, provider) = resolve_tier_model("mid", Some("openai"))
            .expect("mid tier scoped to openai must resolve");
        assert_eq!(provider, "openai");
        assert!(
            model_catalog_entry(&model).is_some(),
            "mid/openai alias must point at a registered model (got {model})"
        );
    }

    #[test]
    fn test_provider_config_anthropic() {
        let pdef = provider_config("anthropic").unwrap();
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    #[test]
    fn test_provider_config_mlx() {
        let pdef = provider_config("mlx").unwrap();
        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
        assert_eq!(
            pdef.healthcheck.unwrap().path.as_deref(),
            Some("/v1/models")
        );

        let (model, provider) = resolve_model("mlx-qwen36-27b");
        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
        assert_eq!(provider.as_deref(), Some("mlx"));
    }

    #[test]
    fn test_enterprise_provider_defaults_and_inference() {
        let bedrock = provider_config("bedrock").unwrap();
        assert_eq!(bedrock.auth_style, "aws_sigv4");
        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
        assert_eq!(
            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
            "bedrock"
        );
        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");

        let azure = provider_config("azure_openai").unwrap();
        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
        assert_eq!(
            auth_env_names(&azure.auth_env),
            vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]
        );

        let vertex = provider_config("vertex").unwrap();
        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
    }

    #[test]
    fn test_default_provider_env_override_for_unknown_model() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let _env = EnvVarGuard::set(DEFAULT_PROVIDER_ENV, "openai");

        let inference = infer_provider_detail("unknown-model");

        assert_eq!(inference.provider, "openai");
        assert_eq!(
            inference.source,
            crate::llm::provider::ProviderInferenceSource::DefaultFallback
        );
    }

    #[test]
    fn test_resolve_base_url_no_env() {
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    #[test]
    fn test_default_config_roundtrip() {
        let config = default_config();
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
        assert_eq!(config.tier_defaults.default, "mid");
    }

    #[test]
    fn test_local_ollama_catalog_metadata() {
        reset_overrides();

        let qwen_coding = model_catalog_entry("qwen3.6:35b-a3b-coding-nvfp4")
            .expect("qwen3.6 coding catalog entry");
        assert_eq!(qwen_coding.context_window, 262_144);
        assert!(!qwen_coding.capabilities.iter().any(|cap| cap == "vision"));

        let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
        assert_eq!(gemma4.context_window, 262_144);
        assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
    }

    #[test]
    fn test_external_config_overlays_default_catalog() {
        let mut config = default_config();
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.providers.insert(
            "custom".to_string(),
            ProviderDef {
                base_url: "https://llm.example.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );

        config.merge_from(&overlay);

        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
        assert!(config.providers.contains_key("custom"));
        assert!(config.providers.contains_key("anthropic"));
        assert!(config.providers.contains_key("ollama"));
    }

    #[test]
    fn test_model_params_empty() {
        let params = model_params("claude-sonnet-4-20250514");
        assert!(params.is_empty());
    }

    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        overlay.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        let _overrides = ScopedOverrides::install(overlay);

        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        assert_eq!(
            provider_config("acme").map(|provider| provider.base_url),
            Some("https://llm.acme.test/v1".to_string())
        );
    }

    #[test]
    fn test_default_tool_format_uses_capability_matrix() {
        reset_overrides();

        assert_eq!(
            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
            "text"
        );
        assert_eq!(
            default_tool_format("devstral-small-2:24b", "ollama"),
            "text"
        );
        assert_eq!(
            default_tool_format("ollama-devstral-small-2-native", "ollama"),
            "native"
        );
        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
    }

    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                runtime_context_window: None,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
                deprecated: false,
                deprecation_note: None,
                quality_tags: Vec::new(),
                prefer_prefill_done: None,
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        let _overrides = ScopedOverrides::install(overlay);

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        // The overlay listed "tools" first; the catalog entry is expected to
        // report capabilities in sorted order.
        assert_eq!(
            entry.capabilities,
            vec!["streaming".to_string(), "tools".to_string()]
        );
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );
    }

    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        let _overrides = ScopedOverrides::install(overlay);

        assert_eq!(infer_provider("internal-foo"), "openai");
    }

    // ── Embedded providers.toml invariants ───────────────────────────────────
    // These tests pin properties of the *system* — TOML parses, every
    // alias resolves, every deprecated model has a note — without
    // pinning specific catalog values. They survive future catalog
    // churn and surface real schema breakage.

    #[test]
    fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
        let config = default_config();
        assert!(
            config.providers.len() >= 10,
            "expected >=10 providers in embedded catalog, got {}",
            config.providers.len()
        );
        assert!(
            config.models.len() >= 20,
            "expected >=20 models in embedded catalog, got {}",
            config.models.len()
        );
        assert!(
            config.aliases.len() >= 15,
            "expected >=15 aliases in embedded catalog, got {}",
            config.aliases.len()
        );
        assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
    }

    #[test]
    fn embedded_catalog_every_deprecated_model_has_a_note() {
        let config = default_config();
        // A note that is missing, empty, or whitespace-only all count as absent.
        let offenders: Vec<&str> = config
            .models
            .iter()
            .filter(|(_, model)| {
                model.deprecated
                    && model
                        .deprecation_note
                        .as_deref()
                        .unwrap_or("")
                        .trim()
                        .is_empty()
            })
            .map(|(id, _)| id.as_str())
            .collect();
        assert!(
            offenders.is_empty(),
            "deprecated models missing a deprecation_note: {offenders:?}"
        );
    }

    #[test]
    fn embedded_catalog_every_model_targets_a_registered_provider() {
        let config = default_config();
        let known: std::collections::BTreeSet<&str> =
            config.providers.keys().map(String::as_str).collect();
        let orphans: Vec<(&str, &str)> = config
            .models
            .iter()
            .filter(|(_, model)| !known.contains(model.provider.as_str()))
            .map(|(id, model)| (id.as_str(), model.provider.as_str()))
            .collect();
        assert!(
            orphans.is_empty(),
            "models reference unknown providers: {orphans:?}"
        );
    }

    #[test]
    fn embedded_catalog_every_alias_targets_a_registered_provider() {
        let config = default_config();
        let known: std::collections::BTreeSet<&str> =
            config.providers.keys().map(String::as_str).collect();
        let orphans: Vec<(&str, &str)> = config
            .aliases
            .iter()
            .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
            .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
            .collect();
        assert!(
            orphans.is_empty(),
            "aliases reference unknown providers: {orphans:?}"
        );
    }

    #[test]
    fn embedded_catalog_every_qc_default_targets_a_known_model() {
        let config = default_config();
        let orphans: Vec<(&str, &str)> = config
            .qc_defaults
            .iter()
            .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
            .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
            .collect();
        assert!(
            orphans.is_empty(),
            "qc_defaults reference unknown models: {orphans:?}"
        );
    }

    #[test]
    fn embedded_catalog_pricing_rates_are_non_negative() {
        let config = default_config();
        for (id, model) in &config.models {
            // Models without pricing data have nothing to validate.
            let Some(pricing) = &model.pricing else {
                continue;
            };
            assert!(
                pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
                "{id}: negative pricing — in={} out={}",
                pricing.input_per_mtok,
                pricing.output_per_mtok
            );
            if let Some(rate) = pricing.cache_read_per_mtok {
                assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
            }
            if let Some(rate) = pricing.cache_write_per_mtok {
                assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
            }
        }
    }

    #[test]
    fn embedded_catalog_tier_aliases_resolve_to_active_models() {
        // The three canonical tier aliases (frontier / mid / small) MUST
        // resolve to non-deprecated catalog entries; a default that
        // routes the loop into a sunsetted model is a release blocker.
        for alias in ["frontier", "mid", "small"] {
            let (model, _provider) = resolve_tier_model(alias, None)
                .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
            let entry = model_catalog_entry(&model).unwrap_or_else(|| {
                panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
            });
            assert!(
                !entry.deprecated,
                "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
                entry.deprecation_note
            );
        }
    }
}