Skip to main content

harn_vm/
llm_config.rs

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
// Process-wide providers config; populated at most once by `load_config`.
6static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
// Path of the external config file that was merged into `CONFIG`. Stays unset
// when `load_config` found no readable, parseable file.
7static CONFIG_PATH: OnceLock<String> = OnceLock::new();
8
9thread_local! {
10    /// Thread-local provider config overlays installed by the CLI after it
11    /// reads the nearest `harn.toml` plus any installed package manifests.
12    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
13    /// the current run without mutating the process-wide default config.
    // Written by `set_user_overrides`, read by `effective_config`.
14    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
15}
16
17#[derive(Debug, Clone, Deserialize, Default)]
18pub struct ProvidersConfig {
19    #[serde(default)]
20    pub default_provider: Option<String>,
21    #[serde(default)]
22    pub providers: BTreeMap<String, ProviderDef>,
23    #[serde(default)]
24    pub aliases: BTreeMap<String, AliasDef>,
25    #[serde(default)]
26    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
27    #[serde(default)]
28    pub models: BTreeMap<String, ModelDef>,
29    #[serde(default)]
30    pub qc_defaults: BTreeMap<String, String>,
31    #[serde(default)]
32    pub inference_rules: Vec<InferenceRule>,
33    #[serde(default)]
34    pub tier_rules: Vec<TierRule>,
35    #[serde(default)]
36    pub tier_defaults: TierDefaults,
37    #[serde(default)]
38    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
39}
40
41impl ProvidersConfig {
42    pub fn is_empty(&self) -> bool {
43        self.default_provider.is_none()
44            && self.providers.is_empty()
45            && self.aliases.is_empty()
46            && self.alias_tool_calling.is_empty()
47            && self.models.is_empty()
48            && self.qc_defaults.is_empty()
49            && self.inference_rules.is_empty()
50            && self.tier_rules.is_empty()
51            && self.model_defaults.is_empty()
52            && self.tier_defaults.default == default_mid()
53    }
54
55    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
56        self.providers.extend(overlay.providers.clone());
57        self.aliases.extend(overlay.aliases.clone());
58        self.alias_tool_calling
59            .extend(overlay.alias_tool_calling.clone());
60        self.models.extend(overlay.models.clone());
61        self.qc_defaults.extend(overlay.qc_defaults.clone());
62
63        if overlay.default_provider.is_some() {
64            self.default_provider = overlay.default_provider.clone();
65        }
66
67        if !overlay.inference_rules.is_empty() {
68            let mut merged = overlay.inference_rules.clone();
69            merged.extend(self.inference_rules.clone());
70            self.inference_rules = merged;
71        }
72
73        if !overlay.tier_rules.is_empty() {
74            let mut merged = overlay.tier_rules.clone();
75            merged.extend(self.tier_rules.clone());
76            self.tier_rules = merged;
77        }
78
79        if overlay.tier_defaults.default != default_mid() {
80            self.tier_defaults = overlay.tier_defaults.clone();
81        }
82
83        for (pattern, defaults) in &overlay.model_defaults {
84            self.model_defaults
85                .entry(pattern.clone())
86                .or_default()
87                .extend(defaults.clone());
88        }
89    }
90}
91
/// Connection, authentication, and routing settings for one LLM provider.
///
/// `base_url` is the only field without a serde default, so it must be
/// present in every deserialized provider entry.
92#[derive(Debug, Clone, Deserialize)]
93pub struct ProviderDef {
94    #[serde(default)]
95    pub display_name: Option<String>,
96    #[serde(default)]
97    pub icon: Option<String>,
98    pub base_url: String,
99    #[serde(default)]
100    pub base_url_env: Option<String>,
    // Falls back to "bearer" (see `default_bearer`) when omitted. The value
    // "none" makes `provider_key_available` treat the provider as keyless.
101    #[serde(default = "default_bearer")]
102    pub auth_style: String,
103    #[serde(default)]
104    pub auth_header: Option<String>,
105    #[serde(default)]
106    pub auth_env: AuthEnv,
107    #[serde(default)]
108    pub extra_headers: BTreeMap<String, String>,
109    #[serde(default)]
110    pub chat_endpoint: String,
111    #[serde(default)]
112    pub completion_endpoint: Option<String>,
113    #[serde(default)]
114    pub healthcheck: Option<HealthcheckDef>,
    // Legacy provider-level feature flags; "native_tools" is the one consulted
    // by `default_tool_format_with_config`.
115    #[serde(default)]
116    pub features: Vec<String>,
117    /// Fallback provider name to try if this provider fails.
118    #[serde(default)]
119    pub fallback: Option<String>,
120    /// Number of retries before falling back (default 0).
121    #[serde(default)]
122    pub retry_count: Option<u32>,
123    /// Delay between retries in milliseconds (default 1000).
124    #[serde(default)]
125    pub retry_delay_ms: Option<u64>,
126    /// Maximum requests per minute. None = unlimited.
127    #[serde(default)]
128    pub rpm: Option<u32>,
129    /// Provider/catalog pricing in USD per 1k input tokens.
130    #[serde(default)]
131    pub cost_per_1k_in: Option<f64>,
132    /// Provider/catalog pricing in USD per 1k output tokens.
133    #[serde(default)]
134    pub cost_per_1k_out: Option<f64>,
135    /// Observed or configured p50 latency in milliseconds.
136    #[serde(default)]
137    pub latency_p50_ms: Option<u64>,
138}
139
140impl Default for ProviderDef {
141    fn default() -> Self {
142        Self {
143            display_name: None,
144            icon: None,
145            base_url: String::new(),
146            base_url_env: None,
147            auth_style: default_bearer(),
148            auth_header: None,
149            auth_env: AuthEnv::None,
150            extra_headers: BTreeMap::new(),
151            chat_endpoint: String::new(),
152            completion_endpoint: None,
153            healthcheck: None,
154            features: Vec::new(),
155            fallback: None,
156            retry_count: None,
157            retry_delay_ms: None,
158            rpm: None,
159            cost_per_1k_in: None,
160            cost_per_1k_out: None,
161            latency_p50_ms: None,
162        }
163    }
164}
165
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
169
170/// Auth env var name(s) for the provider. Can be a single string or an array
171/// (tried in order until one is set).
172#[derive(Debug, Clone, Deserialize, Default)]
173#[serde(untagged)]
174pub enum AuthEnv {
    /// No env var configured. This is the `Default` variant, and
    /// `provider_key_available` treats such providers as needing no key.
175    #[default]
176    None,
    /// Exactly one env var name.
177    Single(String),
    /// Several candidate env var names.
178    Multiple(Vec<String>),
179}
180
/// Health-check description attached to a provider via
/// `ProviderDef::healthcheck`. Only `method` is required on input; `path`,
/// `url`, and `body` default to `None` when omitted.
// NOTE(review): presumably `method` is an HTTP verb and `path` is relative to
// the provider base URL while `url` is absolute — confirm against the caller.
181#[derive(Debug, Clone, Deserialize)]
182pub struct HealthcheckDef {
183    pub method: String,
184    #[serde(default)]
185    pub path: Option<String>,
186    #[serde(default)]
187    pub url: Option<String>,
188    #[serde(default)]
189    pub body: Option<String>,
190}
191
/// A named alias for a concrete model/provider pair. `resolve_model` and
/// `resolve_tier_model` return the `id` and `provider` of the matching alias.
192#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
193pub struct AliasDef {
    /// Model id the alias resolves to.
194    pub id: String,
    /// Name of the provider that serves `id`.
195    pub provider: String,
196    /// Per-model tool format override: "native" or "text". When set, this
197    /// takes precedence over the provider-level default. Models with strong
198    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
199    /// models better served by text-based tool calling use "text".
200    #[serde(default)]
201    pub tool_format: Option<String>,
202}
203
204#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
205pub struct AliasToolCallingDef {
206    #[serde(default)]
207    #[serde(skip_serializing_if = "Option::is_none")]
208    pub native: Option<String>,
209    #[serde(default)]
210    #[serde(skip_serializing_if = "Option::is_none")]
211    pub text: Option<String>,
212    #[serde(default)]
213    #[serde(skip_serializing_if = "Option::is_none")]
214    pub streaming_native: Option<String>,
215    #[serde(default)]
216    #[serde(skip_serializing_if = "Option::is_none")]
217    pub fallback_mode: Option<String>,
218    #[serde(default)]
219    #[serde(skip_serializing_if = "Option::is_none")]
220    pub failure_reason: Option<String>,
221    #[serde(default)]
222    #[serde(skip_serializing_if = "Option::is_none")]
223    pub last_probe_at: Option<String>,
224}
225
/// Model-level catalog pricing. `pricing_per_1k_for` divides the input and
/// output figures by 1000 to obtain per-1k-token prices.
// NOTE(review): presumably USD per million tokens, mirroring the USD per-1k
// fields on `ProviderDef` — confirm the currency.
226#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
227pub struct ModelPricing {
228    pub input_per_mtok: f64,
229    pub output_per_mtok: f64,
    // Cache pricing is optional and defaults to `None` when omitted.
230    #[serde(default)]
231    pub cache_read_per_mtok: Option<f64>,
232    #[serde(default)]
233    pub cache_write_per_mtok: Option<f64>,
234}
235
/// One model-catalog row, stored in `ProvidersConfig::models` under its model
/// id. `name`, `provider`, and `context_window` are required on input; every
/// other field has a serde default.
236#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
237pub struct ModelDef {
238    pub name: String,
    // Provider that hosts the model; `infer_provider_with_config` trusts this
    // over pattern-based inference rules.
239    pub provider: String,
240    pub context_window: u64,
241    #[serde(default)]
242    pub runtime_context_window: Option<u64>,
243    #[serde(default)]
244    pub stream_timeout: Option<f64>,
    // Parsed for backwards compatibility only: `model_catalog_entries` and
    // `model_catalog_entry` overwrite it with tags derived from the
    // capability matrix.
245    #[serde(default)]
246    pub capabilities: Vec<String>,
247    #[serde(default)]
248    pub pricing: Option<ModelPricing>,
249    #[serde(default)]
250    pub deprecated: bool,
251    #[serde(default)]
252    pub deprecation_note: Option<String>,
253    #[serde(default)]
254    pub quality_tags: Vec<String>,
255    /// Whether the model can be reached over a normal API-key serverless call,
256    /// or only via a dedicated/provisioned endpoint that the caller must spin
257    /// up out-of-band. Providers like Together list dedicated-only routes
258    /// alongside serverless ones in `/v1/models`, so this metadata lets clients
259    /// avoid presenting them as one-click options.
260    #[serde(default)]
261    pub availability: ModelAvailability,
262}
263
/// How a cataloged model can be reached. Serialized in `snake_case`
/// ("serverless", "dedicated", "unknown"), matching `as_str` and `parse`.
264#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
265#[serde(rename_all = "snake_case")]
266pub enum ModelAvailability {
267    /// Reachable through the provider's normal API-key path with no extra
268    /// setup. The default for cataloged hosted/local models: by cataloging a
269    /// row we are claiming the route works out of the box.
270    #[default]
271    Serverless,
272    /// Requires the caller to provision a dedicated endpoint before requests
273    /// will succeed. The catalog row exists for selection/pricing UI, but
274    /// hosts must not auto-route to it.
275    Dedicated,
276    /// Availability is not known ahead of time. Used for routes that were
277    /// surfaced dynamically (e.g. through `/v1/models`) without a static
278    /// claim from Harn or the user.
279    Unknown,
280}
281
282impl ModelAvailability {
283    pub fn as_str(self) -> &'static str {
284        match self {
285            Self::Serverless => "serverless",
286            Self::Dedicated => "dedicated",
287            Self::Unknown => "unknown",
288        }
289    }
290
291    pub fn parse(value: &str) -> Option<Self> {
292        match value {
293            "serverless" => Some(Self::Serverless),
294            "dedicated" => Some(Self::Dedicated),
295            "unknown" => Some(Self::Unknown),
296            _ => None,
297        }
298    }
299}
300
/// Complete catalog identity produced by `resolve_model_info`.
301#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
302pub struct ResolvedModel {
    /// Provider-native model id (alias target, or the prefix-normalized selector).
303    pub id: String,
    /// Provider taken from the alias or inferred from the selector.
304    pub provider: String,
    /// The selector itself when it matched a configured alias, else `None`.
305    pub alias: Option<String>,
    /// Tool format: the alias override when set, otherwise the computed default.
306    pub tool_format: String,
    /// Tier computed for `id`.
307    pub tier: String,
308}
309
/// One provider-inference rule. A rule matches a model id when any configured
/// matcher hits; `infer_provider_with_config` uses the first matching rule in
/// list order.
310#[derive(Debug, Clone, Deserialize)]
311pub struct InferenceRule {
    /// Glob pattern tested with `glob_match`.
312    #[serde(default)]
313    pub pattern: Option<String>,
    /// Substring that must occur in the model id.
314    #[serde(default)]
315    pub contains: Option<String>,
    /// Model id that must match exactly.
316    #[serde(default)]
317    pub exact: Option<String>,
    /// Provider selected when the rule matches.
318    pub provider: String,
319}
320
/// One tier rule. A rule matches a model id when any configured matcher hits;
/// `model_tier_with_config` uses the first matching rule in list order.
321#[derive(Debug, Clone, Deserialize)]
322pub struct TierRule {
    /// Glob pattern tested with `glob_match`.
323    #[serde(default)]
324    pub pattern: Option<String>,
    /// Substring that must occur in the model id.
325    #[serde(default)]
326    pub contains: Option<String>,
    /// Model id that must match exactly.
327    #[serde(default)]
328    pub exact: Option<String>,
    /// Tier reported when the rule matches.
329    pub tier: String,
330}
331
/// Fallback tier settings. `model_tier_with_config` returns `default` when no
/// tier rule and no built-in heuristic matches the model id.
332#[derive(Debug, Clone, Deserialize)]
333pub struct TierDefaults {
    // Falls back to "mid" (see `default_mid`) when omitted.
334    #[serde(default = "default_mid")]
335    pub default: String,
336}
337
338impl Default for TierDefaults {
339    fn default() -> Self {
340        Self {
341            default: default_mid(),
342        }
343    }
344}
345
/// Name of the tier used when a config does not choose a different default.
fn default_mid() -> String {
    String::from("mid")
}
349
350/// Load and cache the providers config. Called once at VM startup.
351pub fn load_config() -> &'static ProvidersConfig {
352    CONFIG.get_or_init(|| {
353        let mut config = default_config();
354        let verbose_config_logging = matches!(
355            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
356            Some("1" | "true" | "TRUE" | "yes" | "YES")
357        ) || matches!(
358            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
359            Some("1" | "true" | "TRUE" | "yes" | "YES")
360        );
361        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
362            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
363                config.merge_from(&overlay);
364                let _ = CONFIG_PATH.set(path);
365                return config;
366            }
367        }
368        if let Some(home) = dirs_or_home() {
369            let path = format!("{home}/.config/harn/providers.toml");
370            if let Some(overlay) = read_external_config(&path, false) {
371                config.merge_from(&overlay);
372                let _ = CONFIG_PATH.set(path);
373                return config;
374            }
375        }
376        config
377    })
378}
379
380fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
381    match std::fs::read_to_string(path) {
382        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
383            Ok(config) => {
384                if verbose {
385                    eprintln!(
386                        "[llm_config] Loaded {} providers, {} aliases from {}",
387                        config.providers.len(),
388                        config.aliases.len(),
389                        path
390                    );
391                }
392                Some(config)
393            }
394            Err(error) => {
395                eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
396                None
397            }
398        },
399        Err(error) => {
400            if verbose {
401                eprintln!("[llm_config] Cannot read {}: {}", path, error);
402            }
403            None
404        }
405    }
406}
407
408/// Parse a provider/model catalog overlay in the same shape as
409/// `providers.toml` or `[llm]` package-manifest sections.
410pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
411    toml::from_str::<ProvidersConfig>(src)
412}
413
414/// Returns the filesystem path of the currently-loaded providers config, if
415/// any. Returns `None` when built-in defaults are active.
416pub fn loaded_config_path() -> Option<std::path::PathBuf> {
417    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
418    let _ = load_config();
419    CONFIG_PATH.get().map(std::path::PathBuf::from)
420}
421
422/// Install per-run provider config overlays. The overlay uses the same shape as
423/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
424/// manifests. Passing `None` clears the overlay.
425pub fn set_user_overrides(config: Option<ProvidersConfig>) {
426    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
427}
428
429/// Clear per-run provider config overlays.
430pub fn clear_user_overrides() {
431    set_user_overrides(None);
432}
433
434fn effective_config() -> ProvidersConfig {
435    let mut merged = load_config().clone();
436    USER_OVERRIDES.with(|cell| {
437        if let Some(overlay) = cell.borrow().as_ref() {
438            merged.merge_from(overlay);
439        }
440    });
441    merged
442}
443
444/// Resolve a model alias to (model_id, provider_name).
445pub fn resolve_model(alias: &str) -> (String, Option<String>) {
446    let config = effective_config();
447    if let Some(a) = config.aliases.get(alias) {
448        return (a.id.clone(), Some(a.provider.clone()));
449    }
450    (normalize_model_id(alias), None)
451}
452
/// Strip host/provider selector prefixes that identify transport, not the
/// provider-native model id. This mirrors Burin's existing normalization so
/// `ollama:qwen3:30b` reaches Ollama as `qwen3:30b` instead of an invalid
/// model named `ollama`. Cerebras follows the same convention but uses a
/// slash separator (`cerebras/gpt-oss-120b`) because its own /v1/models
/// endpoint returns bare names that overlap OpenAI's families.
///
/// At most one prefix is removed: the first entry of
/// `PROVIDER_SELECTOR_PREFIXES` that `raw` starts with. Input without a known
/// prefix is returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes recognised by `normalize_model_id`, tested in this order.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
470
471/// Resolve an alias or selector into the complete catalog identity hosts need:
472/// provider inference, prefix-normalized model id, default tool format, and tier.
473pub fn resolve_model_info(selector: &str) -> ResolvedModel {
474    let config = effective_config();
475    if let Some(alias) = config.aliases.get(selector) {
476        let id = alias.id.clone();
477        let provider = alias.provider.clone();
478        let tool_format = alias
479            .tool_format
480            .clone()
481            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
482        return ResolvedModel {
483            tier: model_tier_with_config(&config, &id),
484            id,
485            provider,
486            alias: Some(selector.to_string()),
487            tool_format,
488        };
489    }
490
491    let id = normalize_model_id(selector);
492    let provider = infer_provider_with_config(&config, selector).provider;
493    let tool_format = default_tool_format_with_config(&config, &id, &provider);
494    let tier = model_tier_with_config(&config, &id);
495    ResolvedModel {
496        id,
497        provider,
498        alias: None,
499        tool_format,
500        tier,
501    }
502}
503
504/// Infer provider from a model ID using inference rules.
505pub fn infer_provider(model_id: &str) -> String {
506    infer_provider_detail(model_id).provider
507}
508
509/// Infer provider from a model ID and retain whether the configured default was used.
510pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
511    let config = effective_config();
512    infer_provider_with_config(&config, model_id)
513}
514
515fn infer_provider_with_config(
516    config: &ProvidersConfig,
517    model_id: &str,
518) -> crate::llm::provider::ProviderInference {
519    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
520        return crate::llm::provider::ProviderInference::builtin("ollama");
521    }
522    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
523        return crate::llm::provider::ProviderInference::builtin("huggingface");
524    }
525    // Exact catalog rows are the most authoritative declaration of where
526    // a model is hosted: any pattern-based inference rule is necessarily
527    // less specific than `[models."<id>"].provider = "<name>"`. Catalogs
528    // include user overlays, so users can still re-home a model by
529    // setting a catalog entry in their own providers.toml.
530    let normalized_id = normalize_model_id(model_id);
531    if let Some(model) = config
532        .models
533        .get(model_id)
534        .or_else(|| config.models.get(&normalized_id))
535    {
536        return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
537    }
538    for rule in &config.inference_rules {
539        if let Some(exact) = &rule.exact {
540            if model_id == exact {
541                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
542            }
543        }
544        if let Some(pattern) = &rule.pattern {
545            if glob_match(pattern, model_id) {
546                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
547            }
548        }
549        if let Some(substr) = &rule.contains {
550            if model_id.contains(substr.as_str()) {
551                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
552            }
553        }
554    }
555    crate::llm::provider::infer_provider_from_model_id(
556        model_id,
557        &default_provider_with_config(config),
558    )
559}
560
561pub fn default_provider() -> String {
562    let config = effective_config();
563    default_provider_with_config(&config)
564}
565
566fn default_provider_with_config(config: &ProvidersConfig) -> String {
567    std::env::var("HARN_DEFAULT_PROVIDER")
568        .ok()
569        .map(|value| value.trim().to_string())
570        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
571        .or_else(|| {
572            config
573                .default_provider
574                .as_deref()
575                .map(str::trim)
576                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
577                .map(str::to_string)
578        })
579        .unwrap_or_else(|| "anthropic".to_string())
580}
581
582/// Get model tier ("small", "mid", "frontier").
583pub fn model_tier(model_id: &str) -> String {
584    let config = effective_config();
585    model_tier_with_config(&config, model_id)
586}
587
588fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
589    for rule in &config.tier_rules {
590        if let Some(exact) = &rule.exact {
591            if model_id == exact {
592                return rule.tier.clone();
593            }
594        }
595        if let Some(pattern) = &rule.pattern {
596            if glob_match(pattern, model_id) {
597                return rule.tier.clone();
598            }
599        }
600        if let Some(substr) = &rule.contains {
601            if model_id.contains(substr.as_str()) {
602                return rule.tier.clone();
603            }
604        }
605    }
606    let lower = model_id.to_lowercase();
607    if lower.contains("9b") || lower.contains("a3b") {
608        return "small".to_string();
609    }
610    if lower.starts_with("claude-") || lower == "gpt-4o" {
611        return "frontier".to_string();
612    }
613    config.tier_defaults.default.clone()
614}
615
616/// Get provider config for resolving base_url, auth, etc.
617pub fn provider_config(name: &str) -> Option<ProviderDef> {
618    effective_config().providers.get(name).cloned()
619}
620
621/// Get model-specific default parameters (temperature, etc.).
622/// Matches glob patterns in model_defaults keys.
623pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
624    let config = effective_config();
625    let mut params = BTreeMap::new();
626    for (pattern, defaults) in &config.model_defaults {
627        if glob_match(pattern, model_id) {
628            for (k, v) in defaults {
629                params.insert(k.clone(), v.clone());
630            }
631        }
632    }
633    params
634}
635
636/// Get list of configured provider names.
637pub fn provider_names() -> Vec<String> {
638    effective_config().providers.keys().cloned().collect()
639}
640
641/// Return every configured alias name, sorted deterministically.
642pub fn known_model_names() -> Vec<String> {
643    effective_config().aliases.keys().cloned().collect()
644}
645
646pub fn alias_entries() -> Vec<(String, AliasDef)> {
647    effective_config().aliases.into_iter().collect()
648}
649
650pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
651    effective_config().alias_tool_calling.get(alias).cloned()
652}
653
654/// Return every configured model-catalog entry, sorted by provider then id.
655pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
656    let mut entries: Vec<_> = effective_config()
657        .models
658        .into_iter()
659        .map(|(id, model)| {
660            let provider = model.provider.clone();
661            (
662                id.clone(),
663                with_effective_capability_tags(id, provider, model),
664            )
665        })
666        .collect();
667    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
668        model_a
669            .provider
670            .cmp(&model_b.provider)
671            .then_with(|| id_a.cmp(id_b))
672    });
673    entries
674}
675
676pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
677    effective_config()
678        .models
679        .get(model_id)
680        .cloned()
681        .map(|model| {
682            let provider = model.provider.clone();
683            with_effective_capability_tags(model_id.to_string(), provider, model)
684        })
685}
686
687pub fn qc_default_model(provider: &str) -> Option<String> {
688    std::env::var("BURIN_QC_MODEL")
689        .ok()
690        .filter(|value| !value.trim().is_empty())
691        .or_else(|| {
692            effective_config()
693                .qc_defaults
694                .get(&provider.to_lowercase())
695                .cloned()
696        })
697}
698
/// Return the built-in default model id for `provider`.
///
/// `local` honours `LOCAL_LLM_MODEL`, then `HARN_LLM_MODEL`; `mlx` honours
/// `MLX_MODEL_ID`. Unrecognised providers get the Claude Sonnet default.
pub fn default_model_for_provider(provider: &str) -> String {
    // First env var that is set wins; otherwise use `fallback`.
    let from_env_or = |names: &[&str], fallback: &str| -> String {
        names
            .iter()
            .find_map(|name| std::env::var(name).ok())
            .unwrap_or_else(|| fallback.to_string())
    };

    match provider {
        "local" => from_env_or(&["LOCAL_LLM_MODEL", "HARN_LLM_MODEL"], "gemma-4-26b-a4b-it"),
        "mlx" => from_env_or(&["MLX_MODEL_ID"], "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
        "openai" => String::from("gpt-4o-mini"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-6"),
    }
}
712
713pub fn qc_defaults() -> BTreeMap<String, String> {
714    effective_config().qc_defaults
715}
716
717pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
718    effective_config()
719        .models
720        .get(model_id)
721        .and_then(|model| model.pricing.clone())
722}
723
724pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
725    model_pricing_per_mtok(model_id)
726        .map(|pricing| {
727            (
728                pricing.input_per_mtok / 1000.0,
729                pricing.output_per_mtok / 1000.0,
730            )
731        })
732        .or_else(|| {
733            let (input, output, _) = provider_economics(provider);
734            match (input, output) {
735                (Some(input), Some(output)) => Some((input, output)),
736                _ => None,
737            }
738        })
739}
740
741pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
742    match auth_env {
743        AuthEnv::None => Vec::new(),
744        AuthEnv::Single(name) => vec![name.clone()],
745        AuthEnv::Multiple(names) => names.clone(),
746    }
747}
748
749pub fn provider_key_available(provider: &str) -> bool {
750    let Some(pdef) = provider_config(provider) else {
751        return provider == "ollama";
752    };
753    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
754        return true;
755    }
756    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
757        std::env::var(env_name)
758            .ok()
759            .is_some_and(|value| !value.trim().is_empty())
760    })
761}
762
763pub fn available_provider_names() -> Vec<String> {
764    provider_names()
765        .into_iter()
766        .filter(|provider| provider_key_available(provider))
767        .collect()
768}
769
770/// Check if a provider advertises a legacy provider-level feature.
771pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
772    provider_config(provider)
773        .map(|p| p.features.iter().any(|f| f == feature))
774        .unwrap_or(false)
775}
776
777/// Provider-level catalog pricing/latency. Model-specific catalog pricing
778/// wins when available; this is the adapter-level fallback used by routing
779/// and portal summaries when a model has no explicit catalog entry.
780pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
781    provider_config(provider)
782        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
783        .unwrap_or((None, None, None))
784}
785
786/// Resolve the default tool format for a model+provider combination.
787/// Priority: alias `tool_format` (matched by model ID) > provider/model
788/// capability matrix > legacy provider feature > "text".
789pub fn default_tool_format(model: &str, provider: &str) -> String {
790    let config = effective_config();
791    default_tool_format_with_config(&config, model, provider)
792}
793
794fn default_tool_format_with_config(
795    config: &ProvidersConfig,
796    model: &str,
797    provider: &str,
798) -> String {
799    // Aliases match by model ID + provider, or by alias name.
800    for (name, alias) in &config.aliases {
801        let matches = (alias.id == model && alias.provider == provider) || name == model;
802        if matches {
803            if let Some(ref fmt) = alias.tool_format {
804                return fmt.clone();
805            }
806        }
807    }
808    let capabilities = crate::llm::capabilities::lookup(provider, model);
809    if let Some(format) = capabilities.preferred_tool_format.as_deref() {
810        if matches!(format, "native" | "text") {
811            return format.to_string();
812        }
813    }
814    let capability_matrix_native = capabilities.native_tools;
815    let legacy_provider_native = config
816        .providers
817        .get(provider)
818        .map(|p| p.features.iter().any(|f| f == "native_tools"))
819        .unwrap_or(false);
820    if capability_matrix_native || legacy_provider_native {
821        "native".to_string()
822    } else {
823        "text".to_string()
824    }
825}
826
827fn with_effective_capability_tags(
828    model_id: String,
829    provider: String,
830    mut model: ModelDef,
831) -> ModelDef {
832    model.capabilities = effective_model_capability_tags(&provider, &model_id);
833    model
834}
835
836/// Legacy display tags derived from the canonical provider/model capability
837/// matrix. The matrix is the source of truth; `models.*.capabilities` in
838/// providers.toml is accepted only for backwards-compatible parsing.
839pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
840    let caps = crate::llm::capabilities::lookup(provider, model_id);
841    let mut tags = Vec::new();
842    // Today all Harn chat providers expose streaming. Keep this as a
843    // transport baseline rather than a duplicated per-model declaration.
844    tags.push("streaming".to_string());
845    if caps.native_tools || caps.text_tool_wire_format_supported {
846        tags.push("tools".to_string());
847    }
848    if !caps.tool_search.is_empty() {
849        tags.push("tool_search".to_string());
850    }
851    if caps.vision || caps.vision_supported {
852        tags.push("vision".to_string());
853    }
854    if caps.audio {
855        tags.push("audio".to_string());
856    }
857    if caps.pdf {
858        tags.push("pdf".to_string());
859    }
860    if caps.files_api_supported {
861        tags.push("files".to_string());
862    }
863    if caps.prompt_caching {
864        tags.push("prompt_caching".to_string());
865    }
866    if !caps.thinking_modes.is_empty() {
867        tags.push("thinking".to_string());
868    }
869    if caps.interleaved_thinking_supported
870        || caps
871            .thinking_modes
872            .iter()
873            .any(|mode| mode == "adaptive" || mode == "effort")
874    {
875        tags.push("extended_thinking".to_string());
876    }
877    if caps.json_schema.is_some() {
878        tags.push("structured_output".to_string());
879    }
880    tags
881}
882
883/// Resolve a tier or alias into a concrete model/provider pair.
884pub fn resolve_tier_model(
885    target: &str,
886    preferred_provider: Option<&str>,
887) -> Option<(String, String)> {
888    let config = effective_config();
889
890    if let Some(alias) = config.aliases.get(target) {
891        return Some((alias.id.clone(), alias.provider.clone()));
892    }
893
894    let candidate_aliases = if let Some(provider) = preferred_provider {
895        vec![
896            format!("{provider}/{target}"),
897            format!("{provider}:{target}"),
898            format!("tier/{target}"),
899            target.to_string(),
900        ]
901    } else {
902        vec![format!("tier/{target}"), target.to_string()]
903    };
904
905    for alias_name in candidate_aliases {
906        if let Some(alias) = config.aliases.get(&alias_name) {
907            return Some((alias.id.clone(), alias.provider.clone()));
908        }
909    }
910
911    None
912}
913
914/// Return all configured alias-backed model/provider pairs whose resolved
915/// model falls into the requested capability tier. The result is de-duplicated
916/// and sorted deterministically by provider then model id.
917pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
918    let config = effective_config();
919    let mut seen = std::collections::BTreeSet::new();
920    let mut candidates = Vec::new();
921
922    for alias in config.aliases.values() {
923        let pair = (alias.id.clone(), alias.provider.clone());
924        if seen.contains(&pair) {
925            continue;
926        }
927        if model_tier(&alias.id) == target {
928            seen.insert(pair.clone());
929            candidates.push(pair);
930        }
931    }
932
933    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
934        provider_a
935            .cmp(provider_b)
936            .then_with(|| model_a.cmp(model_b))
937    });
938    candidates
939}
940
941/// Return all configured alias-backed model/provider pairs. Used by routing
942/// policies that need to compare alternatives across tiers.
943pub fn all_model_candidates() -> Vec<(String, String)> {
944    let config = effective_config();
945    let mut seen = std::collections::BTreeSet::new();
946    let mut candidates = Vec::new();
947
948    for alias in config.aliases.values() {
949        let pair = (alias.id.clone(), alias.provider.clone());
950        if seen.insert(pair.clone()) {
951            candidates.push(pair);
952        }
953    }
954
955    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
956        provider_a
957            .cmp(provider_b)
958            .then_with(|| model_a.cmp(model_b))
959    });
960    candidates
961}
962
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
///
/// `*` matches any run of characters, including the empty run; every other
/// character matches literally. A pattern without `*` must equal `input`.
///
/// Any number of wildcards is supported. The text before the first `*` is
/// anchored to the start of `input`, the text after the last `*` is anchored
/// to the end, and the literal pieces in between must occur in order without
/// overlapping. In particular `"claude-*-latest"` does not match
/// `"claude-latest"`, because the prefix and suffix would have to share the
/// single `-`.
fn glob_match(pattern: &str, input: &str) -> bool {
    let mut literals = pattern.split('*');

    // `split` always yields at least one piece; it is the anchored prefix.
    let prefix = literals.next().unwrap_or("");
    let Some(mut remaining) = input.strip_prefix(prefix) else {
        return false;
    };

    // No further piece means the pattern had no wildcard: exact match only.
    let Some(suffix) = literals.next_back() else {
        return remaining.is_empty();
    };

    // Consume the interior literals left to right. Taking the leftmost
    // occurrence leaves the most room for the pieces that follow.
    for literal in literals {
        match remaining.find(literal) {
            Some(at) => remaining = &remaining[at + literal.len()..],
            None => return false,
        }
    }

    // Checking the suffix against what is left (not the whole input)
    // prevents it from overlapping text already claimed above.
    remaining.ends_with(suffix)
}
980
/// Read the `HOME` environment variable, yielding `None` when it is unset
/// or not valid Unicode.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
984
985/// Resolve the effective base URL for a provider, checking the `base_url_env`
986/// override first, then falling back to the configured `base_url`.
987pub fn resolve_base_url(pdef: &ProviderDef) -> String {
988    if let Some(env_name) = &pdef.base_url_env {
989        if let Ok(val) = std::env::var(env_name) {
990            // Strip surrounding quotes that some .env parsers leave intact.
991            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
992            if !trimmed.is_empty() {
993                return trimmed.to_string();
994            }
995        }
996    }
997    pdef.base_url.clone()
998}
999
1000/// Embedded copy of `llm/providers.toml`, the single source of truth for
1001/// Harn's bundled provider/model catalog. Edit the TOML, not this string.
1002const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
1003
1004/// Parse the embedded `providers.toml` into the runtime `ProvidersConfig`.
1005///
1006/// Hosts overlay this base via `HARN_PROVIDERS_CONFIG`,
1007/// `~/.config/harn/providers.toml`, `harn.toml`, package-manifest
1008/// `[llm]` sections, and per-run `set_user_overrides(...)`. The same
1009/// Serde shape applies at every layer, so there is exactly one schema to
1010/// keep coherent — no parallel Rust-literal catalog.
1011///
1012/// We `expect` on parse failure because the file is bundled into the
1013/// binary at compile time; a malformed embedded catalog is a build-time
1014/// invariant violation that should fail every test, not silently
1015/// degrade in production.
1016fn default_config() -> ProvidersConfig {
1017    parse_config_toml(EMBEDDED_PROVIDERS_TOML)
1018        .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
1019}
1020
/// Test helper: layer `overlay` on top of the embedded default catalog and
/// return the merged result.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
1027
1028#[cfg(test)]
1029mod tests {
1030    use super::*;
1031
1032    fn reset_overrides() {
1033        clear_user_overrides();
1034    }
1035
1036    #[test]
1037    fn test_glob_match_prefix() {
1038        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
1039        assert!(glob_match("gpt-*", "gpt-4o"));
1040        assert!(!glob_match("claude-*", "gpt-4o"));
1041    }
1042
1043    #[test]
1044    fn test_glob_match_suffix() {
1045        assert!(glob_match("*-latest", "llama3.2-latest"));
1046        assert!(!glob_match("*-latest", "llama3.2"));
1047    }
1048
1049    #[test]
1050    fn test_glob_match_middle() {
1051        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
1052        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
1053    }
1054
1055    #[test]
1056    fn test_glob_match_exact() {
1057        assert!(glob_match("gpt-4o", "gpt-4o"));
1058        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
1059    }
1060
1061    #[test]
1062    fn test_infer_provider_from_defaults() {
1063        let _guard = crate::llm::env_lock().lock().expect("env lock");
1064        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1065        unsafe {
1066            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1067        }
1068
1069        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
1070        assert_eq!(infer_provider("gpt-4o"), "openai");
1071        assert_eq!(infer_provider("o1-preview"), "openai");
1072        assert_eq!(infer_provider("o3-mini"), "openai");
1073        assert_eq!(infer_provider("o4-mini"), "openai");
1074        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
1075        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
1076        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
1077        assert_eq!(infer_provider("unknown-model"), "anthropic");
1078
1079        unsafe {
1080            match prev_default_provider {
1081                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1082                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1083            }
1084        }
1085    }
1086
1087    #[test]
1088    fn test_infer_provider_prefix_rules() {
1089        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
1090        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
1091        // Even when the id also contains `/`, the local transport prefix wins.
1092        assert_eq!(infer_provider("local:owner/model"), "ollama");
1093        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
1094    }
1095
1096    #[test]
1097    fn test_openrouter_inference_requires_one_slash() {
1098        let _guard = crate::llm::env_lock().lock().expect("env lock");
1099        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1100        unsafe {
1101            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1102        }
1103
1104        assert_eq!(infer_provider("org/model"), "openrouter");
1105        assert_eq!(infer_provider("org/team/model"), "anthropic");
1106
1107        unsafe {
1108            match prev_default_provider {
1109                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1110                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1111            }
1112        }
1113    }
1114
1115    #[test]
1116    fn test_cerebras_inference_beats_openrouter_slash_fallback() {
1117        let _guard = crate::llm::env_lock().lock().expect("env lock");
1118        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1119        unsafe {
1120            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1121        }
1122
1123        assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
1124        assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
1125
1126        unsafe {
1127            match prev_default_provider {
1128                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1129                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1130            }
1131        }
1132    }
1133
1134    #[test]
1135    fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
1136        // Bare model IDs that the embedded catalog hosts on Cerebras must
1137        // not be misrouted by the generic `gpt-*` / single-slash inference
1138        // fallbacks. Regression for harn#2142 (model-info routed
1139        // `gpt-oss-120b` to openai, breaking Burin TUI credential checks).
1140        let _guard = crate::llm::env_lock().lock().expect("env lock");
1141        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1142        unsafe {
1143            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1144        }
1145
1146        for model in ["gpt-oss-120b", "llama-3.3-70b"] {
1147            assert_eq!(
1148                infer_provider(model),
1149                "cerebras",
1150                "{model} should route to its catalog provider"
1151            );
1152            let resolved = resolve_model_info(model);
1153            assert_eq!(resolved.id, model);
1154            assert_eq!(resolved.provider, "cerebras");
1155        }
1156
1157        unsafe {
1158            match prev_default_provider {
1159                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1160                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1161            }
1162        }
1163    }
1164
1165    #[test]
1166    fn test_user_catalog_overlay_re_homes_model_provider() {
1167        // Users can re-home a built-in model by overlaying a catalog row;
1168        // the exact-match catalog lookup must honor overlays as well as the
1169        // embedded TOML.
1170        reset_overrides();
1171        let mut overlay = ProvidersConfig::default();
1172        overlay.models.insert(
1173            "gpt-4o".to_string(),
1174            ModelDef {
1175                name: "GPT-4o via OpenRouter".to_string(),
1176                provider: "openrouter".to_string(),
1177                context_window: 128_000,
1178                runtime_context_window: None,
1179                stream_timeout: None,
1180                capabilities: Vec::new(),
1181                pricing: None,
1182                deprecated: false,
1183                deprecation_note: None,
1184                quality_tags: Vec::new(),
1185                availability: ModelAvailability::default(),
1186            },
1187        );
1188        set_user_overrides(Some(overlay));
1189
1190        assert_eq!(infer_provider("gpt-4o"), "openrouter");
1191
1192        reset_overrides();
1193    }
1194
1195    #[test]
1196    fn test_resolve_model_info_normalizes_provider_prefixes() {
1197        let local = resolve_model_info("local:gemma-4-e4b-it");
1198        assert_eq!(local.id, "gemma-4-e4b-it");
1199        assert_eq!(local.provider, "ollama");
1200
1201        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
1202        assert_eq!(ollama.id, "qwen3:30b-a3b");
1203        assert_eq!(ollama.provider, "ollama");
1204
1205        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
1206        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
1207        assert_eq!(hf.provider, "huggingface");
1208
1209        let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
1210        assert_eq!(cerebras.id, "gpt-oss-120b");
1211        assert_eq!(cerebras.provider, "cerebras");
1212    }
1213
1214    #[test]
1215    fn test_model_tier_from_defaults() {
1216        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
1217        assert_eq!(model_tier("gpt-4o"), "frontier");
1218        assert_eq!(model_tier("Qwen3.5-9B"), "small");
1219        assert_eq!(model_tier("deepseek-v3"), "mid");
1220    }
1221
1222    #[test]
1223    fn test_resolve_model_unknown_alias() {
1224        let (id, provider) = resolve_model("gpt-4o");
1225        assert_eq!(id, "gpt-4o");
1226        assert!(provider.is_none());
1227    }
1228
1229    #[test]
1230    fn test_provider_names() {
1231        let names = provider_names();
1232        assert!(names.len() >= 7);
1233        assert!(names.contains(&"anthropic".to_string()));
1234        assert!(names.contains(&"together".to_string()));
1235        assert!(names.contains(&"local".to_string()));
1236        assert!(names.contains(&"mlx".to_string()));
1237        assert!(names.contains(&"openai".to_string()));
1238        assert!(names.contains(&"ollama".to_string()));
1239        assert!(names.contains(&"bedrock".to_string()));
1240        assert!(names.contains(&"azure_openai".to_string()));
1241        assert!(names.contains(&"vertex".to_string()));
1242    }
1243
1244    #[test]
1245    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
1246        let mut overlay = ProvidersConfig {
1247            default_provider: Some("ollama".to_string()),
1248            ..Default::default()
1249        };
1250        overlay.aliases.insert(
1251            "quickstart".to_string(),
1252            AliasDef {
1253                id: "llama3.2".to_string(),
1254                provider: "ollama".to_string(),
1255                tool_format: None,
1256            },
1257        );
1258
1259        let merged = merge_global_config(overlay);
1260
1261        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
1262        assert!(merged.providers.contains_key("anthropic"));
1263        assert!(merged.providers.contains_key("ollama"));
1264        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
1265    }
1266
1267    #[test]
1268    fn test_resolve_tier_model_default_aliases() {
1269        // Exercise the alias-resolution machinery, not the specific catalog
1270        // value: the model under each tier alias evolves as the embedded
1271        // providers.toml is updated. The invariants worth pinning are the
1272        // provider routing + catalog-registration of the resolved model.
1273        let (model, provider) = resolve_tier_model("frontier", None)
1274            .expect("frontier alias must resolve from the embedded catalog");
1275        assert_eq!(provider, "anthropic");
1276        assert!(
1277            model_catalog_entry(&model)
1278                .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
1279            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
1280        );
1281
1282        let (model, provider) = resolve_tier_model("small", None)
1283            .expect("small alias must resolve from the embedded catalog");
1284        assert!(
1285            [
1286                "openrouter",
1287                "huggingface",
1288                "local",
1289                "llamacpp",
1290                "mlx",
1291                "ollama"
1292            ]
1293            .contains(&provider.as_str()),
1294            "small tier should resolve to an open-weight provider (got {provider} / {model})"
1295        );
1296    }
1297
1298    #[test]
1299    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
1300        // tier/<provider> takes precedence over generic tier when the
1301        // caller scopes by provider. Don't pin the specific model — the
1302        // catalog evolves.
1303        let (model, provider) = resolve_tier_model("mid", Some("openai"))
1304            .expect("mid tier scoped to openai must resolve");
1305        assert_eq!(provider, "openai");
1306        assert!(
1307            model_catalog_entry(&model).is_some(),
1308            "mid/openai alias must point at a registered model (got {model})"
1309        );
1310    }
1311
1312    #[test]
1313    fn test_provider_config_anthropic() {
1314        let pdef = provider_config("anthropic").unwrap();
1315        assert_eq!(pdef.auth_style, "header");
1316        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
1317    }
1318
1319    #[test]
1320    fn test_provider_config_mlx() {
1321        let pdef = provider_config("mlx").unwrap();
1322        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
1323        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
1324        assert_eq!(
1325            pdef.healthcheck.unwrap().path.as_deref(),
1326            Some("/v1/models")
1327        );
1328
1329        let (model, provider) = resolve_model("mlx-qwen36-27b");
1330        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
1331        assert_eq!(provider.as_deref(), Some("mlx"));
1332    }
1333
1334    #[test]
1335    fn test_enterprise_provider_defaults_and_inference() {
1336        let bedrock = provider_config("bedrock").unwrap();
1337        assert_eq!(bedrock.auth_style, "aws_sigv4");
1338        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
1339        assert_eq!(
1340            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
1341            "bedrock"
1342        );
1343        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
1344
1345        let azure = provider_config("azure_openai").unwrap();
1346        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
1347        assert_eq!(
1348            auth_env_names(&azure.auth_env),
1349            vec![
1350                "AZURE_OPENAI_API_KEY".to_string(),
1351                "AZURE_OPENAI_AD_TOKEN".to_string(),
1352                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1353            ]
1354        );
1355
1356        let vertex = provider_config("vertex").unwrap();
1357        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
1358        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
1359    }
1360
1361    #[test]
1362    fn test_default_provider_env_override_for_unknown_model() {
1363        let _guard = crate::llm::env_lock().lock().expect("env lock");
1364        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1365        unsafe {
1366            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
1367        }
1368
1369        let inference = infer_provider_detail("unknown-model");
1370
1371        unsafe {
1372            match prev_default_provider {
1373                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1374                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1375            }
1376        }
1377
1378        assert_eq!(inference.provider, "openai");
1379        assert_eq!(
1380            inference.source,
1381            crate::llm::provider::ProviderInferenceSource::DefaultFallback
1382        );
1383    }
1384
1385    #[test]
1386    fn test_resolve_base_url_no_env() {
1387        let pdef = ProviderDef {
1388            base_url: "https://example.com".to_string(),
1389            ..Default::default()
1390        };
1391        assert_eq!(resolve_base_url(&pdef), "https://example.com");
1392    }
1393
1394    #[test]
1395    fn test_default_config_roundtrip() {
1396        let config = default_config();
1397        assert!(!config.providers.is_empty());
1398        assert!(!config.inference_rules.is_empty());
1399        assert!(!config.tier_rules.is_empty());
1400        assert_eq!(config.tier_defaults.default, "mid");
1401    }
1402
1403    #[test]
1404    fn test_local_ollama_catalog_metadata() {
1405        reset_overrides();
1406
1407        let qwen_coding = model_catalog_entry("qwen3.6:35b-a3b-coding-nvfp4")
1408            .expect("qwen3.6 coding catalog entry");
1409        assert_eq!(qwen_coding.context_window, 262_144);
1410        assert!(!qwen_coding.capabilities.iter().any(|cap| cap == "vision"));
1411
1412        let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
1413        assert_eq!(gemma4.context_window, 262_144);
1414        assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
1415    }
1416
1417    #[test]
1418    fn test_external_config_overlays_default_catalog() {
1419        let mut config = default_config();
1420        let mut overlay = ProvidersConfig {
1421            default_provider: Some("ollama".to_string()),
1422            ..Default::default()
1423        };
1424        overlay.providers.insert(
1425            "custom".to_string(),
1426            ProviderDef {
1427                base_url: "https://llm.example.test/v1".to_string(),
1428                chat_endpoint: "/chat/completions".to_string(),
1429                ..Default::default()
1430            },
1431        );
1432
1433        config.merge_from(&overlay);
1434
1435        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
1436        assert!(config.providers.contains_key("custom"));
1437        assert!(config.providers.contains_key("anthropic"));
1438        assert!(config.providers.contains_key("ollama"));
1439    }
1440
1441    #[test]
1442    fn test_model_params_empty() {
1443        let params = model_params("claude-sonnet-4-20250514");
1444        assert!(params.is_empty());
1445    }
1446
1447    #[test]
1448    fn test_user_overrides_add_provider_and_alias() {
1449        reset_overrides();
1450        let mut overlay = ProvidersConfig::default();
1451        overlay.providers.insert(
1452            "acme".to_string(),
1453            ProviderDef {
1454                base_url: "https://llm.acme.test/v1".to_string(),
1455                chat_endpoint: "/chat/completions".to_string(),
1456                ..Default::default()
1457            },
1458        );
1459        overlay.aliases.insert(
1460            "acme-fast".to_string(),
1461            AliasDef {
1462                id: "acme/model-fast".to_string(),
1463                provider: "acme".to_string(),
1464                tool_format: Some("native".to_string()),
1465            },
1466        );
1467        set_user_overrides(Some(overlay));
1468
1469        let (model, provider) = resolve_model("acme-fast");
1470        assert_eq!(model, "acme/model-fast");
1471        assert_eq!(provider.as_deref(), Some("acme"));
1472        assert!(provider_names().contains(&"acme".to_string()));
1473        assert_eq!(
1474            provider_config("acme").map(|provider| provider.base_url),
1475            Some("https://llm.acme.test/v1".to_string())
1476        );
1477
1478        reset_overrides();
1479    }
1480
1481    #[test]
1482    fn test_default_tool_format_uses_capability_matrix() {
1483        reset_overrides();
1484
1485        assert_eq!(
1486            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
1487            "text"
1488        );
1489        assert_eq!(
1490            default_tool_format("devstral-small-2:24b", "ollama"),
1491            "text"
1492        );
1493        assert_eq!(
1494            default_tool_format("ollama-devstral-small-2-native", "ollama"),
1495            "native"
1496        );
1497        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
1498        assert_eq!(
1499            default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
1500            "text"
1501        );
1502        assert_eq!(
1503            default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
1504            "text"
1505        );
1506    }
1507
1508    #[test]
1509    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
1510        reset_overrides();
1511        let mut overlay = ProvidersConfig::default();
1512        overlay.models.insert(
1513            "acme/model-fast".to_string(),
1514            ModelDef {
1515                name: "Acme Fast".to_string(),
1516                provider: "acme".to_string(),
1517                context_window: 65_536,
1518                runtime_context_window: None,
1519                stream_timeout: Some(42.0),
1520                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1521                pricing: Some(ModelPricing {
1522                    input_per_mtok: 1.25,
1523                    output_per_mtok: 2.5,
1524                    cache_read_per_mtok: Some(0.25),
1525                    cache_write_per_mtok: None,
1526                }),
1527                deprecated: false,
1528                deprecation_note: None,
1529                quality_tags: Vec::new(),
1530                availability: ModelAvailability::default(),
1531            },
1532        );
1533        overlay
1534            .qc_defaults
1535            .insert("acme".to_string(), "acme/model-cheap".to_string());
1536        set_user_overrides(Some(overlay));
1537
1538        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
1539        assert_eq!(entry.context_window, 65_536);
1540        assert_eq!(
1541            entry.capabilities,
1542            vec!["streaming".to_string(), "tools".to_string()]
1543        );
1544        assert_eq!(
1545            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
1546            Some(1.25)
1547        );
1548        assert_eq!(
1549            pricing_per_1k_for("acme", "acme/model-fast"),
1550            Some((0.00125, 0.0025))
1551        );
1552        assert_eq!(
1553            qc_default_model("acme").as_deref(),
1554            Some("acme/model-cheap")
1555        );
1556
1557        reset_overrides();
1558    }
1559
1560    #[test]
1561    fn test_user_overrides_prepend_inference_rules() {
1562        reset_overrides();
1563        let mut overlay = ProvidersConfig::default();
1564        overlay.inference_rules.push(InferenceRule {
1565            pattern: Some("internal-*".to_string()),
1566            contains: None,
1567            exact: None,
1568            provider: "openai".to_string(),
1569        });
1570        set_user_overrides(Some(overlay));
1571
1572        assert_eq!(infer_provider("internal-foo"), "openai");
1573
1574        reset_overrides();
1575    }
1576
1577    // ── Embedded providers.toml invariants ───────────────────────────────────
1578    // These tests pin properties of the *system* — TOML parses, every
1579    // alias resolves, every deprecated model has a note — without
1580    // pinning specific catalog values. They survive future catalog
1581    // churn and surface real schema breakage.
1582
1583    #[test]
1584    fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
1585        let config = default_config();
1586        assert!(
1587            config.providers.len() >= 10,
1588            "expected >=10 providers in embedded catalog, got {}",
1589            config.providers.len()
1590        );
1591        assert!(
1592            config.models.len() >= 20,
1593            "expected >=20 models in embedded catalog, got {}",
1594            config.models.len()
1595        );
1596        assert!(
1597            config.aliases.len() >= 15,
1598            "expected >=15 aliases in embedded catalog, got {}",
1599            config.aliases.len()
1600        );
1601        assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
1602    }
1603
1604    #[test]
1605    fn embedded_catalog_every_deprecated_model_has_a_note() {
1606        let config = default_config();
1607        let offenders: Vec<&str> = config
1608            .models
1609            .iter()
1610            .filter(|(_, model)| {
1611                model.deprecated
1612                    && model
1613                        .deprecation_note
1614                        .as_deref()
1615                        .unwrap_or("")
1616                        .trim()
1617                        .is_empty()
1618            })
1619            .map(|(id, _)| id.as_str())
1620            .collect();
1621        assert!(
1622            offenders.is_empty(),
1623            "deprecated models missing a deprecation_note: {offenders:?}"
1624        );
1625    }
1626
1627    #[test]
1628    fn embedded_catalog_every_model_targets_a_registered_provider() {
1629        let config = default_config();
1630        let known: std::collections::BTreeSet<&str> =
1631            config.providers.keys().map(String::as_str).collect();
1632        let orphans: Vec<(&str, &str)> = config
1633            .models
1634            .iter()
1635            .filter(|(_, model)| !known.contains(model.provider.as_str()))
1636            .map(|(id, model)| (id.as_str(), model.provider.as_str()))
1637            .collect();
1638        assert!(
1639            orphans.is_empty(),
1640            "models reference unknown providers: {orphans:?}"
1641        );
1642    }
1643
1644    #[test]
1645    fn embedded_catalog_every_alias_targets_a_registered_provider() {
1646        let config = default_config();
1647        let known: std::collections::BTreeSet<&str> =
1648            config.providers.keys().map(String::as_str).collect();
1649        let orphans: Vec<(&str, &str)> = config
1650            .aliases
1651            .iter()
1652            .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
1653            .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
1654            .collect();
1655        assert!(
1656            orphans.is_empty(),
1657            "aliases reference unknown providers: {orphans:?}"
1658        );
1659    }
1660
1661    #[test]
1662    fn embedded_catalog_every_qc_default_targets_a_known_model() {
1663        let config = default_config();
1664        let orphans: Vec<(&str, &str)> = config
1665            .qc_defaults
1666            .iter()
1667            .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
1668            .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
1669            .collect();
1670        assert!(
1671            orphans.is_empty(),
1672            "qc_defaults reference unknown models: {orphans:?}"
1673        );
1674    }
1675
1676    #[test]
1677    fn embedded_catalog_pricing_rates_are_non_negative() {
1678        let config = default_config();
1679        for (id, model) in &config.models {
1680            let Some(pricing) = &model.pricing else {
1681                continue;
1682            };
1683            assert!(
1684                pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
1685                "{id}: negative pricing — in={} out={}",
1686                pricing.input_per_mtok,
1687                pricing.output_per_mtok
1688            );
1689            if let Some(rate) = pricing.cache_read_per_mtok {
1690                assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
1691            }
1692            if let Some(rate) = pricing.cache_write_per_mtok {
1693                assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
1694            }
1695        }
1696    }
1697
1698    #[test]
1699    fn model_availability_parses_known_strings() {
1700        assert_eq!(
1701            ModelAvailability::parse("serverless"),
1702            Some(ModelAvailability::Serverless)
1703        );
1704        assert_eq!(
1705            ModelAvailability::parse("dedicated"),
1706            Some(ModelAvailability::Dedicated)
1707        );
1708        assert_eq!(
1709            ModelAvailability::parse("unknown"),
1710            Some(ModelAvailability::Unknown)
1711        );
1712        assert_eq!(ModelAvailability::parse("provisioned"), None);
1713        for value in [
1714            ModelAvailability::Serverless,
1715            ModelAvailability::Dedicated,
1716            ModelAvailability::Unknown,
1717        ] {
1718            assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
1719        }
1720    }
1721
1722    #[test]
1723    fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
1724        let config = default_config();
1725        let model = config
1726            .models
1727            .get("Qwen/Qwen3-Coder-Next-FP8")
1728            .expect("Together Qwen3 Coder Next FP8 is cataloged");
1729        assert_eq!(model.provider, "together");
1730        assert_eq!(model.availability, ModelAvailability::Dedicated);
1731    }
1732
1733    #[test]
1734    fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
1735        // A dedicated-only model behind a tier alias would silently fail
1736        // every serverless caller; the catalog must keep those routes
1737        // separated.
1738        let config = default_config();
1739        let dedicated: std::collections::BTreeSet<(&str, &str)> = config
1740            .models
1741            .iter()
1742            .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
1743            .map(|(id, model)| (model.provider.as_str(), id.as_str()))
1744            .collect();
1745        for (name, alias) in &config.aliases {
1746            if matches!(
1747                name.as_str(),
1748                "frontier"
1749                    | "mid"
1750                    | "small"
1751                    | "tier/frontier"
1752                    | "tier/mid"
1753                    | "tier/small"
1754                    | "sonnet"
1755                    | "opus"
1756                    | "haiku"
1757            ) {
1758                assert!(
1759                    !dedicated.contains(&(alias.provider.as_str(), alias.id.as_str())),
1760                    "tier alias `{name}` targets dedicated-only route `{}/{}`",
1761                    alias.provider,
1762                    alias.id,
1763                );
1764            }
1765        }
1766    }
1767
1768    #[test]
1769    fn embedded_catalog_tier_aliases_resolve_to_active_models() {
1770        // The three canonical tier aliases (frontier / mid / small) MUST
1771        // resolve to non-deprecated catalog entries; a default that
1772        // routes the loop into a sunsetted model is a release blocker.
1773        for alias in ["frontier", "mid", "small"] {
1774            let (model, _provider) = resolve_tier_model(alias, None)
1775                .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
1776            let entry = model_catalog_entry(&model).unwrap_or_else(|| {
1777                panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
1778            });
1779            assert!(
1780                !entry.deprecated,
1781                "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
1782                entry.deprecation_note
1783            );
1784        }
1785    }
1786}