//! harn_vm/llm_config.rs — provider, alias, and model-catalog configuration.

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
6static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
7static CONFIG_PATH: OnceLock<String> = OnceLock::new();
8
thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    /// `None` means no overlay is active; see `set_user_overrides`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root of `providers.toml` (and of the `[llm]` overlay shape). Every
/// section is optional; missing tables deserialize to their defaults.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider used when nothing else selects one; see `default_provider()`.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Transport/auth definitions keyed by provider name.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Short alias names -> concrete (model id, provider) bindings.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model-catalog entries keyed by model id.
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Per-provider (lowercased name) default QC model ids.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered provider-inference rules; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered tier rules; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier reported when no tier rule or heuristic matches.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Default request parameters keyed by model-id glob pattern.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
39impl ProvidersConfig {
40    pub fn is_empty(&self) -> bool {
41        self.default_provider.is_none()
42            && self.providers.is_empty()
43            && self.aliases.is_empty()
44            && self.models.is_empty()
45            && self.qc_defaults.is_empty()
46            && self.inference_rules.is_empty()
47            && self.tier_rules.is_empty()
48            && self.model_defaults.is_empty()
49            && self.tier_defaults.default == default_mid()
50    }
51
52    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
53        self.providers.extend(overlay.providers.clone());
54        self.aliases.extend(overlay.aliases.clone());
55        self.models.extend(overlay.models.clone());
56        self.qc_defaults.extend(overlay.qc_defaults.clone());
57
58        if overlay.default_provider.is_some() {
59            self.default_provider = overlay.default_provider.clone();
60        }
61
62        if !overlay.inference_rules.is_empty() {
63            let mut merged = overlay.inference_rules.clone();
64            merged.extend(self.inference_rules.clone());
65            self.inference_rules = merged;
66        }
67
68        if !overlay.tier_rules.is_empty() {
69            let mut merged = overlay.tier_rules.clone();
70            merged.extend(self.tier_rules.clone());
71            self.tier_rules = merged;
72        }
73
74        if overlay.tier_defaults.default != default_mid() {
75            self.tier_defaults = overlay.tier_defaults.clone();
76        }
77
78        for (pattern, defaults) in &overlay.model_defaults {
79            self.model_defaults
80                .entry(pattern.clone())
81                .or_default()
82                .extend(defaults.clone());
83        }
84    }
85}
86
/// Transport and auth definition for one upstream provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Optional human-readable provider name.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon hint — presumably consumed by UIs; not interpreted here.
    #[serde(default)]
    pub icon: Option<String>,
    /// API base URL; `resolve_base_url` applies the env override first.
    pub base_url: String,
    /// Env var that, when set and non-blank, overrides `base_url`.
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// "bearer" (default), "header", or "none".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name used with `auth_style = "header"` (e.g. `x-api-key`).
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var(s) holding the API key; see `AuthEnv`.
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Extra static headers — presumably attached to every request by the
    /// transport layer; not applied in this module.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint path, relative to `base_url`.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Optional legacy completion endpoint path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional liveness probe definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Advertised feature flags, e.g. "native_tools", "prompt_caching".
    #[serde(default)]
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
135impl Default for ProviderDef {
136    fn default() -> Self {
137        Self {
138            display_name: None,
139            icon: None,
140            base_url: String::new(),
141            base_url_env: None,
142            auth_style: default_bearer(),
143            auth_header: None,
144            auth_env: AuthEnv::None,
145            extra_headers: BTreeMap::new(),
146            chat_endpoint: String::new(),
147            completion_endpoint: None,
148            healthcheck: None,
149            features: Vec::new(),
150            fallback: None,
151            retry_count: None,
152            retry_delay_ms: None,
153            rpm: None,
154            cost_per_1k_in: None,
155            cost_per_1k_out: None,
156            latency_p50_ms: None,
157        }
158    }
159}
160
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No key required (or none configured).
    #[default]
    None,
    /// One env var name holding the key.
    Single(String),
    /// Several env var names; `auth_env_names` preserves this order.
    Multiple(Vec<String>),
}
175
/// How to probe a provider for liveness. Either `path` (relative to the
/// provider base URL) or an absolute `url` names the endpoint.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method, e.g. "GET" or "POST".
    pub method: String,
    /// Endpoint path relative to the provider's base URL.
    #[serde(default)]
    pub path: Option<String>,
    /// Absolute URL; used when the probe lives on a different host.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body for POST-style probes.
    #[serde(default)]
    pub body: Option<String>,
}
186
/// An alias binding: maps a short name to a concrete model id + provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Provider-native model id the alias resolves to.
    pub id: String,
    /// Provider the aliased model is served by.
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Catalog pricing in USD per million tokens (MTok).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// USD per million input tokens.
    pub input_per_mtok: f64,
    /// USD per million output tokens.
    pub output_per_mtok: f64,
    /// USD per million cached-read tokens, when the provider prices them.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// USD per million cache-write tokens, when the provider prices them.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog entry describing a concrete model offered by a provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Provider-native model name.
    pub name: String,
    /// Provider that serves this model.
    pub provider: String,
    /// Context window size in tokens.
    pub context_window: u64,
    /// Streaming timeout — units not stated here; presumably seconds. TODO confirm.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Free-form capability tags.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Static pricing; wins over provider-level economics (see `pricing_per_1k_for`).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
221
/// Fully-resolved model identity handed to hosts: catalog id, provider,
/// originating alias (if any), tool format, and capability tier.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Provider-native model id (transport prefixes already stripped).
    pub id: String,
    /// Provider name the request should route to.
    pub provider: String,
    /// Alias the caller used, when resolution went through `aliases`.
    pub alias: Option<String>,
    /// Tool-calling mode: "native" or "text".
    pub tool_format: String,
    /// Capability tier, e.g. "small", "mid", "frontier".
    pub tier: String,
}
230
/// Maps a model id to a provider. Matchers are tried per rule in the order
/// `exact`, then `pattern` (glob), then `contains` (substring).
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern, e.g. "claude-*"; see `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match anywhere in the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider chosen when this rule matches.
    pub provider: String,
}
241
/// Maps a model id to a capability tier. Matchers are tried per rule in the
/// order `exact`, then `pattern` (glob), then `contains` (substring).
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern, e.g. "claude-*"; see `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match anywhere in the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier assigned when this rule matches.
    pub tier: String,
}
252
/// Fallback tier configuration: `default` is the tier reported when no tier
/// rule or built-in heuristic matches ("mid" unless overridden).
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    #[serde(default = "default_mid")]
    pub default: String,
}
258
259impl Default for TierDefaults {
260    fn default() -> Self {
261        Self {
262            default: default_mid(),
263        }
264    }
265}
266
/// Serde default for `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
270
/// Load and cache the providers config. Called once at VM startup.
///
/// Resolution order:
/// 1. Path in `HARN_PROVIDERS_CONFIG` (read/parse errors are reported to stderr).
/// 2. `~/.config/harn/providers.toml` (errors silently skipped).
/// 3. Built-in `default_config()`.
///
/// When a file loads successfully, `CONFIG_PATH` records its path.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        // Verbose load logging is opt-in via either env flag.
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            match std::fs::read_to_string(&path) {
                Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
                    Ok(config) => {
                        if verbose_config_logging {
                            eprintln!(
                                "[llm_config] Loaded {} providers, {} aliases from {}",
                                config.providers.len(),
                                config.aliases.len(),
                                path
                            );
                        }
                        // `set` only errs if already set; safe to ignore.
                        let _ = CONFIG_PATH.set(path);
                        return config;
                    }
                    Err(e) => eprintln!("[llm_config] TOML parse error in {}: {}", path, e),
                },
                Err(e) => eprintln!("[llm_config] Cannot read {}: {}", path, e),
            }
        }
        // Per-user config fallback; failures here are non-fatal and silent.
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            if let Ok(content) = std::fs::read_to_string(&path) {
                if let Ok(config) = toml::from_str::<ProvidersConfig>(&content) {
                    let _ = CONFIG_PATH.set(path);
                    return config;
                }
            }
        }
        // No file found: use built-in defaults (CONFIG_PATH stays unset).
        default_config()
    })
}
313
314/// Returns the filesystem path of the currently-loaded providers config, if
315/// any. Returns `None` when built-in defaults are active.
316pub fn loaded_config_path() -> Option<std::path::PathBuf> {
317    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
318    let _ = load_config();
319    CONFIG_PATH.get().map(std::path::PathBuf::from)
320}
321
322/// Install per-run provider config overlays. The overlay uses the same shape as
323/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
324/// manifests. Passing `None` clears the overlay.
325pub fn set_user_overrides(config: Option<ProvidersConfig>) {
326    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
327}
328
329/// Clear per-run provider config overlays.
330pub fn clear_user_overrides() {
331    set_user_overrides(None);
332}
333
334fn effective_config() -> ProvidersConfig {
335    let mut merged = load_config().clone();
336    USER_OVERRIDES.with(|cell| {
337        if let Some(overlay) = cell.borrow().as_ref() {
338            merged.merge_from(overlay);
339        }
340    });
341    merged
342}
343
344/// Resolve a model alias to (model_id, provider_name).
345pub fn resolve_model(alias: &str) -> (String, Option<String>) {
346    let config = effective_config();
347    if let Some(a) = config.aliases.get(alias) {
348        return (a.id.clone(), Some(a.provider.clone()));
349    }
350    (normalize_model_id(alias), None)
351}
352
/// Strip transport selector prefixes that identify the adapter, not the
/// provider-native model id, so `ollama:qwen3:30b` reaches Ollama as
/// `qwen3:30b` instead of an invalid model named `ollama`. Only the first
/// matching prefix is removed.
pub fn normalize_model_id(raw: &str) -> String {
    ["ollama:", "local:", "huggingface:", "hf:"]
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .map_or_else(|| raw.to_string(), str::to_string)
}
365
366/// Resolve an alias or selector into the complete catalog identity hosts need:
367/// provider inference, prefix-normalized model id, default tool format, and tier.
368pub fn resolve_model_info(selector: &str) -> ResolvedModel {
369    let config = effective_config();
370    if let Some(alias) = config.aliases.get(selector) {
371        let id = alias.id.clone();
372        let provider = alias.provider.clone();
373        let tool_format = alias
374            .tool_format
375            .clone()
376            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
377        return ResolvedModel {
378            tier: model_tier_with_config(&config, &id),
379            id,
380            provider,
381            alias: Some(selector.to_string()),
382            tool_format,
383        };
384    }
385
386    let provider = infer_provider_with_config(&config, selector).provider;
387    let id = normalize_model_id(selector);
388    let tool_format = default_tool_format_with_config(&config, &id, &provider);
389    let tier = model_tier_with_config(&config, &id);
390    ResolvedModel {
391        id,
392        provider,
393        alias: None,
394        tool_format,
395        tier,
396    }
397}
398
399/// Infer provider from a model ID using inference rules.
400pub fn infer_provider(model_id: &str) -> String {
401    infer_provider_detail(model_id).provider
402}
403
404/// Infer provider from a model ID and retain whether the configured default was used.
405pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
406    let config = effective_config();
407    infer_provider_with_config(&config, model_id)
408}
409
410fn infer_provider_with_config(
411    config: &ProvidersConfig,
412    model_id: &str,
413) -> crate::llm::provider::ProviderInference {
414    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
415        return crate::llm::provider::ProviderInference::builtin("ollama");
416    }
417    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
418        return crate::llm::provider::ProviderInference::builtin("huggingface");
419    }
420    for rule in &config.inference_rules {
421        if let Some(exact) = &rule.exact {
422            if model_id == exact {
423                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
424            }
425        }
426        if let Some(pattern) = &rule.pattern {
427            if glob_match(pattern, model_id) {
428                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
429            }
430        }
431        if let Some(substr) = &rule.contains {
432            if model_id.contains(substr.as_str()) {
433                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
434            }
435        }
436    }
437    crate::llm::provider::infer_provider_from_model_id(
438        model_id,
439        &default_provider_with_config(config),
440    )
441}
442
443pub fn default_provider() -> String {
444    let config = effective_config();
445    default_provider_with_config(&config)
446}
447
448fn default_provider_with_config(config: &ProvidersConfig) -> String {
449    std::env::var("HARN_DEFAULT_PROVIDER")
450        .ok()
451        .map(|value| value.trim().to_string())
452        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
453        .or_else(|| {
454            config
455                .default_provider
456                .as_deref()
457                .map(str::trim)
458                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
459                .map(str::to_string)
460        })
461        .unwrap_or_else(|| "anthropic".to_string())
462}
463
464/// Get model tier ("small", "mid", "frontier").
465pub fn model_tier(model_id: &str) -> String {
466    let config = effective_config();
467    model_tier_with_config(&config, model_id)
468}
469
470fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
471    for rule in &config.tier_rules {
472        if let Some(exact) = &rule.exact {
473            if model_id == exact {
474                return rule.tier.clone();
475            }
476        }
477        if let Some(pattern) = &rule.pattern {
478            if glob_match(pattern, model_id) {
479                return rule.tier.clone();
480            }
481        }
482        if let Some(substr) = &rule.contains {
483            if model_id.contains(substr.as_str()) {
484                return rule.tier.clone();
485            }
486        }
487    }
488    let lower = model_id.to_lowercase();
489    if lower.contains("9b") || lower.contains("a3b") {
490        return "small".to_string();
491    }
492    if lower.starts_with("claude-") || lower == "gpt-4o" {
493        return "frontier".to_string();
494    }
495    config.tier_defaults.default.clone()
496}
497
498/// Get provider config for resolving base_url, auth, etc.
499pub fn provider_config(name: &str) -> Option<ProviderDef> {
500    effective_config().providers.get(name).cloned()
501}
502
503/// Get model-specific default parameters (temperature, etc.).
504/// Matches glob patterns in model_defaults keys.
505pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
506    let config = effective_config();
507    let mut params = BTreeMap::new();
508    for (pattern, defaults) in &config.model_defaults {
509        if glob_match(pattern, model_id) {
510            for (k, v) in defaults {
511                params.insert(k.clone(), v.clone());
512            }
513        }
514    }
515    params
516}
517
518/// Get list of configured provider names.
519pub fn provider_names() -> Vec<String> {
520    effective_config().providers.keys().cloned().collect()
521}
522
523/// Return every configured alias name, sorted deterministically.
524pub fn known_model_names() -> Vec<String> {
525    effective_config().aliases.keys().cloned().collect()
526}
527
528pub fn alias_entries() -> Vec<(String, AliasDef)> {
529    effective_config().aliases.into_iter().collect()
530}
531
532/// Return every configured model-catalog entry, sorted by provider then id.
533pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
534    let mut entries: Vec<_> = effective_config().models.into_iter().collect();
535    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
536        model_a
537            .provider
538            .cmp(&model_b.provider)
539            .then_with(|| id_a.cmp(id_b))
540    });
541    entries
542}
543
544pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
545    effective_config().models.get(model_id).cloned()
546}
547
548pub fn qc_default_model(provider: &str) -> Option<String> {
549    std::env::var("BURIN_QC_MODEL")
550        .ok()
551        .filter(|value| !value.trim().is_empty())
552        .or_else(|| {
553            effective_config()
554                .qc_defaults
555                .get(&provider.to_lowercase())
556                .cloned()
557        })
558}
559
560pub fn qc_defaults() -> BTreeMap<String, String> {
561    effective_config().qc_defaults
562}
563
564pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
565    effective_config()
566        .models
567        .get(model_id)
568        .and_then(|model| model.pricing.clone())
569}
570
571pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
572    model_pricing_per_mtok(model_id)
573        .map(|pricing| {
574            (
575                pricing.input_per_mtok / 1000.0,
576                pricing.output_per_mtok / 1000.0,
577            )
578        })
579        .or_else(|| {
580            let (input, output, _) = provider_economics(provider);
581            match (input, output) {
582                (Some(input), Some(output)) => Some((input, output)),
583                _ => None,
584            }
585        })
586}
587
588pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
589    match auth_env {
590        AuthEnv::None => Vec::new(),
591        AuthEnv::Single(name) => vec![name.clone()],
592        AuthEnv::Multiple(names) => names.clone(),
593    }
594}
595
596pub fn provider_key_available(provider: &str) -> bool {
597    let Some(pdef) = provider_config(provider) else {
598        return provider == "ollama";
599    };
600    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
601        return true;
602    }
603    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
604        std::env::var(env_name)
605            .ok()
606            .is_some_and(|value| !value.trim().is_empty())
607    })
608}
609
610pub fn available_provider_names() -> Vec<String> {
611    provider_names()
612        .into_iter()
613        .filter(|provider| provider_key_available(provider))
614        .collect()
615}
616
617/// Check if a provider advertises a feature (e.g., "native_tools").
618pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
619    provider_config(provider)
620        .map(|p| p.features.iter().any(|f| f == feature))
621        .unwrap_or(false)
622}
623
624/// Provider-level catalog pricing/latency. Model-specific static pricing in
625/// `llm::cost` still wins when available; this is the adapter-level fallback
626/// used by routing and portal summaries.
627pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
628    provider_config(provider)
629        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
630        .unwrap_or((None, None, None))
631}
632
633/// Resolve the default tool format for a model+provider combination.
634/// Priority: alias `tool_format` (matched by model ID) > provider feature > "text".
635pub fn default_tool_format(model: &str, provider: &str) -> String {
636    let config = effective_config();
637    default_tool_format_with_config(&config, model, provider)
638}
639
640fn default_tool_format_with_config(
641    config: &ProvidersConfig,
642    model: &str,
643    provider: &str,
644) -> String {
645    // Aliases match by model ID + provider, or by alias name.
646    for (name, alias) in &config.aliases {
647        let matches = (alias.id == model && alias.provider == provider) || name == model;
648        if matches {
649            if let Some(ref fmt) = alias.tool_format {
650                return fmt.clone();
651            }
652        }
653    }
654    if config
655        .providers
656        .get(provider)
657        .map(|p| p.features.iter().any(|f| f == "native_tools"))
658        .unwrap_or(false)
659    {
660        "native".to_string()
661    } else {
662        "text".to_string()
663    }
664}
665
666/// Resolve a tier or alias into a concrete model/provider pair.
667pub fn resolve_tier_model(
668    target: &str,
669    preferred_provider: Option<&str>,
670) -> Option<(String, String)> {
671    let config = effective_config();
672
673    if let Some(alias) = config.aliases.get(target) {
674        return Some((alias.id.clone(), alias.provider.clone()));
675    }
676
677    let candidate_aliases = if let Some(provider) = preferred_provider {
678        vec![
679            format!("{provider}/{target}"),
680            format!("{provider}:{target}"),
681            format!("tier/{target}"),
682            target.to_string(),
683        ]
684    } else {
685        vec![format!("tier/{target}"), target.to_string()]
686    };
687
688    for alias_name in candidate_aliases {
689        if let Some(alias) = config.aliases.get(&alias_name) {
690            return Some((alias.id.clone(), alias.provider.clone()));
691        }
692    }
693
694    None
695}
696
697/// Return all configured alias-backed model/provider pairs whose resolved
698/// model falls into the requested capability tier. The result is de-duplicated
699/// and sorted deterministically by provider then model id.
700pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
701    let config = effective_config();
702    let mut seen = std::collections::BTreeSet::new();
703    let mut candidates = Vec::new();
704
705    for alias in config.aliases.values() {
706        let pair = (alias.id.clone(), alias.provider.clone());
707        if seen.contains(&pair) {
708            continue;
709        }
710        if model_tier(&alias.id) == target {
711            seen.insert(pair.clone());
712            candidates.push(pair);
713        }
714    }
715
716    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
717        provider_a
718            .cmp(provider_b)
719            .then_with(|| model_a.cmp(model_b))
720    });
721    candidates
722}
723
724/// Return all configured alias-backed model/provider pairs. Used by routing
725/// policies that need to compare alternatives across tiers.
726pub fn all_model_candidates() -> Vec<(String, String)> {
727    let config = effective_config();
728    let mut seen = std::collections::BTreeSet::new();
729    let mut candidates = Vec::new();
730
731    for alias in config.aliases.values() {
732        let pair = (alias.id.clone(), alias.provider.clone());
733        if seen.insert(pair.clone()) {
734            candidates.push(pair);
735        }
736    }
737
738    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
739        provider_a
740            .cmp(provider_b)
741            .then_with(|| model_a.cmp(model_b))
742    });
743    candidates
744}
745
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
///
/// `*` matches any (possibly empty) run of characters. Supports any number of
/// wildcards. Fixes the old two-part matcher, which accepted overlapping
/// prefix/suffix matches (e.g. `"ab*ba"` wrongly matched `"aba"`), and
/// generalizes patterns with two or more `*`s, which previously degraded to
/// exact string equality.
fn glob_match(pattern: &str, input: &str) -> bool {
    // Fast path: no wildcard means exact match.
    if !pattern.contains('*') {
        return input == pattern;
    }
    // parts.len() >= 2 here because the pattern contains at least one '*'.
    let parts: Vec<&str> = pattern.split('*').collect();

    // First segment must anchor at the start of the input.
    let mut rest = match input.strip_prefix(parts[0]) {
        Some(r) => r,
        None => return false,
    };
    // Interior segments must appear in order, consuming input left to right
    // so segments cannot overlap.
    for part in &parts[1..parts.len() - 1] {
        match rest.find(part) {
            Some(at) => rest = &rest[at + part.len()..],
            None => return false,
        }
    }
    // Last segment must anchor at the end of whatever remains.
    rest.ends_with(parts[parts.len() - 1])
}
763
/// Minimal stand-in for a home-directory crate: honors `$HOME` only.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
767
768/// Resolve the effective base URL for a provider, checking the `base_url_env`
769/// override first, then falling back to the configured `base_url`.
770pub fn resolve_base_url(pdef: &ProviderDef) -> String {
771    if let Some(env_name) = &pdef.base_url_env {
772        if let Ok(val) = std::env::var(env_name) {
773            // Strip surrounding quotes that some .env parsers leave intact.
774            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
775            if !trimmed.is_empty() {
776                return trimmed.to_string();
777            }
778        }
779    }
780    pdef.base_url.clone()
781}
782
783fn default_config() -> ProvidersConfig {
784    let mut config = ProvidersConfig {
785        default_provider: Some("anthropic".to_string()),
786        ..Default::default()
787    };
788
789    config.providers.insert(
790        "anthropic".to_string(),
791        ProviderDef {
792            base_url: "https://api.anthropic.com/v1".to_string(),
793            auth_style: "header".to_string(),
794            auth_header: Some("x-api-key".to_string()),
795            auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
796            extra_headers: BTreeMap::from([(
797                "anthropic-version".to_string(),
798                "2023-06-01".to_string(),
799            )]),
800            chat_endpoint: "/messages".to_string(),
801            completion_endpoint: None,
802            healthcheck: Some(HealthcheckDef {
803                method: "POST".to_string(),
804                path: Some("/messages/count_tokens".to_string()),
805                url: None,
806                body: Some(
807                    r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
808                        .to_string(),
809                ),
810            }),
811            features: vec!["prompt_caching".to_string(), "thinking".to_string()],
812            cost_per_1k_in: Some(0.003),
813            cost_per_1k_out: Some(0.015),
814            latency_p50_ms: Some(2500),
815            ..Default::default()
816        },
817    );
818
819    // OpenAI
820    config.providers.insert(
821        "openai".to_string(),
822        ProviderDef {
823            base_url: "https://api.openai.com/v1".to_string(),
824            auth_style: "bearer".to_string(),
825            auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
826            chat_endpoint: "/chat/completions".to_string(),
827            completion_endpoint: Some("/completions".to_string()),
828            healthcheck: Some(HealthcheckDef {
829                method: "GET".to_string(),
830                path: Some("/models".to_string()),
831                url: None,
832                body: None,
833            }),
834            cost_per_1k_in: Some(0.0025),
835            cost_per_1k_out: Some(0.010),
836            latency_p50_ms: Some(1800),
837            ..Default::default()
838        },
839    );
840
841    // OpenRouter
842    config.providers.insert(
843        "openrouter".to_string(),
844        ProviderDef {
845            base_url: "https://openrouter.ai/api/v1".to_string(),
846            auth_style: "bearer".to_string(),
847            auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
848            chat_endpoint: "/chat/completions".to_string(),
849            completion_endpoint: Some("/completions".to_string()),
850            healthcheck: Some(HealthcheckDef {
851                method: "GET".to_string(),
852                path: Some("/auth/key".to_string()),
853                url: None,
854                body: None,
855            }),
856            cost_per_1k_in: Some(0.003),
857            cost_per_1k_out: Some(0.015),
858            latency_p50_ms: Some(2200),
859            ..Default::default()
860        },
861    );
862
863    // HuggingFace
864    config.providers.insert(
865        "huggingface".to_string(),
866        ProviderDef {
867            base_url: "https://router.huggingface.co/v1".to_string(),
868            auth_style: "bearer".to_string(),
869            auth_env: AuthEnv::Multiple(vec![
870                "HF_TOKEN".to_string(),
871                "HUGGINGFACE_API_KEY".to_string(),
872            ]),
873            chat_endpoint: "/chat/completions".to_string(),
874            completion_endpoint: Some("/completions".to_string()),
875            healthcheck: Some(HealthcheckDef {
876                method: "GET".to_string(),
877                url: Some("https://huggingface.co/api/whoami-v2".to_string()),
878                path: None,
879                body: None,
880            }),
881            cost_per_1k_in: Some(0.0002),
882            cost_per_1k_out: Some(0.0006),
883            latency_p50_ms: Some(2400),
884            ..Default::default()
885        },
886    );
887
888    // Ollama default. Hosts can override this to `/v1/chat/completions`
889    // via a bundled `providers.toml` (loaded by setting
890    // `HARN_PROVIDERS_CONFIG` in the host process). The OpenAI-compat
891    // path bypasses Ollama's per-model tool-call post-processors
892    // (qwen3coder.go, qwen35.go) which raise HTTP 500s on text-mode
893    // responses for the Qwen3.5 family. The default here stays on
894    // `/api/chat` so the harn-vm test stub keeps working with Ollama's
895    // native NDJSON wire format.
896    config.providers.insert(
897        "ollama".to_string(),
898        ProviderDef {
899            base_url: "http://localhost:11434".to_string(),
900            base_url_env: Some("OLLAMA_HOST".to_string()),
901            auth_style: "none".to_string(),
902            chat_endpoint: "/api/chat".to_string(),
903            completion_endpoint: Some("/api/generate".to_string()),
904            healthcheck: Some(HealthcheckDef {
905                method: "GET".to_string(),
906                path: Some("/api/tags".to_string()),
907                url: None,
908                body: None,
909            }),
910            cost_per_1k_in: Some(0.0),
911            cost_per_1k_out: Some(0.0),
912            latency_p50_ms: Some(1200),
913            ..Default::default()
914        },
915    );
916
917    // Google Gemini native API.
918    config.providers.insert(
919        "gemini".to_string(),
920        ProviderDef {
921            base_url: "https://generativelanguage.googleapis.com".to_string(),
922            base_url_env: Some("GEMINI_BASE_URL".to_string()),
923            auth_style: "header".to_string(),
924            auth_header: Some("x-goog-api-key".to_string()),
925            auth_env: AuthEnv::Multiple(vec![
926                "GEMINI_API_KEY".to_string(),
927                "GOOGLE_API_KEY".to_string(),
928            ]),
929            chat_endpoint: "/v1beta/models".to_string(),
930            healthcheck: Some(HealthcheckDef {
931                method: "GET".to_string(),
932                path: Some("/v1beta/models".to_string()),
933                url: None,
934                body: None,
935            }),
936            cost_per_1k_in: Some(0.00125),
937            cost_per_1k_out: Some(0.005),
938            latency_p50_ms: Some(1800),
939            ..Default::default()
940        },
941    );
942
943    // Together AI (OpenAI-compatible)
944    config.providers.insert(
945        "together".to_string(),
946        ProviderDef {
947            base_url: "https://api.together.xyz/v1".to_string(),
948            base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
949            auth_style: "bearer".to_string(),
950            auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
951            chat_endpoint: "/chat/completions".to_string(),
952            completion_endpoint: Some("/completions".to_string()),
953            healthcheck: Some(HealthcheckDef {
954                method: "GET".to_string(),
955                path: Some("/models".to_string()),
956                url: None,
957                body: None,
958            }),
959            cost_per_1k_in: Some(0.0002),
960            cost_per_1k_out: Some(0.0006),
961            latency_p50_ms: Some(1600),
962            ..Default::default()
963        },
964    );
965
966    // Groq (OpenAI-compatible)
967    config.providers.insert(
968        "groq".to_string(),
969        ProviderDef {
970            base_url: "https://api.groq.com/openai/v1".to_string(),
971            base_url_env: Some("GROQ_BASE_URL".to_string()),
972            auth_style: "bearer".to_string(),
973            auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
974            chat_endpoint: "/chat/completions".to_string(),
975            completion_endpoint: Some("/completions".to_string()),
976            healthcheck: Some(HealthcheckDef {
977                method: "GET".to_string(),
978                path: Some("/models".to_string()),
979                url: None,
980                body: None,
981            }),
982            cost_per_1k_in: Some(0.0001),
983            cost_per_1k_out: Some(0.0003),
984            latency_p50_ms: Some(450),
985            ..Default::default()
986        },
987    );
988
989    // DeepSeek (OpenAI-compatible)
990    config.providers.insert(
991        "deepseek".to_string(),
992        ProviderDef {
993            base_url: "https://api.deepseek.com/v1".to_string(),
994            base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
995            auth_style: "bearer".to_string(),
996            auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
997            chat_endpoint: "/chat/completions".to_string(),
998            completion_endpoint: Some("/completions".to_string()),
999            healthcheck: Some(HealthcheckDef {
1000                method: "GET".to_string(),
1001                path: Some("/models".to_string()),
1002                url: None,
1003                body: None,
1004            }),
1005            cost_per_1k_in: Some(0.00014),
1006            cost_per_1k_out: Some(0.00028),
1007            latency_p50_ms: Some(1800),
1008            ..Default::default()
1009        },
1010    );
1011
1012    // Fireworks (OpenAI-compatible open-weight hosting)
1013    config.providers.insert(
1014        "fireworks".to_string(),
1015        ProviderDef {
1016            base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1017            base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1018            auth_style: "bearer".to_string(),
1019            auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1020            chat_endpoint: "/chat/completions".to_string(),
1021            completion_endpoint: Some("/completions".to_string()),
1022            healthcheck: Some(HealthcheckDef {
1023                method: "GET".to_string(),
1024                path: Some("/models".to_string()),
1025                url: None,
1026                body: None,
1027            }),
1028            cost_per_1k_in: Some(0.0002),
1029            cost_per_1k_out: Some(0.0006),
1030            latency_p50_ms: Some(1400),
1031            ..Default::default()
1032        },
1033    );
1034
1035    // Alibaba DashScope (OpenAI-compatible Qwen host)
1036    config.providers.insert(
1037        "dashscope".to_string(),
1038        ProviderDef {
1039            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
1040            base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
1041            auth_style: "bearer".to_string(),
1042            auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
1043            chat_endpoint: "/chat/completions".to_string(),
1044            completion_endpoint: Some("/completions".to_string()),
1045            healthcheck: Some(HealthcheckDef {
1046                method: "GET".to_string(),
1047                path: Some("/models".to_string()),
1048                url: None,
1049                body: None,
1050            }),
1051            cost_per_1k_in: Some(0.0003),
1052            cost_per_1k_out: Some(0.0012),
1053            latency_p50_ms: Some(1600),
1054            ..Default::default()
1055        },
1056    );
1057
1058    // AWS Bedrock Runtime. The provider shim resolves AWS credentials through
1059    // env vars, the selected/default profile, container credentials, or EC2
1060    // instance profile credentials, then signs Converse API calls with SigV4.
1061    config.providers.insert(
1062        "bedrock".to_string(),
1063        ProviderDef {
1064            base_url: String::new(),
1065            base_url_env: Some("BEDROCK_BASE_URL".to_string()),
1066            auth_style: "aws_sigv4".to_string(),
1067            auth_env: AuthEnv::None,
1068            chat_endpoint: "/model/{model}/converse".to_string(),
1069            features: vec!["native_tools".to_string()],
1070            latency_p50_ms: Some(2600),
1071            ..Default::default()
1072        },
1073    );
1074
1075    // Azure OpenAI. The deployment name is routed in the URL; callers can
1076    // use the Harn model field as the deployment name or set
1077    // AZURE_OPENAI_DEPLOYMENT.
1078    config.providers.insert(
1079        "azure_openai".to_string(),
1080        ProviderDef {
1081            base_url: "https://{resource}.openai.azure.com".to_string(),
1082            base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
1083            auth_style: "azure_openai".to_string(),
1084            auth_env: AuthEnv::Multiple(vec![
1085                "AZURE_OPENAI_API_KEY".to_string(),
1086                "AZURE_OPENAI_AD_TOKEN".to_string(),
1087                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1088            ]),
1089            chat_endpoint:
1090                "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
1091                    .to_string(),
1092            features: vec!["native_tools".to_string()],
1093            cost_per_1k_in: Some(0.0025),
1094            cost_per_1k_out: Some(0.010),
1095            latency_p50_ms: Some(1900),
1096            ..Default::default()
1097        },
1098    );
1099
1100    // Google Vertex AI Gemini.
1101    config.providers.insert(
1102        "vertex".to_string(),
1103        ProviderDef {
1104            base_url: "https://aiplatform.googleapis.com/v1".to_string(),
1105            base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
1106            auth_style: "bearer".to_string(),
1107            auth_env: AuthEnv::Multiple(vec![
1108                "VERTEX_AI_ACCESS_TOKEN".to_string(),
1109                "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
1110                "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
1111            ]),
1112            chat_endpoint:
1113                "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
1114                    .to_string(),
1115            features: vec!["native_tools".to_string()],
1116            cost_per_1k_in: Some(0.00125),
1117            cost_per_1k_out: Some(0.005),
1118            latency_p50_ms: Some(2100),
1119            ..Default::default()
1120        },
1121    );
1122
1123    // Local OpenAI-compatible server
1124    config.providers.insert(
1125        "local".to_string(),
1126        ProviderDef {
1127            base_url: "http://localhost:8000".to_string(),
1128            base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1129            auth_style: "none".to_string(),
1130            chat_endpoint: "/v1/chat/completions".to_string(),
1131            completion_endpoint: Some("/v1/completions".to_string()),
1132            healthcheck: Some(HealthcheckDef {
1133                method: "GET".to_string(),
1134                path: Some("/v1/models".to_string()),
1135                url: None,
1136                body: None,
1137            }),
1138            cost_per_1k_in: Some(0.0),
1139            cost_per_1k_out: Some(0.0),
1140            latency_p50_ms: Some(900),
1141            ..Default::default()
1142        },
1143    );
1144
1145    // Apple Silicon MLX OpenAI-compatible server. Harn owns readiness
1146    // probing; hosts that want script-based auto-start should launch the
1147    // process first, then call Harn again to verify readiness.
1148    config.providers.insert(
1149        "mlx".to_string(),
1150        ProviderDef {
1151            base_url: "http://127.0.0.1:8002".to_string(),
1152            base_url_env: Some("MLX_BASE_URL".to_string()),
1153            auth_style: "none".to_string(),
1154            chat_endpoint: "/v1/chat/completions".to_string(),
1155            completion_endpoint: Some("/v1/completions".to_string()),
1156            healthcheck: Some(HealthcheckDef {
1157                method: "GET".to_string(),
1158                path: Some("/v1/models".to_string()),
1159                url: None,
1160                body: None,
1161            }),
1162            cost_per_1k_in: Some(0.0),
1163            cost_per_1k_out: Some(0.0),
1164            latency_p50_ms: Some(900),
1165            ..Default::default()
1166        },
1167    );
1168
1169    // vLLM OpenAI-compatible server.
1170    config.providers.insert(
1171        "vllm".to_string(),
1172        ProviderDef {
1173            base_url: "http://localhost:8000".to_string(),
1174            base_url_env: Some("VLLM_BASE_URL".to_string()),
1175            auth_style: "none".to_string(),
1176            chat_endpoint: "/v1/chat/completions".to_string(),
1177            completion_endpoint: Some("/v1/completions".to_string()),
1178            healthcheck: Some(HealthcheckDef {
1179                method: "GET".to_string(),
1180                path: Some("/v1/models".to_string()),
1181                url: None,
1182                body: None,
1183            }),
1184            cost_per_1k_in: Some(0.0),
1185            cost_per_1k_out: Some(0.0),
1186            latency_p50_ms: Some(800),
1187            ..Default::default()
1188        },
1189    );
1190
1191    // HuggingFace Text Generation Inference OpenAI-compatible endpoint.
1192    config.providers.insert(
1193        "tgi".to_string(),
1194        ProviderDef {
1195            base_url: "http://localhost:8080".to_string(),
1196            base_url_env: Some("TGI_BASE_URL".to_string()),
1197            auth_style: "none".to_string(),
1198            chat_endpoint: "/v1/chat/completions".to_string(),
1199            completion_endpoint: Some("/v1/completions".to_string()),
1200            healthcheck: Some(HealthcheckDef {
1201                method: "GET".to_string(),
1202                path: Some("/health".to_string()),
1203                url: None,
1204                body: None,
1205            }),
1206            cost_per_1k_in: Some(0.0),
1207            cost_per_1k_out: Some(0.0),
1208            latency_p50_ms: Some(950),
1209            ..Default::default()
1210        },
1211    );
1212
1213    // Default inference rules
1214    config.inference_rules = vec![
1215        InferenceRule {
1216            pattern: Some("claude-*".to_string()),
1217            contains: None,
1218            exact: None,
1219            provider: "anthropic".to_string(),
1220        },
1221        InferenceRule {
1222            pattern: Some("gpt-*".to_string()),
1223            contains: None,
1224            exact: None,
1225            provider: "openai".to_string(),
1226        },
1227        InferenceRule {
1228            pattern: Some("o1*".to_string()),
1229            contains: None,
1230            exact: None,
1231            provider: "openai".to_string(),
1232        },
1233        InferenceRule {
1234            pattern: Some("o3*".to_string()),
1235            contains: None,
1236            exact: None,
1237            provider: "openai".to_string(),
1238        },
1239        InferenceRule {
1240            pattern: Some("o4*".to_string()),
1241            contains: None,
1242            exact: None,
1243            provider: "openai".to_string(),
1244        },
1245        InferenceRule {
1246            pattern: Some("anthropic.claude-*".to_string()),
1247            contains: None,
1248            exact: None,
1249            provider: "bedrock".to_string(),
1250        },
1251        InferenceRule {
1252            pattern: Some("meta.llama*".to_string()),
1253            contains: None,
1254            exact: None,
1255            provider: "bedrock".to_string(),
1256        },
1257        InferenceRule {
1258            pattern: Some("amazon.*".to_string()),
1259            contains: None,
1260            exact: None,
1261            provider: "bedrock".to_string(),
1262        },
1263        InferenceRule {
1264            pattern: Some("mistral.*".to_string()),
1265            contains: None,
1266            exact: None,
1267            provider: "bedrock".to_string(),
1268        },
1269        InferenceRule {
1270            pattern: Some("cohere.*".to_string()),
1271            contains: None,
1272            exact: None,
1273            provider: "bedrock".to_string(),
1274        },
1275        InferenceRule {
1276            pattern: Some("gemini-*".to_string()),
1277            contains: None,
1278            exact: None,
1279            provider: "gemini".to_string(),
1280        },
1281    ];
1282
1283    // Default tier rules
1284    config.tier_rules = vec![
1285        TierRule {
1286            contains: Some("9b".to_string()),
1287            pattern: None,
1288            exact: None,
1289            tier: "small".to_string(),
1290        },
1291        TierRule {
1292            contains: Some("a3b".to_string()),
1293            pattern: None,
1294            exact: None,
1295            tier: "small".to_string(),
1296        },
1297        TierRule {
1298            contains: Some("gemma-4-e2b".to_string()),
1299            pattern: None,
1300            exact: None,
1301            tier: "small".to_string(),
1302        },
1303        TierRule {
1304            contains: Some("gemma-4-e4b".to_string()),
1305            pattern: None,
1306            exact: None,
1307            tier: "small".to_string(),
1308        },
1309        TierRule {
1310            contains: Some("gemma-4-26b".to_string()),
1311            pattern: None,
1312            exact: None,
1313            tier: "mid".to_string(),
1314        },
1315        TierRule {
1316            contains: Some("gemma-4-31b".to_string()),
1317            pattern: None,
1318            exact: None,
1319            tier: "frontier".to_string(),
1320        },
1321        TierRule {
1322            contains: Some("gemma4:26b".to_string()),
1323            pattern: None,
1324            exact: None,
1325            tier: "mid".to_string(),
1326        },
1327        TierRule {
1328            contains: Some("gemma4:31b".to_string()),
1329            pattern: None,
1330            exact: None,
1331            tier: "frontier".to_string(),
1332        },
1333        TierRule {
1334            pattern: Some("claude-*".to_string()),
1335            contains: None,
1336            exact: None,
1337            tier: "frontier".to_string(),
1338        },
1339        TierRule {
1340            exact: Some("gpt-4o".to_string()),
1341            contains: None,
1342            pattern: None,
1343            tier: "frontier".to_string(),
1344        },
1345    ];
1346
1347    config.tier_defaults = TierDefaults {
1348        default: "mid".to_string(),
1349    };
1350
1351    config.aliases.insert(
1352        "frontier".to_string(),
1353        AliasDef {
1354            id: "claude-sonnet-4-20250514".to_string(),
1355            provider: "anthropic".to_string(),
1356            tool_format: None,
1357        },
1358    );
1359    config.aliases.insert(
1360        "tier/frontier".to_string(),
1361        AliasDef {
1362            id: "claude-sonnet-4-20250514".to_string(),
1363            provider: "anthropic".to_string(),
1364            tool_format: None,
1365        },
1366    );
1367    config.aliases.insert(
1368        "mid".to_string(),
1369        AliasDef {
1370            id: "gpt-4o-mini".to_string(),
1371            provider: "openai".to_string(),
1372            tool_format: None,
1373        },
1374    );
1375    config.aliases.insert(
1376        "tier/mid".to_string(),
1377        AliasDef {
1378            id: "gpt-4o-mini".to_string(),
1379            provider: "openai".to_string(),
1380            tool_format: None,
1381        },
1382    );
1383    config.aliases.insert(
1384        "small".to_string(),
1385        AliasDef {
1386            id: "Qwen/Qwen3.5-9B".to_string(),
1387            provider: "openrouter".to_string(),
1388            tool_format: None,
1389        },
1390    );
1391    config.aliases.insert(
1392        "tier/small".to_string(),
1393        AliasDef {
1394            id: "Qwen/Qwen3.5-9B".to_string(),
1395            provider: "openrouter".to_string(),
1396            tool_format: None,
1397        },
1398    );
1399    config.aliases.insert(
1400        "local-gemma4".to_string(),
1401        AliasDef {
1402            id: "gemma-4-26b-a4b-it".to_string(),
1403            provider: "local".to_string(),
1404            tool_format: None,
1405        },
1406    );
1407    config.aliases.insert(
1408        "local-gemma4-26b".to_string(),
1409        AliasDef {
1410            id: "gemma-4-26b-a4b-it".to_string(),
1411            provider: "local".to_string(),
1412            tool_format: None,
1413        },
1414    );
1415    config.aliases.insert(
1416        "local-gemma4-31b".to_string(),
1417        AliasDef {
1418            id: "gemma-4-31b-it".to_string(),
1419            provider: "local".to_string(),
1420            tool_format: None,
1421        },
1422    );
1423    config.aliases.insert(
1424        "local-gemma4-e4b".to_string(),
1425        AliasDef {
1426            id: "gemma-4-e4b-it".to_string(),
1427            provider: "local".to_string(),
1428            tool_format: None,
1429        },
1430    );
1431    config.aliases.insert(
1432        "local-gemma4-e2b".to_string(),
1433        AliasDef {
1434            id: "gemma-4-e2b-it".to_string(),
1435            provider: "local".to_string(),
1436            tool_format: None,
1437        },
1438    );
1439    config.aliases.insert(
1440        "mlx-qwen36-27b".to_string(),
1441        AliasDef {
1442            id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
1443            provider: "mlx".to_string(),
1444            tool_format: None,
1445        },
1446    );
1447
1448    config.qc_defaults.extend(BTreeMap::from([
1449        (
1450            "anthropic".to_string(),
1451            "claude-3-5-haiku-20241022".to_string(),
1452        ),
1453        ("openai".to_string(), "gpt-4o-mini".to_string()),
1454        (
1455            "openrouter".to_string(),
1456            "google/gemini-2.5-flash".to_string(),
1457        ),
1458        ("ollama".to_string(), "llama3.2".to_string()),
1459        ("local".to_string(), "gpt-4o".to_string()),
1460    ]));
1461
1462    config.models.extend(BTreeMap::from([
1463        (
1464            "claude-sonnet-4-20250514".to_string(),
1465            ModelDef {
1466                name: "Claude Sonnet 4".to_string(),
1467                provider: "anthropic".to_string(),
1468                context_window: 200_000,
1469                stream_timeout: None,
1470                capabilities: vec![
1471                    "tools".to_string(),
1472                    "streaming".to_string(),
1473                    "prompt_caching".to_string(),
1474                    "thinking".to_string(),
1475                ],
1476                pricing: Some(ModelPricing {
1477                    input_per_mtok: 3.0,
1478                    output_per_mtok: 15.0,
1479                    cache_read_per_mtok: Some(0.3),
1480                    cache_write_per_mtok: Some(3.75),
1481                }),
1482            },
1483        ),
1484        (
1485            "gpt-4o-mini".to_string(),
1486            ModelDef {
1487                name: "GPT-4o Mini".to_string(),
1488                provider: "openai".to_string(),
1489                context_window: 128_000,
1490                stream_timeout: None,
1491                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1492                pricing: Some(ModelPricing {
1493                    input_per_mtok: 0.15,
1494                    output_per_mtok: 0.60,
1495                    cache_read_per_mtok: None,
1496                    cache_write_per_mtok: None,
1497                }),
1498            },
1499        ),
1500        (
1501            "Qwen/Qwen3.5-9B".to_string(),
1502            ModelDef {
1503                name: "Qwen3.5 9B".to_string(),
1504                provider: "openrouter".to_string(),
1505                context_window: 131_072,
1506                stream_timeout: None,
1507                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1508                pricing: None,
1509            },
1510        ),
1511        (
1512            "llama3.2".to_string(),
1513            ModelDef {
1514                name: "Llama 3.2".to_string(),
1515                provider: "ollama".to_string(),
1516                context_window: 32_000,
1517                stream_timeout: Some(300.0),
1518                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1519                pricing: None,
1520            },
1521        ),
1522    ]));
1523
1524    config
1525}
1526
1527#[cfg(test)]
1528mod tests {
1529    use super::*;
1530
    /// Shared test helper: drops any thread-local provider-config overlay so a
    /// test starts from the process-wide default configuration.
    fn reset_overrides() {
        clear_user_overrides();
    }
1534
1535    #[test]
1536    fn test_glob_match_prefix() {
1537        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
1538        assert!(glob_match("gpt-*", "gpt-4o"));
1539        assert!(!glob_match("claude-*", "gpt-4o"));
1540    }
1541
1542    #[test]
1543    fn test_glob_match_suffix() {
1544        assert!(glob_match("*-latest", "llama3.2-latest"));
1545        assert!(!glob_match("*-latest", "llama3.2"));
1546    }
1547
1548    #[test]
1549    fn test_glob_match_middle() {
1550        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
1551        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
1552    }
1553
1554    #[test]
1555    fn test_glob_match_exact() {
1556        assert!(glob_match("gpt-4o", "gpt-4o"));
1557        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
1558    }
1559
1560    #[test]
1561    fn test_infer_provider_from_defaults() {
1562        let _guard = crate::llm::env_lock().lock().expect("env lock");
1563        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1564        unsafe {
1565            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1566        }
1567
1568        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
1569        assert_eq!(infer_provider("gpt-4o"), "openai");
1570        assert_eq!(infer_provider("o1-preview"), "openai");
1571        assert_eq!(infer_provider("o3-mini"), "openai");
1572        assert_eq!(infer_provider("o4-mini"), "openai");
1573        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
1574        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
1575        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
1576        assert_eq!(infer_provider("unknown-model"), "anthropic");
1577
1578        unsafe {
1579            match prev_default_provider {
1580                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1581                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1582            }
1583        }
1584    }
1585
1586    #[test]
1587    fn test_infer_provider_prefix_rules() {
1588        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
1589        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
1590        // Even when the id also contains `/`, the local transport prefix wins.
1591        assert_eq!(infer_provider("local:owner/model"), "ollama");
1592        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
1593    }
1594
1595    #[test]
1596    fn test_openrouter_inference_requires_one_slash() {
1597        let _guard = crate::llm::env_lock().lock().expect("env lock");
1598        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1599        unsafe {
1600            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1601        }
1602
1603        assert_eq!(infer_provider("org/model"), "openrouter");
1604        assert_eq!(infer_provider("org/team/model"), "anthropic");
1605
1606        unsafe {
1607            match prev_default_provider {
1608                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1609                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1610            }
1611        }
1612    }
1613
1614    #[test]
1615    fn test_resolve_model_info_normalizes_provider_prefixes() {
1616        let local = resolve_model_info("local:gemma-4-e4b-it");
1617        assert_eq!(local.id, "gemma-4-e4b-it");
1618        assert_eq!(local.provider, "ollama");
1619
1620        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
1621        assert_eq!(ollama.id, "qwen3:30b-a3b");
1622        assert_eq!(ollama.provider, "ollama");
1623
1624        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
1625        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
1626        assert_eq!(hf.provider, "huggingface");
1627    }
1628
1629    #[test]
1630    fn test_model_tier_from_defaults() {
1631        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
1632        assert_eq!(model_tier("gpt-4o"), "frontier");
1633        assert_eq!(model_tier("Qwen3.5-9B"), "small");
1634        assert_eq!(model_tier("deepseek-v3"), "mid");
1635    }
1636
1637    #[test]
1638    fn test_resolve_model_unknown_alias() {
1639        let (id, provider) = resolve_model("gpt-4o");
1640        assert_eq!(id, "gpt-4o");
1641        assert!(provider.is_none());
1642    }
1643
1644    #[test]
1645    fn test_provider_names() {
1646        let names = provider_names();
1647        assert!(names.len() >= 7);
1648        assert!(names.contains(&"anthropic".to_string()));
1649        assert!(names.contains(&"together".to_string()));
1650        assert!(names.contains(&"local".to_string()));
1651        assert!(names.contains(&"mlx".to_string()));
1652        assert!(names.contains(&"openai".to_string()));
1653        assert!(names.contains(&"ollama".to_string()));
1654        assert!(names.contains(&"bedrock".to_string()));
1655        assert!(names.contains(&"azure_openai".to_string()));
1656        assert!(names.contains(&"vertex".to_string()));
1657    }
1658
1659    #[test]
1660    fn test_resolve_tier_model_default_aliases() {
1661        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
1662        assert_eq!(model, "claude-sonnet-4-20250514");
1663        assert_eq!(provider, "anthropic");
1664
1665        let (model, provider) = resolve_tier_model("small", None).unwrap();
1666        assert_eq!(model, "Qwen/Qwen3.5-9B");
1667        assert_eq!(provider, "openrouter");
1668    }
1669
1670    #[test]
1671    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
1672        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
1673        assert_eq!(model, "gpt-4o-mini");
1674        assert_eq!(provider, "openai");
1675    }
1676
1677    #[test]
1678    fn test_provider_config_anthropic() {
1679        let pdef = provider_config("anthropic").unwrap();
1680        assert_eq!(pdef.auth_style, "header");
1681        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
1682    }
1683
1684    #[test]
1685    fn test_provider_config_mlx() {
1686        let pdef = provider_config("mlx").unwrap();
1687        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
1688        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
1689        assert_eq!(
1690            pdef.healthcheck.unwrap().path.as_deref(),
1691            Some("/v1/models")
1692        );
1693
1694        let (model, provider) = resolve_model("mlx-qwen36-27b");
1695        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
1696        assert_eq!(provider.as_deref(), Some("mlx"));
1697    }
1698
1699    #[test]
1700    fn test_enterprise_provider_defaults_and_inference() {
1701        let bedrock = provider_config("bedrock").unwrap();
1702        assert_eq!(bedrock.auth_style, "aws_sigv4");
1703        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
1704        assert_eq!(
1705            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
1706            "bedrock"
1707        );
1708        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
1709
1710        let azure = provider_config("azure_openai").unwrap();
1711        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
1712        assert_eq!(
1713            auth_env_names(&azure.auth_env),
1714            vec![
1715                "AZURE_OPENAI_API_KEY".to_string(),
1716                "AZURE_OPENAI_AD_TOKEN".to_string(),
1717                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1718            ]
1719        );
1720
1721        let vertex = provider_config("vertex").unwrap();
1722        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
1723        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
1724    }
1725
1726    #[test]
1727    fn test_default_provider_env_override_for_unknown_model() {
1728        let _guard = crate::llm::env_lock().lock().expect("env lock");
1729        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1730        unsafe {
1731            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
1732        }
1733
1734        let inference = infer_provider_detail("unknown-model");
1735
1736        unsafe {
1737            match prev_default_provider {
1738                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1739                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1740            }
1741        }
1742
1743        assert_eq!(inference.provider, "openai");
1744        assert_eq!(
1745            inference.source,
1746            crate::llm::provider::ProviderInferenceSource::DefaultFallback
1747        );
1748    }
1749
1750    #[test]
1751    fn test_resolve_base_url_no_env() {
1752        let pdef = ProviderDef {
1753            base_url: "https://example.com".to_string(),
1754            ..Default::default()
1755        };
1756        assert_eq!(resolve_base_url(&pdef), "https://example.com");
1757    }
1758
1759    #[test]
1760    fn test_default_config_roundtrip() {
1761        let config = default_config();
1762        assert!(!config.providers.is_empty());
1763        assert!(!config.inference_rules.is_empty());
1764        assert!(!config.tier_rules.is_empty());
1765        assert_eq!(config.tier_defaults.default, "mid");
1766    }
1767
1768    #[test]
1769    fn test_model_params_empty() {
1770        let params = model_params("claude-sonnet-4-20250514");
1771        assert!(params.is_empty());
1772    }
1773
1774    #[test]
1775    fn test_user_overrides_add_provider_and_alias() {
1776        reset_overrides();
1777        let mut overlay = ProvidersConfig::default();
1778        overlay.providers.insert(
1779            "acme".to_string(),
1780            ProviderDef {
1781                base_url: "https://llm.acme.test/v1".to_string(),
1782                chat_endpoint: "/chat/completions".to_string(),
1783                ..Default::default()
1784            },
1785        );
1786        overlay.aliases.insert(
1787            "acme-fast".to_string(),
1788            AliasDef {
1789                id: "acme/model-fast".to_string(),
1790                provider: "acme".to_string(),
1791                tool_format: Some("native".to_string()),
1792            },
1793        );
1794        set_user_overrides(Some(overlay));
1795
1796        let (model, provider) = resolve_model("acme-fast");
1797        assert_eq!(model, "acme/model-fast");
1798        assert_eq!(provider.as_deref(), Some("acme"));
1799        assert!(provider_names().contains(&"acme".to_string()));
1800        assert_eq!(
1801            provider_config("acme").map(|provider| provider.base_url),
1802            Some("https://llm.acme.test/v1".to_string())
1803        );
1804
1805        reset_overrides();
1806    }
1807
1808    #[test]
1809    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
1810        reset_overrides();
1811        let mut overlay = ProvidersConfig::default();
1812        overlay.models.insert(
1813            "acme/model-fast".to_string(),
1814            ModelDef {
1815                name: "Acme Fast".to_string(),
1816                provider: "acme".to_string(),
1817                context_window: 65_536,
1818                stream_timeout: Some(42.0),
1819                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1820                pricing: Some(ModelPricing {
1821                    input_per_mtok: 1.25,
1822                    output_per_mtok: 2.5,
1823                    cache_read_per_mtok: Some(0.25),
1824                    cache_write_per_mtok: None,
1825                }),
1826            },
1827        );
1828        overlay
1829            .qc_defaults
1830            .insert("acme".to_string(), "acme/model-cheap".to_string());
1831        set_user_overrides(Some(overlay));
1832
1833        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
1834        assert_eq!(entry.context_window, 65_536);
1835        assert_eq!(
1836            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
1837            Some(1.25)
1838        );
1839        assert_eq!(
1840            pricing_per_1k_for("acme", "acme/model-fast"),
1841            Some((0.00125, 0.0025))
1842        );
1843        assert_eq!(
1844            qc_default_model("acme").as_deref(),
1845            Some("acme/model-cheap")
1846        );
1847
1848        reset_overrides();
1849    }
1850
1851    #[test]
1852    fn test_user_overrides_prepend_inference_rules() {
1853        reset_overrides();
1854        let mut overlay = ProvidersConfig::default();
1855        overlay.inference_rules.push(InferenceRule {
1856            pattern: Some("internal-*".to_string()),
1857            contains: None,
1858            exact: None,
1859            provider: "openai".to_string(),
1860        });
1861        set_user_overrides(Some(overlay));
1862
1863        assert_eq!(infer_provider("internal-foo"), "openai");
1864
1865        reset_overrides();
1866    }
1867}