1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide merged configuration, built once on first `load_config()`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file overlaid onto the defaults, if any.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();

thread_local! {
    /// Per-thread overlay merged on top of `CONFIG` by `effective_config()`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root configuration for providers, models, aliases, and routing rules.
///
/// Deserialized from `providers.toml`. Every field is `#[serde(default)]`,
/// so a partial file overlays cleanly onto the built-in defaults via
/// `merge_from`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    // Provider used when none can be inferred; empty/"auto" is treated as
    // unset by `default_provider_with_config`.
    #[serde(default)]
    pub default_provider: Option<String>,
    // Provider name -> connection/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    // Alias name -> (model id, provider) shorthand.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    // Model id -> catalog metadata (context window, pricing, ...).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    // Lowercased provider name -> default QC model (see `qc_default_model`).
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    // Ordered model-id -> provider rules; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    // Ordered model-id -> tier rules; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    // Fallback tier when no rule or heuristic applies.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    // Glob pattern over model ids -> default request parameters
    // (see `model_params`).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
impl ProvidersConfig {
    /// True when this config is indistinguishable from `Default::default()`,
    /// i.e. it carries no user-supplied information.
    pub fn is_empty(&self) -> bool {
        self.default_provider.is_none()
            && self.providers.is_empty()
            && self.aliases.is_empty()
            && self.models.is_empty()
            && self.qc_defaults.is_empty()
            && self.inference_rules.is_empty()
            && self.tier_rules.is_empty()
            && self.model_defaults.is_empty()
            // `tier_defaults` has no "unset" state; "mid" is its default.
            && self.tier_defaults.default == default_mid()
    }

    /// Overlays `overlay` onto `self`, giving overlay entries precedence.
    ///
    /// - Map fields: overlay entries replace same-key base entries.
    /// - Rule lists: overlay rules are PREPENDED so they win under the
    ///   first-match evaluation in `infer_provider_with_config` /
    ///   `model_tier_with_config`.
    /// - `tier_defaults`: replaced only when the overlay actually changed it
    ///   from the "mid" default, so a default overlay cannot clobber a
    ///   customized base value.
    /// - `model_defaults`: merged per glob pattern, overlay keys winning.
    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
        self.providers.extend(overlay.providers.clone());
        self.aliases.extend(overlay.aliases.clone());
        self.models.extend(overlay.models.clone());
        self.qc_defaults.extend(overlay.qc_defaults.clone());

        if overlay.default_provider.is_some() {
            self.default_provider = overlay.default_provider.clone();
        }

        // Prepend overlay rules so they are evaluated before base rules.
        if !overlay.inference_rules.is_empty() {
            let mut merged = overlay.inference_rules.clone();
            merged.extend(self.inference_rules.clone());
            self.inference_rules = merged;
        }

        if !overlay.tier_rules.is_empty() {
            let mut merged = overlay.tier_rules.clone();
            merged.extend(self.tier_rules.clone());
            self.tier_rules = merged;
        }

        // Only a non-default overlay value replaces the base tier default.
        if overlay.tier_defaults.default != default_mid() {
            self.tier_defaults = overlay.tier_defaults.clone();
        }

        for (pattern, defaults) in &overlay.model_defaults {
            self.model_defaults
                .entry(pattern.clone())
                .or_default()
                .extend(defaults.clone());
        }
    }
}
86
/// Connection, authentication, and economics settings for one provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    // Optional human-readable name for UIs.
    #[serde(default)]
    pub display_name: Option<String>,
    // Optional icon identifier for UIs.
    #[serde(default)]
    pub icon: Option<String>,
    // API root; overridable via `base_url_env` (see `resolve_base_url`).
    pub base_url: String,
    // Env var whose non-empty value overrides `base_url`.
    #[serde(default)]
    pub base_url_env: Option<String>,
    // How credentials are attached; defaults to "bearer". Built-in configs
    // also use "none", "header", "aws_sigv4", and "azure_openai".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    // Header name carrying the key when `auth_style` is "header".
    #[serde(default)]
    pub auth_header: Option<String>,
    // Env var(s) that may hold the API key; any non-empty one marks the
    // provider available (see `provider_key_available`).
    #[serde(default)]
    pub auth_env: AuthEnv,
    // Extra headers sent with every request.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    // Chat endpoint path, relative to the base URL.
    #[serde(default)]
    pub chat_endpoint: String,
    // Optional legacy completion endpoint path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    // Optional liveness probe definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    // Free-form capability flags; "native_tools" is consulted by
    // `default_tool_format_with_config`.
    #[serde(default)]
    pub features: Vec<String>,
    // NOTE(review): presumably the name of a provider to fail over to —
    // not consumed in this chunk; confirm at call sites.
    #[serde(default)]
    pub fallback: Option<String>,
    // Retry policy; not consumed in this chunk — semantics per the caller.
    #[serde(default)]
    pub retry_count: Option<u32>,
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    // Presumably requests-per-minute limit — TODO confirm at call sites.
    #[serde(default)]
    pub rpm: Option<u32>,
    // Provider-level fallback pricing, used by `provider_economics` /
    // `pricing_per_1k_for` when a model has no explicit pricing.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    // Typical median latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
impl Default for ProviderDef {
    /// A blank provider definition: no endpoints or credentials configured,
    /// with `auth_style` preset to "bearer" (matching the serde default).
    fn default() -> Self {
        Self {
            display_name: None,
            icon: None,
            base_url: String::new(),
            base_url_env: None,
            // Keep in sync with the `#[serde(default = "default_bearer")]`
            // attribute on the struct field.
            auth_style: default_bearer(),
            auth_header: None,
            auth_env: AuthEnv::None,
            extra_headers: BTreeMap::new(),
            chat_endpoint: String::new(),
            completion_endpoint: None,
            healthcheck: None,
            features: Vec::new(),
            fallback: None,
            retry_count: None,
            retry_delay_ms: None,
            rpm: None,
            cost_per_1k_in: None,
            cost_per_1k_out: None,
            latency_p50_ms: None,
        }
    }
}
160
/// Serde default for `ProviderDef::auth_style`: standard bearer-token auth.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Environment variable(s) that may hold a provider's API key.
///
/// `#[serde(untagged)]`: the TOML value may be omitted (`None`), a single
/// string, or a list of strings. Any non-empty variable counts toward key
/// availability (see `provider_key_available`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    #[default]
    None,
    Single(String),
    Multiple(Vec<String>),
}
175
/// Liveness-probe description for a provider endpoint.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    // HTTP method, e.g. "GET" or "POST".
    pub method: String,
    // Probe path — presumably relative to the provider base URL; confirm at
    // the call site that performs the check.
    #[serde(default)]
    pub path: Option<String>,
    // Absolute probe URL, used instead of `path` when set.
    #[serde(default)]
    pub url: Option<String>,
    // Optional request body for POST-style probes.
    #[serde(default)]
    pub body: Option<String>,
}
186
/// A short model alias mapping a friendly name to a concrete model/provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    // Concrete model id the alias expands to.
    pub id: String,
    // Provider that serves the model.
    pub provider: String,
    // Optional tool-format override (e.g. "native"/"text"); when unset the
    // format is derived by `default_tool_format_with_config`.
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Per-model token pricing, expressed per million tokens
/// (`pricing_per_1k_for` divides by 1000 to get per-1k rates).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    // Optional prompt-cache rates; currency/units follow the fields above.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog metadata for one model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    // Display name of the model.
    pub name: String,
    // Provider that serves it.
    pub provider: String,
    // Advertised context window — presumably in tokens; confirm upstream.
    pub context_window: u64,
    // Optional smaller window actually honored at runtime.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    // Optional streaming timeout; units not visible here — confirm at the
    // consuming call site.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    // Capability tags; recomputed from the capability matrix by
    // `with_effective_capability_tags` before being exposed.
    #[serde(default)]
    pub capabilities: Vec<String>,
    // Per-model pricing; overrides provider-level economics when present.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
223
/// Fully-resolved selection produced by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    // Concrete model id (alias expanded, provider prefix stripped).
    pub id: String,
    // Provider chosen via alias or inference rules.
    pub provider: String,
    // The alias the caller used, when resolution went through one.
    pub alias: Option<String>,
    // "native" or "text" (or an alias-supplied override).
    pub tool_format: String,
    // Tier label from `model_tier_with_config`.
    pub tier: String,
}
232
/// Maps model ids to a provider. Rules are evaluated in order and the first
/// match wins; within one rule, `exact`, then `pattern` (glob), then
/// `contains` are tried (see `infer_provider_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    // Glob pattern over the model id (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    // Substring match over the model id.
    #[serde(default)]
    pub contains: Option<String>,
    // Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    // Provider to select when the rule matches.
    pub provider: String,
}
243
/// Maps model ids to a tier label; same matching semantics as
/// `InferenceRule` (see `model_tier_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    // Glob pattern over the model id (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    // Substring match over the model id.
    #[serde(default)]
    pub contains: Option<String>,
    // Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    // Tier label to return when the rule matches.
    pub tier: String,
}
254
/// Fallback tier configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    // Tier returned when no rule or heuristic applies; defaults to "mid".
    #[serde(default = "default_mid")]
    pub default: String,
}
260
261impl Default for TierDefaults {
262 fn default() -> Self {
263 Self {
264 default: default_mid(),
265 }
266 }
267}
268
/// Shared fallback tier name, used both as the serde default and as the
/// sentinel that `merge_from`/`is_empty` compare against.
fn default_mid() -> String {
    String::from("mid")
}
272
273pub fn load_config() -> &'static ProvidersConfig {
275 CONFIG.get_or_init(|| {
276 let mut config = default_config();
277 let verbose_config_logging = matches!(
278 std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
279 Some("1" | "true" | "TRUE" | "yes" | "YES")
280 ) || matches!(
281 std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
282 Some("1" | "true" | "TRUE" | "yes" | "YES")
283 );
284 if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
285 if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
286 config.merge_from(&overlay);
287 let _ = CONFIG_PATH.set(path);
288 return config;
289 }
290 }
291 if let Some(home) = dirs_or_home() {
292 let path = format!("{home}/.config/harn/providers.toml");
293 if let Some(overlay) = read_external_config(&path, false) {
294 config.merge_from(&overlay);
295 let _ = CONFIG_PATH.set(path);
296 return config;
297 }
298 }
299 config
300 })
301}
302
303fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
304 match std::fs::read_to_string(path) {
305 Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
306 Ok(config) => {
307 if verbose {
308 eprintln!(
309 "[llm_config] Loaded {} providers, {} aliases from {}",
310 config.providers.len(),
311 config.aliases.len(),
312 path
313 );
314 }
315 Some(config)
316 }
317 Err(error) => {
318 eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
319 None
320 }
321 },
322 Err(error) => {
323 if verbose {
324 eprintln!("[llm_config] Cannot read {}: {}", path, error);
325 }
326 None
327 }
328 }
329}
330
331pub fn loaded_config_path() -> Option<std::path::PathBuf> {
334 let _ = load_config();
336 CONFIG_PATH.get().map(std::path::PathBuf::from)
337}
338
339pub fn set_user_overrides(config: Option<ProvidersConfig>) {
343 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
344}
345
/// Removes this thread's config overlay, restoring the global config.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
350
351fn effective_config() -> ProvidersConfig {
352 let mut merged = load_config().clone();
353 USER_OVERRIDES.with(|cell| {
354 if let Some(overlay) = cell.borrow().as_ref() {
355 merged.merge_from(overlay);
356 }
357 });
358 merged
359}
360
361pub fn resolve_model(alias: &str) -> (String, Option<String>) {
363 let config = effective_config();
364 if let Some(a) = config.aliases.get(alias) {
365 return (a.id.clone(), Some(a.provider.clone()));
366 }
367 (normalize_model_id(alias), None)
368}
369
/// Strips a known local/hub provider prefix (`ollama:`, `local:`,
/// `huggingface:`, `hf:`) from a raw selector, returning the bare model id.
/// Selectors without a recognized prefix pass through unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    ["ollama:", "local:", "huggingface:", "hf:"]
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
382
383pub fn resolve_model_info(selector: &str) -> ResolvedModel {
386 let config = effective_config();
387 if let Some(alias) = config.aliases.get(selector) {
388 let id = alias.id.clone();
389 let provider = alias.provider.clone();
390 let tool_format = alias
391 .tool_format
392 .clone()
393 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
394 return ResolvedModel {
395 tier: model_tier_with_config(&config, &id),
396 id,
397 provider,
398 alias: Some(selector.to_string()),
399 tool_format,
400 };
401 }
402
403 let provider = infer_provider_with_config(&config, selector).provider;
404 let id = normalize_model_id(selector);
405 let tool_format = default_tool_format_with_config(&config, &id, &provider);
406 let tier = model_tier_with_config(&config, &id);
407 ResolvedModel {
408 id,
409 provider,
410 alias: None,
411 tool_format,
412 tier,
413 }
414}
415
416pub fn infer_provider(model_id: &str) -> String {
418 infer_provider_detail(model_id).provider
419}
420
421pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
423 let config = effective_config();
424 infer_provider_with_config(&config, model_id)
425}
426
427fn infer_provider_with_config(
428 config: &ProvidersConfig,
429 model_id: &str,
430) -> crate::llm::provider::ProviderInference {
431 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
432 return crate::llm::provider::ProviderInference::builtin("ollama");
433 }
434 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
435 return crate::llm::provider::ProviderInference::builtin("huggingface");
436 }
437 for rule in &config.inference_rules {
438 if let Some(exact) = &rule.exact {
439 if model_id == exact {
440 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
441 }
442 }
443 if let Some(pattern) = &rule.pattern {
444 if glob_match(pattern, model_id) {
445 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
446 }
447 }
448 if let Some(substr) = &rule.contains {
449 if model_id.contains(substr.as_str()) {
450 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
451 }
452 }
453 }
454 crate::llm::provider::infer_provider_from_model_id(
455 model_id,
456 &default_provider_with_config(config),
457 )
458}
459
460pub fn default_provider() -> String {
461 let config = effective_config();
462 default_provider_with_config(&config)
463}
464
465fn default_provider_with_config(config: &ProvidersConfig) -> String {
466 std::env::var("HARN_DEFAULT_PROVIDER")
467 .ok()
468 .map(|value| value.trim().to_string())
469 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
470 .or_else(|| {
471 config
472 .default_provider
473 .as_deref()
474 .map(str::trim)
475 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
476 .map(str::to_string)
477 })
478 .unwrap_or_else(|| "anthropic".to_string())
479}
480
481pub fn model_tier(model_id: &str) -> String {
483 let config = effective_config();
484 model_tier_with_config(&config, model_id)
485}
486
487fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
488 for rule in &config.tier_rules {
489 if let Some(exact) = &rule.exact {
490 if model_id == exact {
491 return rule.tier.clone();
492 }
493 }
494 if let Some(pattern) = &rule.pattern {
495 if glob_match(pattern, model_id) {
496 return rule.tier.clone();
497 }
498 }
499 if let Some(substr) = &rule.contains {
500 if model_id.contains(substr.as_str()) {
501 return rule.tier.clone();
502 }
503 }
504 }
505 let lower = model_id.to_lowercase();
506 if lower.contains("9b") || lower.contains("a3b") {
507 return "small".to_string();
508 }
509 if lower.starts_with("claude-") || lower == "gpt-4o" {
510 return "frontier".to_string();
511 }
512 config.tier_defaults.default.clone()
513}
514
515pub fn provider_config(name: &str) -> Option<ProviderDef> {
517 effective_config().providers.get(name).cloned()
518}
519
520pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
523 let config = effective_config();
524 let mut params = BTreeMap::new();
525 for (pattern, defaults) in &config.model_defaults {
526 if glob_match(pattern, model_id) {
527 for (k, v) in defaults {
528 params.insert(k.clone(), v.clone());
529 }
530 }
531 }
532 params
533}
534
535pub fn provider_names() -> Vec<String> {
537 effective_config().providers.keys().cloned().collect()
538}
539
540pub fn known_model_names() -> Vec<String> {
542 effective_config().aliases.keys().cloned().collect()
543}
544
545pub fn alias_entries() -> Vec<(String, AliasDef)> {
546 effective_config().aliases.into_iter().collect()
547}
548
549pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
551 let mut entries: Vec<_> = effective_config()
552 .models
553 .into_iter()
554 .map(|(id, model)| {
555 let provider = model.provider.clone();
556 (
557 id.clone(),
558 with_effective_capability_tags(id, provider, model),
559 )
560 })
561 .collect();
562 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
563 model_a
564 .provider
565 .cmp(&model_b.provider)
566 .then_with(|| id_a.cmp(id_b))
567 });
568 entries
569}
570
571pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
572 effective_config()
573 .models
574 .get(model_id)
575 .cloned()
576 .map(|model| {
577 let provider = model.provider.clone();
578 with_effective_capability_tags(model_id.to_string(), provider, model)
579 })
580}
581
582pub fn qc_default_model(provider: &str) -> Option<String> {
583 std::env::var("BURIN_QC_MODEL")
584 .ok()
585 .filter(|value| !value.trim().is_empty())
586 .or_else(|| {
587 effective_config()
588 .qc_defaults
589 .get(&provider.to_lowercase())
590 .cloned()
591 })
592}
593
/// Hard-coded default model per provider; "local" and "mlx" consult env
/// vars first, and unknown providers fall back to a Claude Sonnet id.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        // Local OpenAI-compatible server: model name comes from the env.
        "local" => std::env::var("LOCAL_LLM_MODEL")
            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
            .unwrap_or_else(|_| String::from("gpt-4o")),
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| String::from("unsloth/Qwen3.6-27B-UD-MLX-4bit")),
        "openai" => String::from("gpt-4o"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-20250514"),
    }
}
607
608pub fn qc_defaults() -> BTreeMap<String, String> {
609 effective_config().qc_defaults
610}
611
612pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
613 effective_config()
614 .models
615 .get(model_id)
616 .and_then(|model| model.pricing.clone())
617}
618
619pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
620 model_pricing_per_mtok(model_id)
621 .map(|pricing| {
622 (
623 pricing.input_per_mtok / 1000.0,
624 pricing.output_per_mtok / 1000.0,
625 )
626 })
627 .or_else(|| {
628 let (input, output, _) = provider_economics(provider);
629 match (input, output) {
630 (Some(input), Some(output)) => Some((input, output)),
631 _ => None,
632 }
633 })
634}
635
636pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
637 match auth_env {
638 AuthEnv::None => Vec::new(),
639 AuthEnv::Single(name) => vec![name.clone()],
640 AuthEnv::Multiple(names) => names.clone(),
641 }
642}
643
644pub fn provider_key_available(provider: &str) -> bool {
645 let Some(pdef) = provider_config(provider) else {
646 return provider == "ollama";
647 };
648 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
649 return true;
650 }
651 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
652 std::env::var(env_name)
653 .ok()
654 .is_some_and(|value| !value.trim().is_empty())
655 })
656}
657
658pub fn available_provider_names() -> Vec<String> {
659 provider_names()
660 .into_iter()
661 .filter(|provider| provider_key_available(provider))
662 .collect()
663}
664
665pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
667 provider_config(provider)
668 .map(|p| p.features.iter().any(|f| f == feature))
669 .unwrap_or(false)
670}
671
672pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
676 provider_config(provider)
677 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
678 .unwrap_or((None, None, None))
679}
680
681pub fn default_tool_format(model: &str, provider: &str) -> String {
685 let config = effective_config();
686 default_tool_format_with_config(&config, model, provider)
687}
688
689fn default_tool_format_with_config(
690 config: &ProvidersConfig,
691 model: &str,
692 provider: &str,
693) -> String {
694 for (name, alias) in &config.aliases {
696 let matches = (alias.id == model && alias.provider == provider) || name == model;
697 if matches {
698 if let Some(ref fmt) = alias.tool_format {
699 return fmt.clone();
700 }
701 }
702 }
703 let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
704 let legacy_provider_native = config
705 .providers
706 .get(provider)
707 .map(|p| p.features.iter().any(|f| f == "native_tools"))
708 .unwrap_or(false);
709 if capability_matrix_native || legacy_provider_native {
710 "native".to_string()
711 } else {
712 "text".to_string()
713 }
714}
715
716fn with_effective_capability_tags(
717 model_id: String,
718 provider: String,
719 mut model: ModelDef,
720) -> ModelDef {
721 model.capabilities = effective_model_capability_tags(&provider, &model_id);
722 model
723}
724
725pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
729 let caps = crate::llm::capabilities::lookup(provider, model_id);
730 let mut tags = Vec::new();
731 tags.push("streaming".to_string());
734 if caps.native_tools {
735 tags.push("tools".to_string());
736 }
737 if !caps.tool_search.is_empty() {
738 tags.push("tool_search".to_string());
739 }
740 if caps.vision || caps.vision_supported {
741 tags.push("vision".to_string());
742 }
743 if caps.audio {
744 tags.push("audio".to_string());
745 }
746 if caps.pdf {
747 tags.push("pdf".to_string());
748 }
749 if caps.files_api_supported {
750 tags.push("files".to_string());
751 }
752 if caps.prompt_caching {
753 tags.push("prompt_caching".to_string());
754 }
755 if !caps.thinking_modes.is_empty() {
756 tags.push("thinking".to_string());
757 }
758 if caps.interleaved_thinking_supported
759 || caps
760 .thinking_modes
761 .iter()
762 .any(|mode| mode == "adaptive" || mode == "effort")
763 {
764 tags.push("extended_thinking".to_string());
765 }
766 if caps.json_schema.is_some() {
767 tags.push("structured_output".to_string());
768 }
769 tags
770}
771
772pub fn resolve_tier_model(
774 target: &str,
775 preferred_provider: Option<&str>,
776) -> Option<(String, String)> {
777 let config = effective_config();
778
779 if let Some(alias) = config.aliases.get(target) {
780 return Some((alias.id.clone(), alias.provider.clone()));
781 }
782
783 let candidate_aliases = if let Some(provider) = preferred_provider {
784 vec![
785 format!("{provider}/{target}"),
786 format!("{provider}:{target}"),
787 format!("tier/{target}"),
788 target.to_string(),
789 ]
790 } else {
791 vec![format!("tier/{target}"), target.to_string()]
792 };
793
794 for alias_name in candidate_aliases {
795 if let Some(alias) = config.aliases.get(&alias_name) {
796 return Some((alias.id.clone(), alias.provider.clone()));
797 }
798 }
799
800 None
801}
802
803pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
807 let config = effective_config();
808 let mut seen = std::collections::BTreeSet::new();
809 let mut candidates = Vec::new();
810
811 for alias in config.aliases.values() {
812 let pair = (alias.id.clone(), alias.provider.clone());
813 if seen.contains(&pair) {
814 continue;
815 }
816 if model_tier(&alias.id) == target {
817 seen.insert(pair.clone());
818 candidates.push(pair);
819 }
820 }
821
822 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
823 provider_a
824 .cmp(provider_b)
825 .then_with(|| model_a.cmp(model_b))
826 });
827 candidates
828}
829
830pub fn all_model_candidates() -> Vec<(String, String)> {
833 let config = effective_config();
834 let mut seen = std::collections::BTreeSet::new();
835 let mut candidates = Vec::new();
836
837 for alias in config.aliases.values() {
838 let pair = (alias.id.clone(), alias.provider.clone());
839 if seen.insert(pair.clone()) {
840 candidates.push(pair);
841 }
842 }
843
844 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
845 provider_a
846 .cmp(provider_b)
847 .then_with(|| model_a.cmp(model_b))
848 });
849 candidates
850}
851
/// Minimal glob matcher for config rules and `model_defaults` patterns.
///
/// Supported forms: `prefix*`, `*suffix`, `prefix*suffix`, `*substr*`
/// (containment), and literal equality. Fix over the original: a
/// `*substr*` pattern used to fall into the trailing-`*` branch and become
/// `starts_with("*substr")`, which could only match inputs literally
/// beginning with `*`. Patterns with more than one interior `*` still fall
/// back to literal equality, as before.
fn glob_match(pattern: &str, input: &str) -> bool {
    // "*substr*" — containment. (A bare "*" is NOT captured here: stripping
    // its one star leaves "", which has no trailing star; it falls through
    // to the prefix branch and matches everything, as before.)
    if let Some(inner) = pattern
        .strip_prefix('*')
        .and_then(|rest| rest.strip_suffix('*'))
        .filter(|inner| !inner.contains('*'))
    {
        return input.contains(inner);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        input.starts_with(prefix)
    } else if let Some(suffix) = pattern.strip_prefix('*') {
        input.ends_with(suffix)
    } else if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            input.starts_with(parts[0]) && input.ends_with(parts[1])
        } else {
            input == pattern
        }
    } else {
        input == pattern
    }
}
869
/// Minimal stand-in for the `dirs` crate: trust `$HOME` when it is set.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
873
874pub fn resolve_base_url(pdef: &ProviderDef) -> String {
877 if let Some(env_name) = &pdef.base_url_env {
878 if let Ok(val) = std::env::var(env_name) {
879 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
881 if !trimmed.is_empty() {
882 return trimmed.to_string();
883 }
884 }
885 }
886 pdef.base_url.clone()
887}
888
889fn default_config() -> ProvidersConfig {
890 let mut config = ProvidersConfig {
891 default_provider: Some("anthropic".to_string()),
892 ..Default::default()
893 };
894
895 config.providers.insert(
896 "anthropic".to_string(),
897 ProviderDef {
898 base_url: "https://api.anthropic.com/v1".to_string(),
899 auth_style: "header".to_string(),
900 auth_header: Some("x-api-key".to_string()),
901 auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
902 extra_headers: BTreeMap::from([(
903 "anthropic-version".to_string(),
904 "2023-06-01".to_string(),
905 )]),
906 chat_endpoint: "/messages".to_string(),
907 completion_endpoint: None,
908 healthcheck: Some(HealthcheckDef {
909 method: "POST".to_string(),
910 path: Some("/messages/count_tokens".to_string()),
911 url: None,
912 body: Some(
913 r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
914 .to_string(),
915 ),
916 }),
917 features: vec!["prompt_caching".to_string(), "thinking".to_string()],
918 cost_per_1k_in: Some(0.003),
919 cost_per_1k_out: Some(0.015),
920 latency_p50_ms: Some(2500),
921 ..Default::default()
922 },
923 );
924
925 config.providers.insert(
927 "openai".to_string(),
928 ProviderDef {
929 base_url: "https://api.openai.com/v1".to_string(),
930 auth_style: "bearer".to_string(),
931 auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
932 chat_endpoint: "/chat/completions".to_string(),
933 completion_endpoint: Some("/completions".to_string()),
934 healthcheck: Some(HealthcheckDef {
935 method: "GET".to_string(),
936 path: Some("/models".to_string()),
937 url: None,
938 body: None,
939 }),
940 cost_per_1k_in: Some(0.0025),
941 cost_per_1k_out: Some(0.010),
942 latency_p50_ms: Some(1800),
943 ..Default::default()
944 },
945 );
946
947 config.providers.insert(
949 "openrouter".to_string(),
950 ProviderDef {
951 base_url: "https://openrouter.ai/api/v1".to_string(),
952 auth_style: "bearer".to_string(),
953 auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
954 chat_endpoint: "/chat/completions".to_string(),
955 completion_endpoint: Some("/completions".to_string()),
956 healthcheck: Some(HealthcheckDef {
957 method: "GET".to_string(),
958 path: Some("/auth/key".to_string()),
959 url: None,
960 body: None,
961 }),
962 cost_per_1k_in: Some(0.003),
963 cost_per_1k_out: Some(0.015),
964 latency_p50_ms: Some(2200),
965 ..Default::default()
966 },
967 );
968
969 config.providers.insert(
971 "huggingface".to_string(),
972 ProviderDef {
973 base_url: "https://router.huggingface.co/v1".to_string(),
974 auth_style: "bearer".to_string(),
975 auth_env: AuthEnv::Multiple(vec![
976 "HF_TOKEN".to_string(),
977 "HUGGINGFACE_API_KEY".to_string(),
978 ]),
979 chat_endpoint: "/chat/completions".to_string(),
980 completion_endpoint: Some("/completions".to_string()),
981 healthcheck: Some(HealthcheckDef {
982 method: "GET".to_string(),
983 url: Some("https://huggingface.co/api/whoami-v2".to_string()),
984 path: None,
985 body: None,
986 }),
987 cost_per_1k_in: Some(0.0002),
988 cost_per_1k_out: Some(0.0006),
989 latency_p50_ms: Some(2400),
990 ..Default::default()
991 },
992 );
993
994 config.providers.insert(
1003 "ollama".to_string(),
1004 ProviderDef {
1005 base_url: "http://localhost:11434".to_string(),
1006 base_url_env: Some("OLLAMA_HOST".to_string()),
1007 auth_style: "none".to_string(),
1008 chat_endpoint: "/api/chat".to_string(),
1009 completion_endpoint: Some("/api/generate".to_string()),
1010 healthcheck: Some(HealthcheckDef {
1011 method: "GET".to_string(),
1012 path: Some("/api/tags".to_string()),
1013 url: None,
1014 body: None,
1015 }),
1016 cost_per_1k_in: Some(0.0),
1017 cost_per_1k_out: Some(0.0),
1018 latency_p50_ms: Some(1200),
1019 ..Default::default()
1020 },
1021 );
1022
1023 config.providers.insert(
1025 "gemini".to_string(),
1026 ProviderDef {
1027 base_url: "https://generativelanguage.googleapis.com".to_string(),
1028 base_url_env: Some("GEMINI_BASE_URL".to_string()),
1029 auth_style: "header".to_string(),
1030 auth_header: Some("x-goog-api-key".to_string()),
1031 auth_env: AuthEnv::Multiple(vec![
1032 "GEMINI_API_KEY".to_string(),
1033 "GOOGLE_API_KEY".to_string(),
1034 ]),
1035 chat_endpoint: "/v1beta/models".to_string(),
1036 healthcheck: Some(HealthcheckDef {
1037 method: "GET".to_string(),
1038 path: Some("/v1beta/models".to_string()),
1039 url: None,
1040 body: None,
1041 }),
1042 cost_per_1k_in: Some(0.00125),
1043 cost_per_1k_out: Some(0.005),
1044 latency_p50_ms: Some(1800),
1045 ..Default::default()
1046 },
1047 );
1048
1049 config.providers.insert(
1051 "together".to_string(),
1052 ProviderDef {
1053 base_url: "https://api.together.xyz/v1".to_string(),
1054 base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
1055 auth_style: "bearer".to_string(),
1056 auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
1057 chat_endpoint: "/chat/completions".to_string(),
1058 completion_endpoint: Some("/completions".to_string()),
1059 healthcheck: Some(HealthcheckDef {
1060 method: "GET".to_string(),
1061 path: Some("/models".to_string()),
1062 url: None,
1063 body: None,
1064 }),
1065 cost_per_1k_in: Some(0.0002),
1066 cost_per_1k_out: Some(0.0006),
1067 latency_p50_ms: Some(1600),
1068 ..Default::default()
1069 },
1070 );
1071
1072 config.providers.insert(
1074 "groq".to_string(),
1075 ProviderDef {
1076 base_url: "https://api.groq.com/openai/v1".to_string(),
1077 base_url_env: Some("GROQ_BASE_URL".to_string()),
1078 auth_style: "bearer".to_string(),
1079 auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
1080 chat_endpoint: "/chat/completions".to_string(),
1081 completion_endpoint: Some("/completions".to_string()),
1082 healthcheck: Some(HealthcheckDef {
1083 method: "GET".to_string(),
1084 path: Some("/models".to_string()),
1085 url: None,
1086 body: None,
1087 }),
1088 cost_per_1k_in: Some(0.0001),
1089 cost_per_1k_out: Some(0.0003),
1090 latency_p50_ms: Some(450),
1091 ..Default::default()
1092 },
1093 );
1094
1095 config.providers.insert(
1097 "deepseek".to_string(),
1098 ProviderDef {
1099 base_url: "https://api.deepseek.com/v1".to_string(),
1100 base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
1101 auth_style: "bearer".to_string(),
1102 auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
1103 chat_endpoint: "/chat/completions".to_string(),
1104 completion_endpoint: Some("/completions".to_string()),
1105 healthcheck: Some(HealthcheckDef {
1106 method: "GET".to_string(),
1107 path: Some("/models".to_string()),
1108 url: None,
1109 body: None,
1110 }),
1111 cost_per_1k_in: Some(0.00014),
1112 cost_per_1k_out: Some(0.00028),
1113 latency_p50_ms: Some(1800),
1114 ..Default::default()
1115 },
1116 );
1117
1118 config.providers.insert(
1120 "fireworks".to_string(),
1121 ProviderDef {
1122 base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1123 base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1124 auth_style: "bearer".to_string(),
1125 auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1126 chat_endpoint: "/chat/completions".to_string(),
1127 completion_endpoint: Some("/completions".to_string()),
1128 healthcheck: Some(HealthcheckDef {
1129 method: "GET".to_string(),
1130 path: Some("/models".to_string()),
1131 url: None,
1132 body: None,
1133 }),
1134 cost_per_1k_in: Some(0.0002),
1135 cost_per_1k_out: Some(0.0006),
1136 latency_p50_ms: Some(1400),
1137 ..Default::default()
1138 },
1139 );
1140
1141 config.providers.insert(
1143 "dashscope".to_string(),
1144 ProviderDef {
1145 base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
1146 base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
1147 auth_style: "bearer".to_string(),
1148 auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
1149 chat_endpoint: "/chat/completions".to_string(),
1150 completion_endpoint: Some("/completions".to_string()),
1151 healthcheck: Some(HealthcheckDef {
1152 method: "GET".to_string(),
1153 path: Some("/models".to_string()),
1154 url: None,
1155 body: None,
1156 }),
1157 cost_per_1k_in: Some(0.0003),
1158 cost_per_1k_out: Some(0.0012),
1159 latency_p50_ms: Some(1600),
1160 ..Default::default()
1161 },
1162 );
1163
1164 config.providers.insert(
1168 "bedrock".to_string(),
1169 ProviderDef {
1170 base_url: String::new(),
1171 base_url_env: Some("BEDROCK_BASE_URL".to_string()),
1172 auth_style: "aws_sigv4".to_string(),
1173 auth_env: AuthEnv::None,
1174 chat_endpoint: "/model/{model}/converse".to_string(),
1175 features: vec!["native_tools".to_string()],
1176 latency_p50_ms: Some(2600),
1177 ..Default::default()
1178 },
1179 );
1180
1181 config.providers.insert(
1185 "azure_openai".to_string(),
1186 ProviderDef {
1187 base_url: "https://{resource}.openai.azure.com".to_string(),
1188 base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
1189 auth_style: "azure_openai".to_string(),
1190 auth_env: AuthEnv::Multiple(vec![
1191 "AZURE_OPENAI_API_KEY".to_string(),
1192 "AZURE_OPENAI_AD_TOKEN".to_string(),
1193 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1194 ]),
1195 chat_endpoint:
1196 "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
1197 .to_string(),
1198 features: vec!["native_tools".to_string()],
1199 cost_per_1k_in: Some(0.0025),
1200 cost_per_1k_out: Some(0.010),
1201 latency_p50_ms: Some(1900),
1202 ..Default::default()
1203 },
1204 );
1205
1206 config.providers.insert(
1208 "vertex".to_string(),
1209 ProviderDef {
1210 base_url: "https://aiplatform.googleapis.com/v1".to_string(),
1211 base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
1212 auth_style: "bearer".to_string(),
1213 auth_env: AuthEnv::Multiple(vec![
1214 "VERTEX_AI_ACCESS_TOKEN".to_string(),
1215 "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
1216 "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
1217 ]),
1218 chat_endpoint:
1219 "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
1220 .to_string(),
1221 features: vec!["native_tools".to_string()],
1222 cost_per_1k_in: Some(0.00125),
1223 cost_per_1k_out: Some(0.005),
1224 latency_p50_ms: Some(2100),
1225 ..Default::default()
1226 },
1227 );
1228
1229 config.providers.insert(
1231 "local".to_string(),
1232 ProviderDef {
1233 base_url: "http://localhost:8000".to_string(),
1234 base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1235 auth_style: "none".to_string(),
1236 chat_endpoint: "/v1/chat/completions".to_string(),
1237 completion_endpoint: Some("/v1/completions".to_string()),
1238 healthcheck: Some(HealthcheckDef {
1239 method: "GET".to_string(),
1240 path: Some("/v1/models".to_string()),
1241 url: None,
1242 body: None,
1243 }),
1244 cost_per_1k_in: Some(0.0),
1245 cost_per_1k_out: Some(0.0),
1246 latency_p50_ms: Some(900),
1247 ..Default::default()
1248 },
1249 );
1250
1251 config.providers.insert(
1255 "llamacpp".to_string(),
1256 ProviderDef {
1257 base_url: "http://127.0.0.1:8001".to_string(),
1258 base_url_env: Some("LLAMACPP_BASE_URL".to_string()),
1259 auth_style: "none".to_string(),
1260 chat_endpoint: "/v1/chat/completions".to_string(),
1261 completion_endpoint: Some("/v1/completions".to_string()),
1262 healthcheck: Some(HealthcheckDef {
1263 method: "GET".to_string(),
1264 path: Some("/v1/models".to_string()),
1265 url: None,
1266 body: None,
1267 }),
1268 cost_per_1k_in: Some(0.0),
1269 cost_per_1k_out: Some(0.0),
1270 latency_p50_ms: Some(900),
1271 ..Default::default()
1272 },
1273 );
1274
1275 config.providers.insert(
1279 "mlx".to_string(),
1280 ProviderDef {
1281 base_url: "http://127.0.0.1:8002".to_string(),
1282 base_url_env: Some("MLX_BASE_URL".to_string()),
1283 auth_style: "none".to_string(),
1284 chat_endpoint: "/v1/chat/completions".to_string(),
1285 completion_endpoint: Some("/v1/completions".to_string()),
1286 healthcheck: Some(HealthcheckDef {
1287 method: "GET".to_string(),
1288 path: Some("/v1/models".to_string()),
1289 url: None,
1290 body: None,
1291 }),
1292 cost_per_1k_in: Some(0.0),
1293 cost_per_1k_out: Some(0.0),
1294 latency_p50_ms: Some(900),
1295 ..Default::default()
1296 },
1297 );
1298
1299 config.providers.insert(
1301 "vllm".to_string(),
1302 ProviderDef {
1303 base_url: "http://localhost:8000".to_string(),
1304 base_url_env: Some("VLLM_BASE_URL".to_string()),
1305 auth_style: "none".to_string(),
1306 chat_endpoint: "/v1/chat/completions".to_string(),
1307 completion_endpoint: Some("/v1/completions".to_string()),
1308 healthcheck: Some(HealthcheckDef {
1309 method: "GET".to_string(),
1310 path: Some("/v1/models".to_string()),
1311 url: None,
1312 body: None,
1313 }),
1314 cost_per_1k_in: Some(0.0),
1315 cost_per_1k_out: Some(0.0),
1316 latency_p50_ms: Some(800),
1317 ..Default::default()
1318 },
1319 );
1320
1321 config.providers.insert(
1323 "tgi".to_string(),
1324 ProviderDef {
1325 base_url: "http://localhost:8080".to_string(),
1326 base_url_env: Some("TGI_BASE_URL".to_string()),
1327 auth_style: "none".to_string(),
1328 chat_endpoint: "/v1/chat/completions".to_string(),
1329 completion_endpoint: Some("/v1/completions".to_string()),
1330 healthcheck: Some(HealthcheckDef {
1331 method: "GET".to_string(),
1332 path: Some("/health".to_string()),
1333 url: None,
1334 body: None,
1335 }),
1336 cost_per_1k_in: Some(0.0),
1337 cost_per_1k_out: Some(0.0),
1338 latency_p50_ms: Some(950),
1339 ..Default::default()
1340 },
1341 );
1342
1343 config.inference_rules = vec![
1345 InferenceRule {
1346 pattern: Some("claude-*".to_string()),
1347 contains: None,
1348 exact: None,
1349 provider: "anthropic".to_string(),
1350 },
1351 InferenceRule {
1352 pattern: Some("gpt-*".to_string()),
1353 contains: None,
1354 exact: None,
1355 provider: "openai".to_string(),
1356 },
1357 InferenceRule {
1358 pattern: Some("o1*".to_string()),
1359 contains: None,
1360 exact: None,
1361 provider: "openai".to_string(),
1362 },
1363 InferenceRule {
1364 pattern: Some("o3*".to_string()),
1365 contains: None,
1366 exact: None,
1367 provider: "openai".to_string(),
1368 },
1369 InferenceRule {
1370 pattern: Some("o4*".to_string()),
1371 contains: None,
1372 exact: None,
1373 provider: "openai".to_string(),
1374 },
1375 InferenceRule {
1376 pattern: Some("anthropic.claude-*".to_string()),
1377 contains: None,
1378 exact: None,
1379 provider: "bedrock".to_string(),
1380 },
1381 InferenceRule {
1382 pattern: Some("meta.llama*".to_string()),
1383 contains: None,
1384 exact: None,
1385 provider: "bedrock".to_string(),
1386 },
1387 InferenceRule {
1388 pattern: Some("amazon.*".to_string()),
1389 contains: None,
1390 exact: None,
1391 provider: "bedrock".to_string(),
1392 },
1393 InferenceRule {
1394 pattern: Some("mistral.*".to_string()),
1395 contains: None,
1396 exact: None,
1397 provider: "bedrock".to_string(),
1398 },
1399 InferenceRule {
1400 pattern: Some("cohere.*".to_string()),
1401 contains: None,
1402 exact: None,
1403 provider: "bedrock".to_string(),
1404 },
1405 InferenceRule {
1406 pattern: Some("gemini-*".to_string()),
1407 contains: None,
1408 exact: None,
1409 provider: "gemini".to_string(),
1410 },
1411 ];
1412
1413 config.tier_rules = vec![
1415 TierRule {
1416 contains: Some("9b".to_string()),
1417 pattern: None,
1418 exact: None,
1419 tier: "small".to_string(),
1420 },
1421 TierRule {
1422 contains: Some("a3b".to_string()),
1423 pattern: None,
1424 exact: None,
1425 tier: "small".to_string(),
1426 },
1427 TierRule {
1428 contains: Some("gemma-4-e2b".to_string()),
1429 pattern: None,
1430 exact: None,
1431 tier: "small".to_string(),
1432 },
1433 TierRule {
1434 contains: Some("gemma-4-e4b".to_string()),
1435 pattern: None,
1436 exact: None,
1437 tier: "small".to_string(),
1438 },
1439 TierRule {
1440 contains: Some("gemma-4-26b".to_string()),
1441 pattern: None,
1442 exact: None,
1443 tier: "mid".to_string(),
1444 },
1445 TierRule {
1446 contains: Some("gemma-4-31b".to_string()),
1447 pattern: None,
1448 exact: None,
1449 tier: "frontier".to_string(),
1450 },
1451 TierRule {
1452 contains: Some("gemma4:26b".to_string()),
1453 pattern: None,
1454 exact: None,
1455 tier: "mid".to_string(),
1456 },
1457 TierRule {
1458 contains: Some("gemma4:31b".to_string()),
1459 pattern: None,
1460 exact: None,
1461 tier: "frontier".to_string(),
1462 },
1463 TierRule {
1464 pattern: Some("claude-*".to_string()),
1465 contains: None,
1466 exact: None,
1467 tier: "frontier".to_string(),
1468 },
1469 TierRule {
1470 exact: Some("gpt-4o".to_string()),
1471 contains: None,
1472 pattern: None,
1473 tier: "frontier".to_string(),
1474 },
1475 ];
1476
1477 config.tier_defaults = TierDefaults {
1478 default: "mid".to_string(),
1479 };
1480
1481 config.aliases.insert(
1482 "frontier".to_string(),
1483 AliasDef {
1484 id: "claude-sonnet-4-20250514".to_string(),
1485 provider: "anthropic".to_string(),
1486 tool_format: None,
1487 },
1488 );
1489 config.aliases.insert(
1490 "tier/frontier".to_string(),
1491 AliasDef {
1492 id: "claude-sonnet-4-20250514".to_string(),
1493 provider: "anthropic".to_string(),
1494 tool_format: None,
1495 },
1496 );
1497 config.aliases.insert(
1498 "mid".to_string(),
1499 AliasDef {
1500 id: "gpt-4o-mini".to_string(),
1501 provider: "openai".to_string(),
1502 tool_format: None,
1503 },
1504 );
1505 config.aliases.insert(
1506 "tier/mid".to_string(),
1507 AliasDef {
1508 id: "gpt-4o-mini".to_string(),
1509 provider: "openai".to_string(),
1510 tool_format: None,
1511 },
1512 );
1513 config.aliases.insert(
1514 "small".to_string(),
1515 AliasDef {
1516 id: "Qwen/Qwen3.5-9B".to_string(),
1517 provider: "openrouter".to_string(),
1518 tool_format: None,
1519 },
1520 );
1521 config.aliases.insert(
1522 "tier/small".to_string(),
1523 AliasDef {
1524 id: "Qwen/Qwen3.5-9B".to_string(),
1525 provider: "openrouter".to_string(),
1526 tool_format: None,
1527 },
1528 );
1529 config.aliases.insert(
1530 "local-gemma4".to_string(),
1531 AliasDef {
1532 id: "gemma-4-26b-a4b-it".to_string(),
1533 provider: "local".to_string(),
1534 tool_format: None,
1535 },
1536 );
1537 config.aliases.insert(
1538 "local-gemma4-26b".to_string(),
1539 AliasDef {
1540 id: "gemma-4-26b-a4b-it".to_string(),
1541 provider: "local".to_string(),
1542 tool_format: None,
1543 },
1544 );
1545 config.aliases.insert(
1546 "local-gemma4-31b".to_string(),
1547 AliasDef {
1548 id: "gemma-4-31b-it".to_string(),
1549 provider: "local".to_string(),
1550 tool_format: None,
1551 },
1552 );
1553 config.aliases.insert(
1554 "local-gemma4-e4b".to_string(),
1555 AliasDef {
1556 id: "gemma-4-e4b-it".to_string(),
1557 provider: "local".to_string(),
1558 tool_format: None,
1559 },
1560 );
1561 config.aliases.insert(
1562 "local-gemma4-e2b".to_string(),
1563 AliasDef {
1564 id: "gemma-4-e2b-it".to_string(),
1565 provider: "local".to_string(),
1566 tool_format: None,
1567 },
1568 );
1569 config.aliases.insert(
1570 "mlx-qwen36-27b".to_string(),
1571 AliasDef {
1572 id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
1573 provider: "mlx".to_string(),
1574 tool_format: None,
1575 },
1576 );
1577
1578 config.qc_defaults.extend(BTreeMap::from([
1579 (
1580 "anthropic".to_string(),
1581 "claude-3-5-haiku-20241022".to_string(),
1582 ),
1583 ("openai".to_string(), "gpt-4o-mini".to_string()),
1584 (
1585 "openrouter".to_string(),
1586 "google/gemini-2.5-flash".to_string(),
1587 ),
1588 ("ollama".to_string(), "llama3.2".to_string()),
1589 ("local".to_string(), "gpt-4o".to_string()),
1590 ]));
1591
1592 config.models.extend(BTreeMap::from([
1593 (
1594 "claude-sonnet-4-20250514".to_string(),
1595 ModelDef {
1596 name: "Claude Sonnet 4".to_string(),
1597 provider: "anthropic".to_string(),
1598 context_window: 200_000,
1599 runtime_context_window: None,
1600 stream_timeout: None,
1601 capabilities: vec![
1602 "tools".to_string(),
1603 "streaming".to_string(),
1604 "prompt_caching".to_string(),
1605 "thinking".to_string(),
1606 ],
1607 pricing: Some(ModelPricing {
1608 input_per_mtok: 3.0,
1609 output_per_mtok: 15.0,
1610 cache_read_per_mtok: Some(0.3),
1611 cache_write_per_mtok: Some(3.75),
1612 }),
1613 },
1614 ),
1615 (
1616 "gpt-4o-mini".to_string(),
1617 ModelDef {
1618 name: "GPT-4o Mini".to_string(),
1619 provider: "openai".to_string(),
1620 context_window: 128_000,
1621 runtime_context_window: None,
1622 stream_timeout: None,
1623 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1624 pricing: Some(ModelPricing {
1625 input_per_mtok: 0.15,
1626 output_per_mtok: 0.60,
1627 cache_read_per_mtok: None,
1628 cache_write_per_mtok: None,
1629 }),
1630 },
1631 ),
1632 (
1633 "Qwen/Qwen3.5-9B".to_string(),
1634 ModelDef {
1635 name: "Qwen3.5 9B".to_string(),
1636 provider: "openrouter".to_string(),
1637 context_window: 131_072,
1638 runtime_context_window: None,
1639 stream_timeout: None,
1640 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1641 pricing: None,
1642 },
1643 ),
1644 (
1645 "llama3.2".to_string(),
1646 ModelDef {
1647 name: "Llama 3.2".to_string(),
1648 provider: "ollama".to_string(),
1649 context_window: 32_000,
1650 runtime_context_window: None,
1651 stream_timeout: Some(300.0),
1652 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1653 pricing: None,
1654 },
1655 ),
1656 ]));
1657
1658 config.models.extend(canonical_priced_models());
1659
1660 config
1661}
1662
1663fn canonical_priced_models() -> BTreeMap<String, ModelDef> {
1671 let mut out = BTreeMap::new();
1672 let anthropic_caps = vec![
1673 "tools".to_string(),
1674 "streaming".to_string(),
1675 "prompt_caching".to_string(),
1676 "thinking".to_string(),
1677 ];
1678 let openai_caps = vec!["tools".to_string(), "streaming".to_string()];
1679 let gemini_caps = vec!["tools".to_string(), "streaming".to_string()];
1680
1681 let mut anthropic = |id: &str,
1682 name: &str,
1683 context_window: u64,
1684 input: f64,
1685 output: f64,
1686 cache_read: Option<f64>,
1687 cache_write: Option<f64>| {
1688 out.insert(
1689 id.to_string(),
1690 ModelDef {
1691 name: name.to_string(),
1692 provider: "anthropic".to_string(),
1693 context_window,
1694 runtime_context_window: None,
1695 stream_timeout: None,
1696 capabilities: anthropic_caps.clone(),
1697 pricing: Some(ModelPricing {
1698 input_per_mtok: input,
1699 output_per_mtok: output,
1700 cache_read_per_mtok: cache_read,
1701 cache_write_per_mtok: cache_write,
1702 }),
1703 },
1704 );
1705 };
1706 anthropic(
1707 "claude-3-5-haiku-20241022",
1708 "Claude Haiku 3.5",
1709 200_000,
1710 0.80,
1711 4.00,
1712 Some(0.08),
1713 Some(1.00),
1714 );
1715 anthropic(
1716 "claude-haiku-4-5-20251001",
1717 "Claude Haiku 4.5",
1718 200_000,
1719 1.00,
1720 5.00,
1721 Some(0.10),
1722 Some(1.25),
1723 );
1724 anthropic(
1725 "claude-3-5-sonnet-20240620",
1726 "Claude Sonnet 3.5 (2024-06-20)",
1727 200_000,
1728 3.00,
1729 15.00,
1730 Some(0.30),
1731 Some(3.75),
1732 );
1733 anthropic(
1734 "claude-3-5-sonnet-20241022",
1735 "Claude Sonnet 3.5 (2024-10-22)",
1736 200_000,
1737 3.00,
1738 15.00,
1739 Some(0.30),
1740 Some(3.75),
1741 );
1742 anthropic(
1743 "claude-3-opus-20240229",
1744 "Claude Opus 3",
1745 200_000,
1746 15.00,
1747 75.00,
1748 Some(1.50),
1749 Some(18.75),
1750 );
1751 anthropic(
1752 "claude-opus-4-20250514",
1753 "Claude Opus 4",
1754 200_000,
1755 15.00,
1756 75.00,
1757 Some(1.50),
1758 Some(18.75),
1759 );
1760 anthropic(
1761 "claude-opus-4-1-20250805",
1762 "Claude Opus 4.1",
1763 200_000,
1764 15.00,
1765 75.00,
1766 Some(1.50),
1767 Some(18.75),
1768 );
1769
1770 let mut openai = |id: &str,
1771 name: &str,
1772 context_window: u64,
1773 input: f64,
1774 output: f64,
1775 cache_read: Option<f64>| {
1776 out.insert(
1777 id.to_string(),
1778 ModelDef {
1779 name: name.to_string(),
1780 provider: "openai".to_string(),
1781 context_window,
1782 runtime_context_window: None,
1783 stream_timeout: None,
1784 capabilities: openai_caps.clone(),
1785 pricing: Some(ModelPricing {
1786 input_per_mtok: input,
1787 output_per_mtok: output,
1788 cache_read_per_mtok: cache_read,
1789 cache_write_per_mtok: None,
1790 }),
1791 },
1792 );
1793 };
1794 openai("gpt-4o", "GPT-4o", 128_000, 2.50, 10.00, Some(1.25));
1795 openai("gpt-4-turbo", "GPT-4 Turbo", 128_000, 10.00, 30.00, None);
1796 openai("o1", "OpenAI o1", 200_000, 15.00, 60.00, Some(7.50));
1797 openai(
1798 "o1-mini",
1799 "OpenAI o1-mini",
1800 128_000,
1801 3.00,
1802 12.00,
1803 Some(1.50),
1804 );
1805 openai("o3", "OpenAI o3", 200_000, 15.00, 60.00, Some(7.50));
1806 openai("o3-mini", "OpenAI o3-mini", 200_000, 1.10, 4.40, Some(0.55));
1807
1808 let mut gemini = |id: &str,
1809 name: &str,
1810 context_window: u64,
1811 input: f64,
1812 output: f64,
1813 cache_read: Option<f64>| {
1814 out.insert(
1815 id.to_string(),
1816 ModelDef {
1817 name: name.to_string(),
1818 provider: "gemini".to_string(),
1819 context_window,
1820 runtime_context_window: None,
1821 stream_timeout: None,
1822 capabilities: gemini_caps.clone(),
1823 pricing: Some(ModelPricing {
1824 input_per_mtok: input,
1825 output_per_mtok: output,
1826 cache_read_per_mtok: cache_read,
1827 cache_write_per_mtok: None,
1828 }),
1829 },
1830 );
1831 };
1832 gemini(
1833 "gemini-2.5-flash",
1834 "Gemini 2.5 Flash",
1835 1_048_576,
1836 0.10,
1837 0.40,
1838 Some(0.025),
1839 );
1840 gemini(
1841 "gemini-2.5-pro",
1842 "Gemini 2.5 Pro",
1843 2_097_152,
1844 1.25,
1845 5.00,
1846 Some(0.3125),
1847 );
1848
1849 out.insert(
1850 "mistral-large-latest".to_string(),
1851 ModelDef {
1852 name: "Mistral Large".to_string(),
1853 provider: "openrouter".to_string(),
1854 context_window: 128_000,
1855 runtime_context_window: None,
1856 stream_timeout: None,
1857 capabilities: openai_caps.clone(),
1858 pricing: Some(ModelPricing {
1859 input_per_mtok: 2.00,
1860 output_per_mtok: 6.00,
1861 cache_read_per_mtok: None,
1862 cache_write_per_mtok: None,
1863 }),
1864 },
1865 );
1866 out.insert(
1867 "mistral-small-latest".to_string(),
1868 ModelDef {
1869 name: "Mistral Small".to_string(),
1870 provider: "openrouter".to_string(),
1871 context_window: 128_000,
1872 runtime_context_window: None,
1873 stream_timeout: None,
1874 capabilities: openai_caps,
1875 pricing: Some(ModelPricing {
1876 input_per_mtok: 0.20,
1877 output_per_mtok: 0.60,
1878 cache_read_per_mtok: None,
1879 cache_write_per_mtok: None,
1880 }),
1881 },
1882 );
1883 out
1884}
1885
/// Test helper: applies `overlay` on top of the built-in default catalog and
/// returns the merged result, mirroring how a global provider file is layered
/// over the compiled-in defaults.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
1892
#[cfg(test)]
mod tests {
    use super::*;

    // Drops any thread-local user-override config installed by a previous
    // test so each test observes the built-in defaults.
    fn reset_overrides() {
        clear_user_overrides();
    }

    // A trailing `*` matches any string sharing the prefix.
    #[test]
    fn test_glob_match_prefix() {
        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
        assert!(glob_match("gpt-*", "gpt-4o"));
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    // A leading `*` matches any string sharing the suffix; the suffix must
    // actually be present (no zero-length star-plus-suffix collapse here).
    #[test]
    fn test_glob_match_suffix() {
        assert!(glob_match("*-latest", "llama3.2-latest"));
        assert!(!glob_match("*-latest", "llama3.2"));
    }

    // A `*` in the middle requires both the literal prefix and suffix.
    #[test]
    fn test_glob_match_middle() {
        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
    }

    // A pattern without `*` only matches the identical string.
    #[test]
    fn test_glob_match_exact() {
        assert!(glob_match("gpt-4o", "gpt-4o"));
        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
    }

    // Built-in inference rules map well-known model-id shapes to providers;
    // with HARN_DEFAULT_PROVIDER unset, an unknown id falls back to
    // "anthropic".
    #[test]
    fn test_infer_provider_from_defaults() {
        // Env vars are process-global: hold the shared lock and save/restore
        // the previous value so concurrently running tests are undisturbed.
        // NOTE(review): a failing assert below panics past the restore step,
        // leaking the cleared var for the rest of the process — confirm the
        // env_lock guard is the intended sole protection here.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // Explicit "<provider>:" prefixes take precedence: "local:" and "ollama:"
    // both resolve to ollama, and "hf:" resolves to huggingface.
    #[test]
    fn test_infer_provider_prefix_rules() {
        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
        assert_eq!(infer_provider("local:owner/model"), "ollama");
        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
    }

    // "org/model" (exactly one slash) infers openrouter; two or more slashes
    // do not, and such ids fall through to the default provider.
    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // Provider prefixes are stripped from the resolved id while setting the
    // provider field; note "ollama:qwen3:30b-a3b" keeps the id's inner colon.
    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        let local = resolve_model_info("local:gemma-4-e4b-it");
        assert_eq!(local.id, "gemma-4-e4b-it");
        assert_eq!(local.provider, "ollama");

        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
        assert_eq!(ollama.id, "qwen3:30b-a3b");
        assert_eq!(ollama.provider, "ollama");

        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.provider, "huggingface");
    }

    // Built-in tier rules: claude-* and gpt-4o are frontier, the "9b"
    // substring marks small, and anything unmatched gets the "mid" default.
    #[test]
    fn test_model_tier_from_defaults() {
        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
        assert_eq!(model_tier("gpt-4o"), "frontier");
        assert_eq!(model_tier("Qwen3.5-9B"), "small");
        assert_eq!(model_tier("deepseek-v3"), "mid");
    }

    // An id that is not an alias is returned unchanged with no provider.
    #[test]
    fn test_resolve_model_unknown_alias() {
        let (id, provider) = resolve_model("gpt-4o");
        assert_eq!(id, "gpt-4o");
        assert!(provider.is_none());
    }

    // The default catalog exposes at least the core built-in providers,
    // including the enterprise entries (bedrock, azure_openai, vertex).
    #[test]
    fn test_provider_names() {
        let names = provider_names();
        assert!(names.len() >= 7);
        assert!(names.contains(&"anthropic".to_string()));
        assert!(names.contains(&"together".to_string()));
        assert!(names.contains(&"local".to_string()));
        assert!(names.contains(&"mlx".to_string()));
        assert!(names.contains(&"openai".to_string()));
        assert!(names.contains(&"ollama".to_string()));
        assert!(names.contains(&"bedrock".to_string()));
        assert!(names.contains(&"azure_openai".to_string()));
        assert!(names.contains(&"vertex".to_string()));
    }

    // A global provider file only overlays the built-in defaults: its
    // default_provider and new aliases win, but built-in providers remain.
    #[test]
    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.aliases.insert(
            "quickstart".to_string(),
            AliasDef {
                id: "llama3.2".to_string(),
                provider: "ollama".to_string(),
                tool_format: None,
            },
        );

        let merged = merge_global_config(overlay);

        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
        assert!(merged.providers.contains_key("anthropic"));
        assert!(merged.providers.contains_key("ollama"));
        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
    }

    // Tier names resolve through the default aliases ("frontier", "small").
    #[test]
    fn test_resolve_tier_model_default_aliases() {
        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
        assert_eq!(model, "claude-sonnet-4-20250514");
        assert_eq!(provider, "anthropic");

        let (model, provider) = resolve_tier_model("small", None).unwrap();
        assert_eq!(model, "Qwen/Qwen3.5-9B");
        assert_eq!(provider, "openrouter");
    }

    // When a provider hint is supplied, the provider-scoped alias is used.
    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
        assert_eq!(model, "gpt-4o-mini");
        assert_eq!(provider, "openai");
    }

    // Anthropic uses header auth with the x-api-key header rather than a
    // bearer token.
    #[test]
    fn test_provider_config_anthropic() {
        let pdef = provider_config("anthropic").unwrap();
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    // The mlx provider defaults and its alias resolve as configured in the
    // default catalog (port 8002, /v1/models healthcheck).
    #[test]
    fn test_provider_config_mlx() {
        let pdef = provider_config("mlx").unwrap();
        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
        assert_eq!(
            pdef.healthcheck.unwrap().path.as_deref(),
            Some("/v1/models")
        );

        let (model, provider) = resolve_model("mlx-qwen36-27b");
        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
        assert_eq!(provider.as_deref(), Some("mlx"));
    }

    // Enterprise providers: bedrock uses SigV4, azure_openai accepts three
    // credential env vars in priority order, vertex has a fixed base URL,
    // and the dotted bedrock model ids infer the bedrock provider.
    #[test]
    fn test_enterprise_provider_defaults_and_inference() {
        let bedrock = provider_config("bedrock").unwrap();
        assert_eq!(bedrock.auth_style, "aws_sigv4");
        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
        assert_eq!(
            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
            "bedrock"
        );
        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");

        let azure = provider_config("azure_openai").unwrap();
        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
        assert_eq!(
            auth_env_names(&azure.auth_env),
            vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]
        );

        let vertex = provider_config("vertex").unwrap();
        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
    }

    // HARN_DEFAULT_PROVIDER overrides the fallback provider for ids no rule
    // matches, and the inference detail records it as a default fallback.
    #[test]
    fn test_default_provider_env_override_for_unknown_model() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
        }

        let inference = infer_provider_detail("unknown-model");

        // Restore before asserting so a failure does not leak the override.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }

        assert_eq!(inference.provider, "openai");
        assert_eq!(
            inference.source,
            crate::llm::provider::ProviderInferenceSource::DefaultFallback
        );
    }

    // Without a base_url_env set, resolve_base_url returns base_url verbatim.
    #[test]
    fn test_resolve_base_url_no_env() {
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    // Sanity check on the built-in catalog: non-empty tables and the "mid"
    // tier default.
    #[test]
    fn test_default_config_roundtrip() {
        let config = default_config();
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
        assert_eq!(config.tier_defaults.default, "mid");
    }

    // merge_from keeps existing providers while adding new ones and taking
    // the overlay's default_provider.
    #[test]
    fn test_external_config_overlays_default_catalog() {
        let mut config = default_config();
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.providers.insert(
            "custom".to_string(),
            ProviderDef {
                base_url: "https://llm.example.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );

        config.merge_from(&overlay);

        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
        assert!(config.providers.contains_key("custom"));
        assert!(config.providers.contains_key("anthropic"));
        assert!(config.providers.contains_key("ollama"));
    }

    // No model_defaults entry ships for this model, so the params are empty.
    #[test]
    fn test_model_params_empty() {
        let params = model_params("claude-sonnet-4-20250514");
        assert!(params.is_empty());
    }

    // Thread-local user overrides add a brand-new provider and alias that
    // resolve_model / provider_names / provider_config all observe.
    // NOTE(review): if an assert fails, the trailing reset_overrides() is
    // skipped and the override leaks into later tests on this thread.
    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        overlay.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        set_user_overrides(Some(overlay));

        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        assert_eq!(
            provider_config("acme").map(|provider| provider.base_url),
            Some("https://llm.acme.test/v1".to_string())
        );

        reset_overrides();
    }

    // The capability matrix decides the default tool format per
    // model/provider pair: native tools on llamacpp for this qwen build,
    // text-based tools on the local provider for this gemma build.
    #[test]
    fn test_default_tool_format_uses_capability_matrix() {
        reset_overrides();

        assert_eq!(
            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
            "native"
        );
        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
    }

    // User overrides can register a new catalog model (with pricing) and a
    // per-provider QC default model.
    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                runtime_context_window: None,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(overlay));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        // NOTE(review): the overlay declared ["tools", "streaming"] but the
        // catalog entry is expected to keep only "streaming" — presumably the
        // capability matrix strips "tools" for an unknown provider; confirm
        // this filtering is intentional.
        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        // Per-Mtok prices (1.25 / 2.5) scaled down to per-1k-token figures.
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }

    // Override inference rules are consulted before the built-in rules, so a
    // user pattern can claim ids the defaults would route elsewhere.
    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        set_user_overrides(Some(overlay));

        assert_eq!(infer_provider("internal-foo"), "openai");

        reset_overrides();
    }
}