// harn_vm/llm_config.rs — provider/model configuration for the Harn VM.

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
6static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
7static CONFIG_PATH: OnceLock<String> = OnceLock::new();
8
thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    /// `None` means "no overlay installed"; installed via `set_user_overrides`
    /// and consulted by `effective_config`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Top-level shape of `providers.toml` (and the `[llm]` overlay in
/// `harn.toml`). All sections are optional; missing sections deserialize to
/// empty collections so partial overlays merge cleanly via `merge_from`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider used when nothing else selects one; see
    /// `default_provider_with_config` for the env-var precedence.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider name -> transport/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Short alias -> concrete (model id, provider) binding.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id -> catalog entry (context window, pricing, capabilities).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lowercased provider name -> default QC model id.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered rules mapping model ids to providers; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered rules mapping model ids to capability tiers; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier used when no tier rule or heuristic matches.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Glob pattern -> default request parameters (temperature, etc.).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
39impl ProvidersConfig {
40    pub fn is_empty(&self) -> bool {
41        self.default_provider.is_none()
42            && self.providers.is_empty()
43            && self.aliases.is_empty()
44            && self.models.is_empty()
45            && self.qc_defaults.is_empty()
46            && self.inference_rules.is_empty()
47            && self.tier_rules.is_empty()
48            && self.model_defaults.is_empty()
49            && self.tier_defaults.default == default_mid()
50    }
51
52    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
53        self.providers.extend(overlay.providers.clone());
54        self.aliases.extend(overlay.aliases.clone());
55        self.models.extend(overlay.models.clone());
56        self.qc_defaults.extend(overlay.qc_defaults.clone());
57
58        if overlay.default_provider.is_some() {
59            self.default_provider = overlay.default_provider.clone();
60        }
61
62        if !overlay.inference_rules.is_empty() {
63            let mut merged = overlay.inference_rules.clone();
64            merged.extend(self.inference_rules.clone());
65            self.inference_rules = merged;
66        }
67
68        if !overlay.tier_rules.is_empty() {
69            let mut merged = overlay.tier_rules.clone();
70            merged.extend(self.tier_rules.clone());
71            self.tier_rules = merged;
72        }
73
74        if overlay.tier_defaults.default != default_mid() {
75            self.tier_defaults = overlay.tier_defaults.clone();
76        }
77
78        for (pattern, defaults) in &overlay.model_defaults {
79            self.model_defaults
80                .entry(pattern.clone())
81                .or_default()
82                .extend(defaults.clone());
83        }
84    }
85}
86
/// Transport, auth, and economics for a single LLM provider endpoint.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Human-readable name for UIs; falls back to the map key when absent.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon identifier for UIs.
    #[serde(default)]
    pub icon: Option<String>,
    /// API base URL; may be overridden at runtime via `base_url_env`
    /// (see `resolve_base_url`).
    pub base_url: String,
    /// Env var name that, when set and non-empty, overrides `base_url`.
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme: "bearer" (default), "header", or "none"
    /// (the "none" value short-circuits `provider_key_available`).
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name used when `auth_style` is header-based (e.g. "x-api-key").
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var(s) holding the API key; tried in order (see `AuthEnv`).
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Extra static headers sent with every request.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Path of the chat endpoint relative to `base_url`.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Path of the legacy completion endpoint, when the provider has one.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional request definition used to probe provider health.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Legacy provider-level feature flags (e.g. "native_tools");
    /// queried via `provider_has_feature`.
    #[serde(default)]
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
135impl Default for ProviderDef {
136    fn default() -> Self {
137        Self {
138            display_name: None,
139            icon: None,
140            base_url: String::new(),
141            base_url_env: None,
142            auth_style: default_bearer(),
143            auth_header: None,
144            auth_env: AuthEnv::None,
145            extra_headers: BTreeMap::new(),
146            chat_endpoint: String::new(),
147            completion_endpoint: None,
148            healthcheck: None,
149            features: Vec::new(),
150            fallback: None,
151            retry_count: None,
152            retry_delay_ms: None,
153            rpm: None,
154            cost_per_1k_in: None,
155            cost_per_1k_out: None,
156            latency_p50_ms: None,
157        }
158    }
159}
160
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set). Untagged so TOML accepts either
/// `auth_env = "KEY"` or `auth_env = ["KEY_A", "KEY_B"]`.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No env var configured; treated as "no key required".
    #[default]
    None,
    /// One env var name.
    Single(String),
    /// Several env var names, tried in order (see `auth_env_names`).
    Multiple(Vec<String>),
}
175
/// Request description used to probe a provider's availability.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method, e.g. "GET" or "POST".
    pub method: String,
    /// Path relative to the provider's base URL; mutually exclusive with
    /// `url` by convention — NOTE(review): not enforced here, confirm caller.
    #[serde(default)]
    pub path: Option<String>,
    /// Absolute URL to probe instead of `path`.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body for POST-style probes.
    #[serde(default)]
    pub body: Option<String>,
}
186
/// A short model alias bound to a concrete model id and provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Provider-native model id the alias resolves to.
    pub id: String,
    /// Provider name the alias routes to.
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Static per-model pricing in USD per million tokens.
/// Converted to per-1k by `pricing_per_1k_for`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// USD per million input tokens.
    pub input_per_mtok: f64,
    /// USD per million output tokens.
    pub output_per_mtok: f64,
    /// USD per million cached-read tokens, when the provider prices them.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// USD per million cache-write tokens, when the provider prices them.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog entry for a single model (keyed by model id in `models`).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Display name.
    pub name: String,
    /// Provider serving this model.
    pub provider: String,
    /// Context window size in tokens.
    pub context_window: u64,
    /// Streaming timeout — NOTE(review): units not stated here; presumably
    /// seconds, confirm against the consumer.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags. Parsed for backwards compatibility only; the
    /// effective tags are recomputed by `effective_model_capability_tags`.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Optional static pricing; overrides provider-level economics.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
221
/// Fully-resolved model identity returned by `resolve_model_info`:
/// provider, normalized model id, tool format, and capability tier.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Provider-native model id (selector prefixes stripped).
    pub id: String,
    /// Provider name (from the alias or inferred).
    pub provider: String,
    /// The alias the caller used, if resolution went through one.
    pub alias: Option<String>,
    /// "native" or "text" tool-calling format.
    pub tool_format: String,
    /// Capability tier, e.g. "small", "mid", "frontier".
    pub tier: String,
}
230
/// Maps model ids to a provider. A rule fires when any of its clauses
/// matches; rules are evaluated in order and the first hit wins
/// (see `infer_provider_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider to use when the rule fires.
    pub provider: String,
}
241
/// Maps model ids to a capability tier. Same matching semantics as
/// `InferenceRule`: any clause matches, first rule wins
/// (see `model_tier_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier assigned when the rule fires.
    pub tier: String,
}
252
/// Fallback tier configuration used when no tier rule or heuristic matches.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Default tier name; "mid" unless overridden.
    #[serde(default = "default_mid")]
    pub default: String,
}
258
259impl Default for TierDefaults {
260    fn default() -> Self {
261        Self {
262            default: default_mid(),
263        }
264    }
265}
266
/// Serde default for `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
270
/// Load and cache the providers config. Called once at VM startup.
///
/// Resolution order, first hit wins:
/// 1. `HARN_PROVIDERS_CONFIG` env var pointing at a TOML file;
/// 2. `$HOME/.config/harn/providers.toml`;
/// 3. the built-in `default_config()` alone.
/// A successfully-read external file is merged *on top of* the built-in
/// defaults, and its path is recorded in `CONFIG_PATH` for
/// `loaded_config_path`. Cached in a `OnceLock`, so env changes after the
/// first call have no effect.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        let mut config = default_config();
        // Verbose logging is enabled by either HARN_VERBOSE_CONFIG or
        // HARN_ACP_VERBOSE being a truthy string.
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
                config.merge_from(&overlay);
                // set() can only fail if already set, which cannot happen
                // inside this one-shot initializer; ignore the result.
                let _ = CONFIG_PATH.set(path);
                return config;
            }
        }
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            // NOTE(review): verbose flag is intentionally(?) not forwarded
            // for the home-dir fallback path — read errors here stay silent
            // even under HARN_VERBOSE_CONFIG. Confirm this is deliberate.
            if let Some(overlay) = read_external_config(&path, false) {
                config.merge_from(&overlay);
                let _ = CONFIG_PATH.set(path);
                return config;
            }
        }
        config
    })
}
300
301fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
302    match std::fs::read_to_string(path) {
303        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
304            Ok(config) => {
305                if verbose {
306                    eprintln!(
307                        "[llm_config] Loaded {} providers, {} aliases from {}",
308                        config.providers.len(),
309                        config.aliases.len(),
310                        path
311                    );
312                }
313                Some(config)
314            }
315            Err(error) => {
316                eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
317                None
318            }
319        },
320        Err(error) => {
321            if verbose {
322                eprintln!("[llm_config] Cannot read {}: {}", path, error);
323            }
324            None
325        }
326    }
327}
328
329/// Returns the filesystem path of the currently-loaded providers config, if
330/// any. Returns `None` when built-in defaults are active.
331pub fn loaded_config_path() -> Option<std::path::PathBuf> {
332    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
333    let _ = load_config();
334    CONFIG_PATH.get().map(std::path::PathBuf::from)
335}
336
337/// Install per-run provider config overlays. The overlay uses the same shape as
338/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
339/// manifests. Passing `None` clears the overlay.
340pub fn set_user_overrides(config: Option<ProvidersConfig>) {
341    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
342}
343
344/// Clear per-run provider config overlays.
345pub fn clear_user_overrides() {
346    set_user_overrides(None);
347}
348
349fn effective_config() -> ProvidersConfig {
350    let mut merged = load_config().clone();
351    USER_OVERRIDES.with(|cell| {
352        if let Some(overlay) = cell.borrow().as_ref() {
353            merged.merge_from(overlay);
354        }
355    });
356    merged
357}
358
359/// Resolve a model alias to (model_id, provider_name).
360pub fn resolve_model(alias: &str) -> (String, Option<String>) {
361    let config = effective_config();
362    if let Some(a) = config.aliases.get(alias) {
363        return (a.id.clone(), Some(a.provider.clone()));
364    }
365    (normalize_model_id(alias), None)
366}
367
/// Strip host/provider selector prefixes that identify transport, not the
/// provider-native model id. This mirrors Burin's existing normalization so
/// `ollama:qwen3:30b` reaches Ollama as `qwen3:30b` instead of an invalid
/// model named `ollama`.
pub fn normalize_model_id(raw: &str) -> String {
    const SELECTOR_PREFIXES: [&str; 4] = ["ollama:", "local:", "huggingface:", "hf:"];
    SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
380
/// Resolve an alias or selector into the complete catalog identity hosts need:
/// provider inference, prefix-normalized model id, default tool format, and tier.
///
/// Alias hits take the alias's provider and id verbatim (no prefix
/// normalization), preferring the alias's own `tool_format` override.
/// Non-alias selectors run provider inference on the raw selector, then
/// normalize the id.
pub fn resolve_model_info(selector: &str) -> ResolvedModel {
    let config = effective_config();
    if let Some(alias) = config.aliases.get(selector) {
        let id = alias.id.clone();
        let provider = alias.provider.clone();
        // Alias-level override beats the computed default format.
        let tool_format = alias
            .tool_format
            .clone()
            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
        return ResolvedModel {
            tier: model_tier_with_config(&config, &id),
            id,
            provider,
            alias: Some(selector.to_string()),
            tool_format,
        };
    }

    // No alias: infer provider from the raw selector (so transport prefixes
    // like "ollama:" are still visible), then strip them from the id.
    let provider = infer_provider_with_config(&config, selector).provider;
    let id = normalize_model_id(selector);
    let tool_format = default_tool_format_with_config(&config, &id, &provider);
    let tier = model_tier_with_config(&config, &id);
    ResolvedModel {
        id,
        provider,
        alias: None,
        tool_format,
        tier,
    }
}
413
414/// Infer provider from a model ID using inference rules.
415pub fn infer_provider(model_id: &str) -> String {
416    infer_provider_detail(model_id).provider
417}
418
419/// Infer provider from a model ID and retain whether the configured default was used.
420pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
421    let config = effective_config();
422    infer_provider_with_config(&config, model_id)
423}
424
425fn infer_provider_with_config(
426    config: &ProvidersConfig,
427    model_id: &str,
428) -> crate::llm::provider::ProviderInference {
429    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
430        return crate::llm::provider::ProviderInference::builtin("ollama");
431    }
432    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
433        return crate::llm::provider::ProviderInference::builtin("huggingface");
434    }
435    for rule in &config.inference_rules {
436        if let Some(exact) = &rule.exact {
437            if model_id == exact {
438                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
439            }
440        }
441        if let Some(pattern) = &rule.pattern {
442            if glob_match(pattern, model_id) {
443                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
444            }
445        }
446        if let Some(substr) = &rule.contains {
447            if model_id.contains(substr.as_str()) {
448                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
449            }
450        }
451    }
452    crate::llm::provider::infer_provider_from_model_id(
453        model_id,
454        &default_provider_with_config(config),
455    )
456}
457
458pub fn default_provider() -> String {
459    let config = effective_config();
460    default_provider_with_config(&config)
461}
462
463fn default_provider_with_config(config: &ProvidersConfig) -> String {
464    std::env::var("HARN_DEFAULT_PROVIDER")
465        .ok()
466        .map(|value| value.trim().to_string())
467        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
468        .or_else(|| {
469            config
470                .default_provider
471                .as_deref()
472                .map(str::trim)
473                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
474                .map(str::to_string)
475        })
476        .unwrap_or_else(|| "anthropic".to_string())
477}
478
479/// Get model tier ("small", "mid", "frontier").
480pub fn model_tier(model_id: &str) -> String {
481    let config = effective_config();
482    model_tier_with_config(&config, model_id)
483}
484
485fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
486    for rule in &config.tier_rules {
487        if let Some(exact) = &rule.exact {
488            if model_id == exact {
489                return rule.tier.clone();
490            }
491        }
492        if let Some(pattern) = &rule.pattern {
493            if glob_match(pattern, model_id) {
494                return rule.tier.clone();
495            }
496        }
497        if let Some(substr) = &rule.contains {
498            if model_id.contains(substr.as_str()) {
499                return rule.tier.clone();
500            }
501        }
502    }
503    let lower = model_id.to_lowercase();
504    if lower.contains("9b") || lower.contains("a3b") {
505        return "small".to_string();
506    }
507    if lower.starts_with("claude-") || lower == "gpt-4o" {
508        return "frontier".to_string();
509    }
510    config.tier_defaults.default.clone()
511}
512
513/// Get provider config for resolving base_url, auth, etc.
514pub fn provider_config(name: &str) -> Option<ProviderDef> {
515    effective_config().providers.get(name).cloned()
516}
517
518/// Get model-specific default parameters (temperature, etc.).
519/// Matches glob patterns in model_defaults keys.
520pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
521    let config = effective_config();
522    let mut params = BTreeMap::new();
523    for (pattern, defaults) in &config.model_defaults {
524        if glob_match(pattern, model_id) {
525            for (k, v) in defaults {
526                params.insert(k.clone(), v.clone());
527            }
528        }
529    }
530    params
531}
532
533/// Get list of configured provider names.
534pub fn provider_names() -> Vec<String> {
535    effective_config().providers.keys().cloned().collect()
536}
537
538/// Return every configured alias name, sorted deterministically.
539pub fn known_model_names() -> Vec<String> {
540    effective_config().aliases.keys().cloned().collect()
541}
542
543pub fn alias_entries() -> Vec<(String, AliasDef)> {
544    effective_config().aliases.into_iter().collect()
545}
546
547/// Return every configured model-catalog entry, sorted by provider then id.
548pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
549    let mut entries: Vec<_> = effective_config()
550        .models
551        .into_iter()
552        .map(|(id, model)| {
553            let provider = model.provider.clone();
554            (
555                id.clone(),
556                with_effective_capability_tags(id, provider, model),
557            )
558        })
559        .collect();
560    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
561        model_a
562            .provider
563            .cmp(&model_b.provider)
564            .then_with(|| id_a.cmp(id_b))
565    });
566    entries
567}
568
569pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
570    effective_config()
571        .models
572        .get(model_id)
573        .cloned()
574        .map(|model| {
575            let provider = model.provider.clone();
576            with_effective_capability_tags(model_id.to_string(), provider, model)
577        })
578}
579
580pub fn qc_default_model(provider: &str) -> Option<String> {
581    std::env::var("BURIN_QC_MODEL")
582        .ok()
583        .filter(|value| !value.trim().is_empty())
584        .or_else(|| {
585            effective_config()
586                .qc_defaults
587                .get(&provider.to_lowercase())
588                .cloned()
589        })
590}
591
/// Hard-coded default model id per provider. "local" and "mlx" consult env
/// vars first; everything else is a fixed catalog choice, with a Claude
/// model as the catch-all.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        "local" => std::env::var("LOCAL_LLM_MODEL")
            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
            .unwrap_or_else(|_| String::from("gpt-4o")),
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| String::from("unsloth/Qwen3.6-27B-UD-MLX-4bit")),
        other => String::from(match other {
            "openai" => "gpt-4o",
            "ollama" => "llama3.2",
            "openrouter" => "anthropic/claude-sonnet-4.6",
            _ => "claude-sonnet-4-20250514",
        }),
    }
}
605
606pub fn qc_defaults() -> BTreeMap<String, String> {
607    effective_config().qc_defaults
608}
609
610pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
611    effective_config()
612        .models
613        .get(model_id)
614        .and_then(|model| model.pricing.clone())
615}
616
617pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
618    model_pricing_per_mtok(model_id)
619        .map(|pricing| {
620            (
621                pricing.input_per_mtok / 1000.0,
622                pricing.output_per_mtok / 1000.0,
623            )
624        })
625        .or_else(|| {
626            let (input, output, _) = provider_economics(provider);
627            match (input, output) {
628                (Some(input), Some(output)) => Some((input, output)),
629                _ => None,
630            }
631        })
632}
633
634pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
635    match auth_env {
636        AuthEnv::None => Vec::new(),
637        AuthEnv::Single(name) => vec![name.clone()],
638        AuthEnv::Multiple(names) => names.clone(),
639    }
640}
641
642pub fn provider_key_available(provider: &str) -> bool {
643    let Some(pdef) = provider_config(provider) else {
644        return provider == "ollama";
645    };
646    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
647        return true;
648    }
649    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
650        std::env::var(env_name)
651            .ok()
652            .is_some_and(|value| !value.trim().is_empty())
653    })
654}
655
656pub fn available_provider_names() -> Vec<String> {
657    provider_names()
658        .into_iter()
659        .filter(|provider| provider_key_available(provider))
660        .collect()
661}
662
663/// Check if a provider advertises a legacy provider-level feature.
664pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
665    provider_config(provider)
666        .map(|p| p.features.iter().any(|f| f == feature))
667        .unwrap_or(false)
668}
669
670/// Provider-level catalog pricing/latency. Model-specific static pricing in
671/// `llm::cost` still wins when available; this is the adapter-level fallback
672/// used by routing and portal summaries.
673pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
674    provider_config(provider)
675        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
676        .unwrap_or((None, None, None))
677}
678
679/// Resolve the default tool format for a model+provider combination.
680/// Priority: alias `tool_format` (matched by model ID) > provider/model
681/// capability matrix > legacy provider feature > "text".
682pub fn default_tool_format(model: &str, provider: &str) -> String {
683    let config = effective_config();
684    default_tool_format_with_config(&config, model, provider)
685}
686
687fn default_tool_format_with_config(
688    config: &ProvidersConfig,
689    model: &str,
690    provider: &str,
691) -> String {
692    // Aliases match by model ID + provider, or by alias name.
693    for (name, alias) in &config.aliases {
694        let matches = (alias.id == model && alias.provider == provider) || name == model;
695        if matches {
696            if let Some(ref fmt) = alias.tool_format {
697                return fmt.clone();
698            }
699        }
700    }
701    let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
702    let legacy_provider_native = config
703        .providers
704        .get(provider)
705        .map(|p| p.features.iter().any(|f| f == "native_tools"))
706        .unwrap_or(false);
707    if capability_matrix_native || legacy_provider_native {
708        "native".to_string()
709    } else {
710        "text".to_string()
711    }
712}
713
714fn with_effective_capability_tags(
715    model_id: String,
716    provider: String,
717    mut model: ModelDef,
718) -> ModelDef {
719    model.capabilities = effective_model_capability_tags(&provider, &model_id);
720    model
721}
722
723/// Legacy display tags derived from the canonical provider/model capability
724/// matrix. The matrix is the source of truth; `models.*.capabilities` in
725/// providers.toml is accepted only for backwards-compatible parsing.
726pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
727    let caps = crate::llm::capabilities::lookup(provider, model_id);
728    let mut tags = Vec::new();
729    // Today all Harn chat providers expose streaming. Keep this as a
730    // transport baseline rather than a duplicated per-model declaration.
731    tags.push("streaming".to_string());
732    if caps.native_tools {
733        tags.push("tools".to_string());
734    }
735    if !caps.tool_search.is_empty() {
736        tags.push("tool_search".to_string());
737    }
738    if caps.vision || caps.vision_supported {
739        tags.push("vision".to_string());
740    }
741    if caps.audio {
742        tags.push("audio".to_string());
743    }
744    if caps.pdf {
745        tags.push("pdf".to_string());
746    }
747    if caps.files_api_supported {
748        tags.push("files".to_string());
749    }
750    if caps.prompt_caching {
751        tags.push("prompt_caching".to_string());
752    }
753    if !caps.thinking_modes.is_empty() {
754        tags.push("thinking".to_string());
755    }
756    if caps.interleaved_thinking_supported
757        || caps
758            .thinking_modes
759            .iter()
760            .any(|mode| mode == "adaptive" || mode == "effort")
761    {
762        tags.push("extended_thinking".to_string());
763    }
764    if caps.json_schema.is_some() {
765        tags.push("structured_output".to_string());
766    }
767    tags
768}
769
770/// Resolve a tier or alias into a concrete model/provider pair.
771pub fn resolve_tier_model(
772    target: &str,
773    preferred_provider: Option<&str>,
774) -> Option<(String, String)> {
775    let config = effective_config();
776
777    if let Some(alias) = config.aliases.get(target) {
778        return Some((alias.id.clone(), alias.provider.clone()));
779    }
780
781    let candidate_aliases = if let Some(provider) = preferred_provider {
782        vec![
783            format!("{provider}/{target}"),
784            format!("{provider}:{target}"),
785            format!("tier/{target}"),
786            target.to_string(),
787        ]
788    } else {
789        vec![format!("tier/{target}"), target.to_string()]
790    };
791
792    for alias_name in candidate_aliases {
793        if let Some(alias) = config.aliases.get(&alias_name) {
794            return Some((alias.id.clone(), alias.provider.clone()));
795        }
796    }
797
798    None
799}
800
801/// Return all configured alias-backed model/provider pairs whose resolved
802/// model falls into the requested capability tier. The result is de-duplicated
803/// and sorted deterministically by provider then model id.
804pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
805    let config = effective_config();
806    let mut seen = std::collections::BTreeSet::new();
807    let mut candidates = Vec::new();
808
809    for alias in config.aliases.values() {
810        let pair = (alias.id.clone(), alias.provider.clone());
811        if seen.contains(&pair) {
812            continue;
813        }
814        if model_tier(&alias.id) == target {
815            seen.insert(pair.clone());
816            candidates.push(pair);
817        }
818    }
819
820    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
821        provider_a
822            .cmp(provider_b)
823            .then_with(|| model_a.cmp(model_b))
824    });
825    candidates
826}
827
828/// Return all configured alias-backed model/provider pairs. Used by routing
829/// policies that need to compare alternatives across tiers.
830pub fn all_model_candidates() -> Vec<(String, String)> {
831    let config = effective_config();
832    let mut seen = std::collections::BTreeSet::new();
833    let mut candidates = Vec::new();
834
835    for alias in config.aliases.values() {
836        let pair = (alias.id.clone(), alias.provider.clone());
837        if seen.insert(pair.clone()) {
838            candidates.push(pair);
839        }
840    }
841
842    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
843        provider_a
844            .cmp(provider_b)
845            .then_with(|| model_a.cmp(model_b))
846    });
847    candidates
848}
849
/// Simple glob matching for patterns like "claude-*", "qwen/*", "*-mini",
/// or "a*b*c". `*` matches any (possibly empty) run of characters; a
/// pattern without `*` must match exactly.
///
/// Fixes two defects in the previous implementation: a middle-star pattern
/// like "ab*ba" no longer matches overlapping inputs such as "aba", and
/// patterns with two or more stars now actually glob-match instead of
/// silently degrading to exact string equality.
fn glob_match(pattern: &str, input: &str) -> bool {
    let mut parts = pattern.split('*');
    // The first fragment is anchored at the start of the input.
    let first = parts.next().unwrap_or("");
    let Some(mut remaining) = input.strip_prefix(first) else {
        return false;
    };

    let rest: Vec<&str> = parts.collect();
    if rest.is_empty() {
        // No '*' in the pattern at all: require an exact match.
        return remaining.is_empty();
    }

    // Middle fragments are consumed greedily left-to-right at their first
    // occurrence; the final fragment is anchored at the end. This is the
    // standard (and correct) algorithm for '*'-only wildcards.
    let (last, middles) = rest.split_last().expect("rest is non-empty");
    for segment in middles {
        if segment.is_empty() {
            continue; // consecutive stars collapse
        }
        match remaining.find(segment) {
            Some(pos) => remaining = &remaining[pos + segment.len()..],
            None => return false,
        }
    }
    last.is_empty() || remaining.strip_suffix(last).is_some()
}
867
/// The user's home directory from `$HOME`, if set.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
871
872/// Resolve the effective base URL for a provider, checking the `base_url_env`
873/// override first, then falling back to the configured `base_url`.
874pub fn resolve_base_url(pdef: &ProviderDef) -> String {
875    if let Some(env_name) = &pdef.base_url_env {
876        if let Ok(val) = std::env::var(env_name) {
877            // Strip surrounding quotes that some .env parsers leave intact.
878            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
879            if !trimmed.is_empty() {
880                return trimmed.to_string();
881            }
882        }
883    }
884    pdef.base_url.clone()
885}
886
887fn default_config() -> ProvidersConfig {
888    let mut config = ProvidersConfig {
889        default_provider: Some("anthropic".to_string()),
890        ..Default::default()
891    };
892
893    config.providers.insert(
894        "anthropic".to_string(),
895        ProviderDef {
896            base_url: "https://api.anthropic.com/v1".to_string(),
897            auth_style: "header".to_string(),
898            auth_header: Some("x-api-key".to_string()),
899            auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
900            extra_headers: BTreeMap::from([(
901                "anthropic-version".to_string(),
902                "2023-06-01".to_string(),
903            )]),
904            chat_endpoint: "/messages".to_string(),
905            completion_endpoint: None,
906            healthcheck: Some(HealthcheckDef {
907                method: "POST".to_string(),
908                path: Some("/messages/count_tokens".to_string()),
909                url: None,
910                body: Some(
911                    r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
912                        .to_string(),
913                ),
914            }),
915            features: vec!["prompt_caching".to_string(), "thinking".to_string()],
916            cost_per_1k_in: Some(0.003),
917            cost_per_1k_out: Some(0.015),
918            latency_p50_ms: Some(2500),
919            ..Default::default()
920        },
921    );
922
923    // OpenAI
924    config.providers.insert(
925        "openai".to_string(),
926        ProviderDef {
927            base_url: "https://api.openai.com/v1".to_string(),
928            auth_style: "bearer".to_string(),
929            auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
930            chat_endpoint: "/chat/completions".to_string(),
931            completion_endpoint: Some("/completions".to_string()),
932            healthcheck: Some(HealthcheckDef {
933                method: "GET".to_string(),
934                path: Some("/models".to_string()),
935                url: None,
936                body: None,
937            }),
938            cost_per_1k_in: Some(0.0025),
939            cost_per_1k_out: Some(0.010),
940            latency_p50_ms: Some(1800),
941            ..Default::default()
942        },
943    );
944
945    // OpenRouter
946    config.providers.insert(
947        "openrouter".to_string(),
948        ProviderDef {
949            base_url: "https://openrouter.ai/api/v1".to_string(),
950            auth_style: "bearer".to_string(),
951            auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
952            chat_endpoint: "/chat/completions".to_string(),
953            completion_endpoint: Some("/completions".to_string()),
954            healthcheck: Some(HealthcheckDef {
955                method: "GET".to_string(),
956                path: Some("/auth/key".to_string()),
957                url: None,
958                body: None,
959            }),
960            cost_per_1k_in: Some(0.003),
961            cost_per_1k_out: Some(0.015),
962            latency_p50_ms: Some(2200),
963            ..Default::default()
964        },
965    );
966
967    // HuggingFace
968    config.providers.insert(
969        "huggingface".to_string(),
970        ProviderDef {
971            base_url: "https://router.huggingface.co/v1".to_string(),
972            auth_style: "bearer".to_string(),
973            auth_env: AuthEnv::Multiple(vec![
974                "HF_TOKEN".to_string(),
975                "HUGGINGFACE_API_KEY".to_string(),
976            ]),
977            chat_endpoint: "/chat/completions".to_string(),
978            completion_endpoint: Some("/completions".to_string()),
979            healthcheck: Some(HealthcheckDef {
980                method: "GET".to_string(),
981                url: Some("https://huggingface.co/api/whoami-v2".to_string()),
982                path: None,
983                body: None,
984            }),
985            cost_per_1k_in: Some(0.0002),
986            cost_per_1k_out: Some(0.0006),
987            latency_p50_ms: Some(2400),
988            ..Default::default()
989        },
990    );
991
992    // Ollama default. Hosts can override this to `/v1/chat/completions`
993    // via a bundled `providers.toml` (loaded by setting
994    // `HARN_PROVIDERS_CONFIG` in the host process). The OpenAI-compat
995    // path bypasses Ollama's per-model tool-call post-processors
996    // (qwen3coder.go, qwen35.go) which raise HTTP 500s on text-mode
997    // responses for the Qwen3.5 family. The default here stays on
998    // `/api/chat` so the harn-vm test stub keeps working with Ollama's
999    // native NDJSON wire format.
1000    config.providers.insert(
1001        "ollama".to_string(),
1002        ProviderDef {
1003            base_url: "http://localhost:11434".to_string(),
1004            base_url_env: Some("OLLAMA_HOST".to_string()),
1005            auth_style: "none".to_string(),
1006            chat_endpoint: "/api/chat".to_string(),
1007            completion_endpoint: Some("/api/generate".to_string()),
1008            healthcheck: Some(HealthcheckDef {
1009                method: "GET".to_string(),
1010                path: Some("/api/tags".to_string()),
1011                url: None,
1012                body: None,
1013            }),
1014            cost_per_1k_in: Some(0.0),
1015            cost_per_1k_out: Some(0.0),
1016            latency_p50_ms: Some(1200),
1017            ..Default::default()
1018        },
1019    );
1020
1021    // Google Gemini native API.
1022    config.providers.insert(
1023        "gemini".to_string(),
1024        ProviderDef {
1025            base_url: "https://generativelanguage.googleapis.com".to_string(),
1026            base_url_env: Some("GEMINI_BASE_URL".to_string()),
1027            auth_style: "header".to_string(),
1028            auth_header: Some("x-goog-api-key".to_string()),
1029            auth_env: AuthEnv::Multiple(vec![
1030                "GEMINI_API_KEY".to_string(),
1031                "GOOGLE_API_KEY".to_string(),
1032            ]),
1033            chat_endpoint: "/v1beta/models".to_string(),
1034            healthcheck: Some(HealthcheckDef {
1035                method: "GET".to_string(),
1036                path: Some("/v1beta/models".to_string()),
1037                url: None,
1038                body: None,
1039            }),
1040            cost_per_1k_in: Some(0.00125),
1041            cost_per_1k_out: Some(0.005),
1042            latency_p50_ms: Some(1800),
1043            ..Default::default()
1044        },
1045    );
1046
1047    // Together AI (OpenAI-compatible)
1048    config.providers.insert(
1049        "together".to_string(),
1050        ProviderDef {
1051            base_url: "https://api.together.xyz/v1".to_string(),
1052            base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
1053            auth_style: "bearer".to_string(),
1054            auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
1055            chat_endpoint: "/chat/completions".to_string(),
1056            completion_endpoint: Some("/completions".to_string()),
1057            healthcheck: Some(HealthcheckDef {
1058                method: "GET".to_string(),
1059                path: Some("/models".to_string()),
1060                url: None,
1061                body: None,
1062            }),
1063            cost_per_1k_in: Some(0.0002),
1064            cost_per_1k_out: Some(0.0006),
1065            latency_p50_ms: Some(1600),
1066            ..Default::default()
1067        },
1068    );
1069
1070    // Groq (OpenAI-compatible)
1071    config.providers.insert(
1072        "groq".to_string(),
1073        ProviderDef {
1074            base_url: "https://api.groq.com/openai/v1".to_string(),
1075            base_url_env: Some("GROQ_BASE_URL".to_string()),
1076            auth_style: "bearer".to_string(),
1077            auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
1078            chat_endpoint: "/chat/completions".to_string(),
1079            completion_endpoint: Some("/completions".to_string()),
1080            healthcheck: Some(HealthcheckDef {
1081                method: "GET".to_string(),
1082                path: Some("/models".to_string()),
1083                url: None,
1084                body: None,
1085            }),
1086            cost_per_1k_in: Some(0.0001),
1087            cost_per_1k_out: Some(0.0003),
1088            latency_p50_ms: Some(450),
1089            ..Default::default()
1090        },
1091    );
1092
1093    // DeepSeek (OpenAI-compatible)
1094    config.providers.insert(
1095        "deepseek".to_string(),
1096        ProviderDef {
1097            base_url: "https://api.deepseek.com/v1".to_string(),
1098            base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
1099            auth_style: "bearer".to_string(),
1100            auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
1101            chat_endpoint: "/chat/completions".to_string(),
1102            completion_endpoint: Some("/completions".to_string()),
1103            healthcheck: Some(HealthcheckDef {
1104                method: "GET".to_string(),
1105                path: Some("/models".to_string()),
1106                url: None,
1107                body: None,
1108            }),
1109            cost_per_1k_in: Some(0.00014),
1110            cost_per_1k_out: Some(0.00028),
1111            latency_p50_ms: Some(1800),
1112            ..Default::default()
1113        },
1114    );
1115
1116    // Fireworks (OpenAI-compatible open-weight hosting)
1117    config.providers.insert(
1118        "fireworks".to_string(),
1119        ProviderDef {
1120            base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1121            base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1122            auth_style: "bearer".to_string(),
1123            auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1124            chat_endpoint: "/chat/completions".to_string(),
1125            completion_endpoint: Some("/completions".to_string()),
1126            healthcheck: Some(HealthcheckDef {
1127                method: "GET".to_string(),
1128                path: Some("/models".to_string()),
1129                url: None,
1130                body: None,
1131            }),
1132            cost_per_1k_in: Some(0.0002),
1133            cost_per_1k_out: Some(0.0006),
1134            latency_p50_ms: Some(1400),
1135            ..Default::default()
1136        },
1137    );
1138
1139    // Alibaba DashScope (OpenAI-compatible Qwen host)
1140    config.providers.insert(
1141        "dashscope".to_string(),
1142        ProviderDef {
1143            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
1144            base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
1145            auth_style: "bearer".to_string(),
1146            auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
1147            chat_endpoint: "/chat/completions".to_string(),
1148            completion_endpoint: Some("/completions".to_string()),
1149            healthcheck: Some(HealthcheckDef {
1150                method: "GET".to_string(),
1151                path: Some("/models".to_string()),
1152                url: None,
1153                body: None,
1154            }),
1155            cost_per_1k_in: Some(0.0003),
1156            cost_per_1k_out: Some(0.0012),
1157            latency_p50_ms: Some(1600),
1158            ..Default::default()
1159        },
1160    );
1161
1162    // AWS Bedrock Runtime. The provider shim resolves AWS credentials through
1163    // env vars, the selected/default profile, container credentials, or EC2
1164    // instance profile credentials, then signs Converse API calls with SigV4.
1165    config.providers.insert(
1166        "bedrock".to_string(),
1167        ProviderDef {
1168            base_url: String::new(),
1169            base_url_env: Some("BEDROCK_BASE_URL".to_string()),
1170            auth_style: "aws_sigv4".to_string(),
1171            auth_env: AuthEnv::None,
1172            chat_endpoint: "/model/{model}/converse".to_string(),
1173            features: vec!["native_tools".to_string()],
1174            latency_p50_ms: Some(2600),
1175            ..Default::default()
1176        },
1177    );
1178
1179    // Azure OpenAI. The deployment name is routed in the URL; callers can
1180    // use the Harn model field as the deployment name or set
1181    // AZURE_OPENAI_DEPLOYMENT.
1182    config.providers.insert(
1183        "azure_openai".to_string(),
1184        ProviderDef {
1185            base_url: "https://{resource}.openai.azure.com".to_string(),
1186            base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
1187            auth_style: "azure_openai".to_string(),
1188            auth_env: AuthEnv::Multiple(vec![
1189                "AZURE_OPENAI_API_KEY".to_string(),
1190                "AZURE_OPENAI_AD_TOKEN".to_string(),
1191                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1192            ]),
1193            chat_endpoint:
1194                "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
1195                    .to_string(),
1196            features: vec!["native_tools".to_string()],
1197            cost_per_1k_in: Some(0.0025),
1198            cost_per_1k_out: Some(0.010),
1199            latency_p50_ms: Some(1900),
1200            ..Default::default()
1201        },
1202    );
1203
1204    // Google Vertex AI Gemini.
1205    config.providers.insert(
1206        "vertex".to_string(),
1207        ProviderDef {
1208            base_url: "https://aiplatform.googleapis.com/v1".to_string(),
1209            base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
1210            auth_style: "bearer".to_string(),
1211            auth_env: AuthEnv::Multiple(vec![
1212                "VERTEX_AI_ACCESS_TOKEN".to_string(),
1213                "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
1214                "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
1215            ]),
1216            chat_endpoint:
1217                "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
1218                    .to_string(),
1219            features: vec!["native_tools".to_string()],
1220            cost_per_1k_in: Some(0.00125),
1221            cost_per_1k_out: Some(0.005),
1222            latency_p50_ms: Some(2100),
1223            ..Default::default()
1224        },
1225    );
1226
1227    // Local OpenAI-compatible server
1228    config.providers.insert(
1229        "local".to_string(),
1230        ProviderDef {
1231            base_url: "http://localhost:8000".to_string(),
1232            base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1233            auth_style: "none".to_string(),
1234            chat_endpoint: "/v1/chat/completions".to_string(),
1235            completion_endpoint: Some("/v1/completions".to_string()),
1236            healthcheck: Some(HealthcheckDef {
1237                method: "GET".to_string(),
1238                path: Some("/v1/models".to_string()),
1239                url: None,
1240                body: None,
1241            }),
1242            cost_per_1k_in: Some(0.0),
1243            cost_per_1k_out: Some(0.0),
1244            latency_p50_ms: Some(900),
1245            ..Default::default()
1246        },
1247    );
1248
1249    // llama.cpp / llama-server OpenAI-compatible server. This is separate
1250    // from `local` so capability rules can distinguish Qwen chat-template
1251    // thinking quirks from other local OpenAI-compatible hosts.
1252    config.providers.insert(
1253        "llamacpp".to_string(),
1254        ProviderDef {
1255            base_url: "http://127.0.0.1:8001".to_string(),
1256            base_url_env: Some("LLAMACPP_BASE_URL".to_string()),
1257            auth_style: "none".to_string(),
1258            chat_endpoint: "/v1/chat/completions".to_string(),
1259            completion_endpoint: Some("/v1/completions".to_string()),
1260            healthcheck: Some(HealthcheckDef {
1261                method: "GET".to_string(),
1262                path: Some("/v1/models".to_string()),
1263                url: None,
1264                body: None,
1265            }),
1266            cost_per_1k_in: Some(0.0),
1267            cost_per_1k_out: Some(0.0),
1268            latency_p50_ms: Some(900),
1269            ..Default::default()
1270        },
1271    );
1272
1273    // Apple Silicon MLX OpenAI-compatible server. Harn owns readiness
1274    // probing; hosts that want script-based auto-start should launch the
1275    // process first, then call Harn again to verify readiness.
1276    config.providers.insert(
1277        "mlx".to_string(),
1278        ProviderDef {
1279            base_url: "http://127.0.0.1:8002".to_string(),
1280            base_url_env: Some("MLX_BASE_URL".to_string()),
1281            auth_style: "none".to_string(),
1282            chat_endpoint: "/v1/chat/completions".to_string(),
1283            completion_endpoint: Some("/v1/completions".to_string()),
1284            healthcheck: Some(HealthcheckDef {
1285                method: "GET".to_string(),
1286                path: Some("/v1/models".to_string()),
1287                url: None,
1288                body: None,
1289            }),
1290            cost_per_1k_in: Some(0.0),
1291            cost_per_1k_out: Some(0.0),
1292            latency_p50_ms: Some(900),
1293            ..Default::default()
1294        },
1295    );
1296
1297    // vLLM OpenAI-compatible server.
1298    config.providers.insert(
1299        "vllm".to_string(),
1300        ProviderDef {
1301            base_url: "http://localhost:8000".to_string(),
1302            base_url_env: Some("VLLM_BASE_URL".to_string()),
1303            auth_style: "none".to_string(),
1304            chat_endpoint: "/v1/chat/completions".to_string(),
1305            completion_endpoint: Some("/v1/completions".to_string()),
1306            healthcheck: Some(HealthcheckDef {
1307                method: "GET".to_string(),
1308                path: Some("/v1/models".to_string()),
1309                url: None,
1310                body: None,
1311            }),
1312            cost_per_1k_in: Some(0.0),
1313            cost_per_1k_out: Some(0.0),
1314            latency_p50_ms: Some(800),
1315            ..Default::default()
1316        },
1317    );
1318
1319    // HuggingFace Text Generation Inference OpenAI-compatible endpoint.
1320    config.providers.insert(
1321        "tgi".to_string(),
1322        ProviderDef {
1323            base_url: "http://localhost:8080".to_string(),
1324            base_url_env: Some("TGI_BASE_URL".to_string()),
1325            auth_style: "none".to_string(),
1326            chat_endpoint: "/v1/chat/completions".to_string(),
1327            completion_endpoint: Some("/v1/completions".to_string()),
1328            healthcheck: Some(HealthcheckDef {
1329                method: "GET".to_string(),
1330                path: Some("/health".to_string()),
1331                url: None,
1332                body: None,
1333            }),
1334            cost_per_1k_in: Some(0.0),
1335            cost_per_1k_out: Some(0.0),
1336            latency_p50_ms: Some(950),
1337            ..Default::default()
1338        },
1339    );
1340
1341    // Default inference rules
1342    config.inference_rules = vec![
1343        InferenceRule {
1344            pattern: Some("claude-*".to_string()),
1345            contains: None,
1346            exact: None,
1347            provider: "anthropic".to_string(),
1348        },
1349        InferenceRule {
1350            pattern: Some("gpt-*".to_string()),
1351            contains: None,
1352            exact: None,
1353            provider: "openai".to_string(),
1354        },
1355        InferenceRule {
1356            pattern: Some("o1*".to_string()),
1357            contains: None,
1358            exact: None,
1359            provider: "openai".to_string(),
1360        },
1361        InferenceRule {
1362            pattern: Some("o3*".to_string()),
1363            contains: None,
1364            exact: None,
1365            provider: "openai".to_string(),
1366        },
1367        InferenceRule {
1368            pattern: Some("o4*".to_string()),
1369            contains: None,
1370            exact: None,
1371            provider: "openai".to_string(),
1372        },
1373        InferenceRule {
1374            pattern: Some("anthropic.claude-*".to_string()),
1375            contains: None,
1376            exact: None,
1377            provider: "bedrock".to_string(),
1378        },
1379        InferenceRule {
1380            pattern: Some("meta.llama*".to_string()),
1381            contains: None,
1382            exact: None,
1383            provider: "bedrock".to_string(),
1384        },
1385        InferenceRule {
1386            pattern: Some("amazon.*".to_string()),
1387            contains: None,
1388            exact: None,
1389            provider: "bedrock".to_string(),
1390        },
1391        InferenceRule {
1392            pattern: Some("mistral.*".to_string()),
1393            contains: None,
1394            exact: None,
1395            provider: "bedrock".to_string(),
1396        },
1397        InferenceRule {
1398            pattern: Some("cohere.*".to_string()),
1399            contains: None,
1400            exact: None,
1401            provider: "bedrock".to_string(),
1402        },
1403        InferenceRule {
1404            pattern: Some("gemini-*".to_string()),
1405            contains: None,
1406            exact: None,
1407            provider: "gemini".to_string(),
1408        },
1409    ];
1410
1411    // Default tier rules
1412    config.tier_rules = vec![
1413        TierRule {
1414            contains: Some("9b".to_string()),
1415            pattern: None,
1416            exact: None,
1417            tier: "small".to_string(),
1418        },
1419        TierRule {
1420            contains: Some("a3b".to_string()),
1421            pattern: None,
1422            exact: None,
1423            tier: "small".to_string(),
1424        },
1425        TierRule {
1426            contains: Some("gemma-4-e2b".to_string()),
1427            pattern: None,
1428            exact: None,
1429            tier: "small".to_string(),
1430        },
1431        TierRule {
1432            contains: Some("gemma-4-e4b".to_string()),
1433            pattern: None,
1434            exact: None,
1435            tier: "small".to_string(),
1436        },
1437        TierRule {
1438            contains: Some("gemma-4-26b".to_string()),
1439            pattern: None,
1440            exact: None,
1441            tier: "mid".to_string(),
1442        },
1443        TierRule {
1444            contains: Some("gemma-4-31b".to_string()),
1445            pattern: None,
1446            exact: None,
1447            tier: "frontier".to_string(),
1448        },
1449        TierRule {
1450            contains: Some("gemma4:26b".to_string()),
1451            pattern: None,
1452            exact: None,
1453            tier: "mid".to_string(),
1454        },
1455        TierRule {
1456            contains: Some("gemma4:31b".to_string()),
1457            pattern: None,
1458            exact: None,
1459            tier: "frontier".to_string(),
1460        },
1461        TierRule {
1462            pattern: Some("claude-*".to_string()),
1463            contains: None,
1464            exact: None,
1465            tier: "frontier".to_string(),
1466        },
1467        TierRule {
1468            exact: Some("gpt-4o".to_string()),
1469            contains: None,
1470            pattern: None,
1471            tier: "frontier".to_string(),
1472        },
1473    ];
1474
1475    config.tier_defaults = TierDefaults {
1476        default: "mid".to_string(),
1477    };
1478
1479    config.aliases.insert(
1480        "frontier".to_string(),
1481        AliasDef {
1482            id: "claude-sonnet-4-20250514".to_string(),
1483            provider: "anthropic".to_string(),
1484            tool_format: None,
1485        },
1486    );
1487    config.aliases.insert(
1488        "tier/frontier".to_string(),
1489        AliasDef {
1490            id: "claude-sonnet-4-20250514".to_string(),
1491            provider: "anthropic".to_string(),
1492            tool_format: None,
1493        },
1494    );
1495    config.aliases.insert(
1496        "mid".to_string(),
1497        AliasDef {
1498            id: "gpt-4o-mini".to_string(),
1499            provider: "openai".to_string(),
1500            tool_format: None,
1501        },
1502    );
1503    config.aliases.insert(
1504        "tier/mid".to_string(),
1505        AliasDef {
1506            id: "gpt-4o-mini".to_string(),
1507            provider: "openai".to_string(),
1508            tool_format: None,
1509        },
1510    );
1511    config.aliases.insert(
1512        "small".to_string(),
1513        AliasDef {
1514            id: "Qwen/Qwen3.5-9B".to_string(),
1515            provider: "openrouter".to_string(),
1516            tool_format: None,
1517        },
1518    );
1519    config.aliases.insert(
1520        "tier/small".to_string(),
1521        AliasDef {
1522            id: "Qwen/Qwen3.5-9B".to_string(),
1523            provider: "openrouter".to_string(),
1524            tool_format: None,
1525        },
1526    );
1527    config.aliases.insert(
1528        "local-gemma4".to_string(),
1529        AliasDef {
1530            id: "gemma-4-26b-a4b-it".to_string(),
1531            provider: "local".to_string(),
1532            tool_format: None,
1533        },
1534    );
1535    config.aliases.insert(
1536        "local-gemma4-26b".to_string(),
1537        AliasDef {
1538            id: "gemma-4-26b-a4b-it".to_string(),
1539            provider: "local".to_string(),
1540            tool_format: None,
1541        },
1542    );
1543    config.aliases.insert(
1544        "local-gemma4-31b".to_string(),
1545        AliasDef {
1546            id: "gemma-4-31b-it".to_string(),
1547            provider: "local".to_string(),
1548            tool_format: None,
1549        },
1550    );
1551    config.aliases.insert(
1552        "local-gemma4-e4b".to_string(),
1553        AliasDef {
1554            id: "gemma-4-e4b-it".to_string(),
1555            provider: "local".to_string(),
1556            tool_format: None,
1557        },
1558    );
1559    config.aliases.insert(
1560        "local-gemma4-e2b".to_string(),
1561        AliasDef {
1562            id: "gemma-4-e2b-it".to_string(),
1563            provider: "local".to_string(),
1564            tool_format: None,
1565        },
1566    );
1567    config.aliases.insert(
1568        "mlx-qwen36-27b".to_string(),
1569        AliasDef {
1570            id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
1571            provider: "mlx".to_string(),
1572            tool_format: None,
1573        },
1574    );
1575
1576    config.qc_defaults.extend(BTreeMap::from([
1577        (
1578            "anthropic".to_string(),
1579            "claude-3-5-haiku-20241022".to_string(),
1580        ),
1581        ("openai".to_string(), "gpt-4o-mini".to_string()),
1582        (
1583            "openrouter".to_string(),
1584            "google/gemini-2.5-flash".to_string(),
1585        ),
1586        ("ollama".to_string(), "llama3.2".to_string()),
1587        ("local".to_string(), "gpt-4o".to_string()),
1588    ]));
1589
1590    config.models.extend(BTreeMap::from([
1591        (
1592            "claude-sonnet-4-20250514".to_string(),
1593            ModelDef {
1594                name: "Claude Sonnet 4".to_string(),
1595                provider: "anthropic".to_string(),
1596                context_window: 200_000,
1597                stream_timeout: None,
1598                capabilities: vec![
1599                    "tools".to_string(),
1600                    "streaming".to_string(),
1601                    "prompt_caching".to_string(),
1602                    "thinking".to_string(),
1603                ],
1604                pricing: Some(ModelPricing {
1605                    input_per_mtok: 3.0,
1606                    output_per_mtok: 15.0,
1607                    cache_read_per_mtok: Some(0.3),
1608                    cache_write_per_mtok: Some(3.75),
1609                }),
1610            },
1611        ),
1612        (
1613            "gpt-4o-mini".to_string(),
1614            ModelDef {
1615                name: "GPT-4o Mini".to_string(),
1616                provider: "openai".to_string(),
1617                context_window: 128_000,
1618                stream_timeout: None,
1619                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1620                pricing: Some(ModelPricing {
1621                    input_per_mtok: 0.15,
1622                    output_per_mtok: 0.60,
1623                    cache_read_per_mtok: None,
1624                    cache_write_per_mtok: None,
1625                }),
1626            },
1627        ),
1628        (
1629            "Qwen/Qwen3.5-9B".to_string(),
1630            ModelDef {
1631                name: "Qwen3.5 9B".to_string(),
1632                provider: "openrouter".to_string(),
1633                context_window: 131_072,
1634                stream_timeout: None,
1635                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1636                pricing: None,
1637            },
1638        ),
1639        (
1640            "llama3.2".to_string(),
1641            ModelDef {
1642                name: "Llama 3.2".to_string(),
1643                provider: "ollama".to_string(),
1644                context_window: 32_000,
1645                stream_timeout: Some(300.0),
1646                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1647                pricing: None,
1648            },
1649        ),
1650    ]));
1651
1652    config
1653}
1654
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    // Start from the built-in defaults and layer the overlay on top.
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
1661
1662#[cfg(test)]
1663mod tests {
1664    use super::*;
1665
    /// Test helper: drop any thread-local provider-config overlay so a
    /// test starts from the built-in default configuration.
    fn reset_overrides() {
        clear_user_overrides();
    }
1669
1670    #[test]
1671    fn test_glob_match_prefix() {
1672        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
1673        assert!(glob_match("gpt-*", "gpt-4o"));
1674        assert!(!glob_match("claude-*", "gpt-4o"));
1675    }
1676
1677    #[test]
1678    fn test_glob_match_suffix() {
1679        assert!(glob_match("*-latest", "llama3.2-latest"));
1680        assert!(!glob_match("*-latest", "llama3.2"));
1681    }
1682
1683    #[test]
1684    fn test_glob_match_middle() {
1685        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
1686        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
1687    }
1688
1689    #[test]
1690    fn test_glob_match_exact() {
1691        assert!(glob_match("gpt-4o", "gpt-4o"));
1692        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
1693    }
1694
    #[test]
    fn test_infer_provider_from_defaults() {
        // Serialize env mutation across tests and stash the caller's
        // HARN_DEFAULT_PROVIDER override so it can be restored afterwards.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        // Pattern rules from `default_config`.
        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        // Expected heuristics: `org/model` ids go to openrouter,
        // `name:tag` ids go to ollama, and ids matching nothing fall back
        // to the default provider (anthropic in the built-in config).
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        // Restore the pre-test environment.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
1720
1721    #[test]
1722    fn test_infer_provider_prefix_rules() {
1723        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
1724        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
1725        // Even when the id also contains `/`, the local transport prefix wins.
1726        assert_eq!(infer_provider("local:owner/model"), "ollama");
1727        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
1728    }
1729
1730    #[test]
1731    fn test_openrouter_inference_requires_one_slash() {
1732        let _guard = crate::llm::env_lock().lock().expect("env lock");
1733        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1734        unsafe {
1735            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1736        }
1737
1738        assert_eq!(infer_provider("org/model"), "openrouter");
1739        assert_eq!(infer_provider("org/team/model"), "anthropic");
1740
1741        unsafe {
1742            match prev_default_provider {
1743                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1744                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1745            }
1746        }
1747    }
1748
1749    #[test]
1750    fn test_resolve_model_info_normalizes_provider_prefixes() {
1751        let local = resolve_model_info("local:gemma-4-e4b-it");
1752        assert_eq!(local.id, "gemma-4-e4b-it");
1753        assert_eq!(local.provider, "ollama");
1754
1755        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
1756        assert_eq!(ollama.id, "qwen3:30b-a3b");
1757        assert_eq!(ollama.provider, "ollama");
1758
1759        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
1760        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
1761        assert_eq!(hf.provider, "huggingface");
1762    }
1763
1764    #[test]
1765    fn test_model_tier_from_defaults() {
1766        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
1767        assert_eq!(model_tier("gpt-4o"), "frontier");
1768        assert_eq!(model_tier("Qwen3.5-9B"), "small");
1769        assert_eq!(model_tier("deepseek-v3"), "mid");
1770    }
1771
1772    #[test]
1773    fn test_resolve_model_unknown_alias() {
1774        let (id, provider) = resolve_model("gpt-4o");
1775        assert_eq!(id, "gpt-4o");
1776        assert!(provider.is_none());
1777    }
1778
1779    #[test]
1780    fn test_provider_names() {
1781        let names = provider_names();
1782        assert!(names.len() >= 7);
1783        assert!(names.contains(&"anthropic".to_string()));
1784        assert!(names.contains(&"together".to_string()));
1785        assert!(names.contains(&"local".to_string()));
1786        assert!(names.contains(&"mlx".to_string()));
1787        assert!(names.contains(&"openai".to_string()));
1788        assert!(names.contains(&"ollama".to_string()));
1789        assert!(names.contains(&"bedrock".to_string()));
1790        assert!(names.contains(&"azure_openai".to_string()));
1791        assert!(names.contains(&"vertex".to_string()));
1792    }
1793
1794    #[test]
1795    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
1796        let mut overlay = ProvidersConfig {
1797            default_provider: Some("ollama".to_string()),
1798            ..Default::default()
1799        };
1800        overlay.aliases.insert(
1801            "quickstart".to_string(),
1802            AliasDef {
1803                id: "llama3.2".to_string(),
1804                provider: "ollama".to_string(),
1805                tool_format: None,
1806            },
1807        );
1808
1809        let merged = merge_global_config(overlay);
1810
1811        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
1812        assert!(merged.providers.contains_key("anthropic"));
1813        assert!(merged.providers.contains_key("ollama"));
1814        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
1815    }
1816
1817    #[test]
1818    fn test_resolve_tier_model_default_aliases() {
1819        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
1820        assert_eq!(model, "claude-sonnet-4-20250514");
1821        assert_eq!(provider, "anthropic");
1822
1823        let (model, provider) = resolve_tier_model("small", None).unwrap();
1824        assert_eq!(model, "Qwen/Qwen3.5-9B");
1825        assert_eq!(provider, "openrouter");
1826    }
1827
1828    #[test]
1829    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
1830        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
1831        assert_eq!(model, "gpt-4o-mini");
1832        assert_eq!(provider, "openai");
1833    }
1834
1835    #[test]
1836    fn test_provider_config_anthropic() {
1837        let pdef = provider_config("anthropic").unwrap();
1838        assert_eq!(pdef.auth_style, "header");
1839        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
1840    }
1841
1842    #[test]
1843    fn test_provider_config_mlx() {
1844        let pdef = provider_config("mlx").unwrap();
1845        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
1846        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
1847        assert_eq!(
1848            pdef.healthcheck.unwrap().path.as_deref(),
1849            Some("/v1/models")
1850        );
1851
1852        let (model, provider) = resolve_model("mlx-qwen36-27b");
1853        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
1854        assert_eq!(provider.as_deref(), Some("mlx"));
1855    }
1856
1857    #[test]
1858    fn test_enterprise_provider_defaults_and_inference() {
1859        let bedrock = provider_config("bedrock").unwrap();
1860        assert_eq!(bedrock.auth_style, "aws_sigv4");
1861        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
1862        assert_eq!(
1863            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
1864            "bedrock"
1865        );
1866        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
1867
1868        let azure = provider_config("azure_openai").unwrap();
1869        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
1870        assert_eq!(
1871            auth_env_names(&azure.auth_env),
1872            vec![
1873                "AZURE_OPENAI_API_KEY".to_string(),
1874                "AZURE_OPENAI_AD_TOKEN".to_string(),
1875                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1876            ]
1877        );
1878
1879        let vertex = provider_config("vertex").unwrap();
1880        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
1881        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
1882    }
1883
1884    #[test]
1885    fn test_default_provider_env_override_for_unknown_model() {
1886        let _guard = crate::llm::env_lock().lock().expect("env lock");
1887        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1888        unsafe {
1889            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
1890        }
1891
1892        let inference = infer_provider_detail("unknown-model");
1893
1894        unsafe {
1895            match prev_default_provider {
1896                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1897                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1898            }
1899        }
1900
1901        assert_eq!(inference.provider, "openai");
1902        assert_eq!(
1903            inference.source,
1904            crate::llm::provider::ProviderInferenceSource::DefaultFallback
1905        );
1906    }
1907
1908    #[test]
1909    fn test_resolve_base_url_no_env() {
1910        let pdef = ProviderDef {
1911            base_url: "https://example.com".to_string(),
1912            ..Default::default()
1913        };
1914        assert_eq!(resolve_base_url(&pdef), "https://example.com");
1915    }
1916
1917    #[test]
1918    fn test_default_config_roundtrip() {
1919        let config = default_config();
1920        assert!(!config.providers.is_empty());
1921        assert!(!config.inference_rules.is_empty());
1922        assert!(!config.tier_rules.is_empty());
1923        assert_eq!(config.tier_defaults.default, "mid");
1924    }
1925
1926    #[test]
1927    fn test_external_config_overlays_default_catalog() {
1928        let mut config = default_config();
1929        let mut overlay = ProvidersConfig {
1930            default_provider: Some("ollama".to_string()),
1931            ..Default::default()
1932        };
1933        overlay.providers.insert(
1934            "custom".to_string(),
1935            ProviderDef {
1936                base_url: "https://llm.example.test/v1".to_string(),
1937                chat_endpoint: "/chat/completions".to_string(),
1938                ..Default::default()
1939            },
1940        );
1941
1942        config.merge_from(&overlay);
1943
1944        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
1945        assert!(config.providers.contains_key("custom"));
1946        assert!(config.providers.contains_key("anthropic"));
1947        assert!(config.providers.contains_key("ollama"));
1948    }
1949
1950    #[test]
1951    fn test_model_params_empty() {
1952        let params = model_params("claude-sonnet-4-20250514");
1953        assert!(params.is_empty());
1954    }
1955
1956    #[test]
1957    fn test_user_overrides_add_provider_and_alias() {
1958        reset_overrides();
1959        let mut overlay = ProvidersConfig::default();
1960        overlay.providers.insert(
1961            "acme".to_string(),
1962            ProviderDef {
1963                base_url: "https://llm.acme.test/v1".to_string(),
1964                chat_endpoint: "/chat/completions".to_string(),
1965                ..Default::default()
1966            },
1967        );
1968        overlay.aliases.insert(
1969            "acme-fast".to_string(),
1970            AliasDef {
1971                id: "acme/model-fast".to_string(),
1972                provider: "acme".to_string(),
1973                tool_format: Some("native".to_string()),
1974            },
1975        );
1976        set_user_overrides(Some(overlay));
1977
1978        let (model, provider) = resolve_model("acme-fast");
1979        assert_eq!(model, "acme/model-fast");
1980        assert_eq!(provider.as_deref(), Some("acme"));
1981        assert!(provider_names().contains(&"acme".to_string()));
1982        assert_eq!(
1983            provider_config("acme").map(|provider| provider.base_url),
1984            Some("https://llm.acme.test/v1".to_string())
1985        );
1986
1987        reset_overrides();
1988    }
1989
1990    #[test]
1991    fn test_default_tool_format_uses_capability_matrix() {
1992        reset_overrides();
1993
1994        assert_eq!(
1995            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
1996            "native"
1997        );
1998        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
1999    }
2000
    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        // Installing a thread-local overlay must surface its model catalog
        // entry, pricing, and per-provider QC default through the public
        // lookup helpers.
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    // Prices are expressed per million tokens in the config.
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(overlay));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        // NOTE(review): only "streaming" survives into the catalog entry even
        // though the overlay also listed "tools" — presumably the catalog
        // filters capabilities to a supported set; confirm this is intended.
        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        // Per-mtok prices (1.25 / 2.5) are expected scaled down to per-1k
        // token prices (0.00125 / 0.0025) by the lookup helper.
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }
2044
2045    #[test]
2046    fn test_user_overrides_prepend_inference_rules() {
2047        reset_overrides();
2048        let mut overlay = ProvidersConfig::default();
2049        overlay.inference_rules.push(InferenceRule {
2050            pattern: Some("internal-*".to_string()),
2051            contains: None,
2052            exact: None,
2053            provider: "openai".to_string(),
2054        });
2055        set_user_overrides(Some(overlay));
2056
2057        assert_eq!(infer_provider("internal-foo"), "openai");
2058
2059        reset_overrides();
2060    }
2061}