// harn_vm/llm_config.rs — LLM provider configuration.

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide providers config, initialized once on first access by
/// `load_config()` (built-in defaults merged with an optional external file).
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Filesystem path of the external config merged into `CONFIG`, if any;
/// stays unset when only built-in defaults are active.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();

thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Top-level provider catalog deserialized from `providers.toml` (and from
/// `[llm]` overlays in `harn.toml` / package manifests). Every field is
/// optional in TOML; missing tables fall back to their `Default` values.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider used when nothing else selects one (see `default_provider()`).
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider name -> transport/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias name -> concrete model/provider binding.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id -> catalog entry (context window, pricing, ...).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lower-cased provider name -> default QC model id.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered rules mapping model ids to providers; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered rules mapping model ids to capability tiers; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Fallback tier used when no tier rule or heuristic applies.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Model-id glob pattern -> default request parameters (temperature, ...).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
impl ProvidersConfig {
    /// True when every field still holds its `Default` value, i.e. the config
    /// carries no meaningful configuration at all.
    pub fn is_empty(&self) -> bool {
        self.default_provider.is_none()
            && self.providers.is_empty()
            && self.aliases.is_empty()
            && self.models.is_empty()
            && self.qc_defaults.is_empty()
            && self.inference_rules.is_empty()
            && self.tier_rules.is_empty()
            && self.model_defaults.is_empty()
            // `TierDefaults` has no emptiness notion of its own; "still the
            // serde default" ("mid") is the equivalent check.
            && self.tier_defaults.default == default_mid()
    }

    /// Merge `overlay` into `self`, with the overlay taking precedence:
    /// - map entries overwrite same-keyed entries;
    /// - overlay rule lists are *prepended* so they are evaluated before
    ///   existing rules (rule matching is first-match-wins);
    /// - scalar settings replace ours only when the overlay customized them.
    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
        // BTreeMap::extend overwrites on key collision -> overlay wins.
        self.providers.extend(overlay.providers.clone());
        self.aliases.extend(overlay.aliases.clone());
        self.models.extend(overlay.models.clone());
        self.qc_defaults.extend(overlay.qc_defaults.clone());

        if overlay.default_provider.is_some() {
            self.default_provider = overlay.default_provider.clone();
        }

        if !overlay.inference_rules.is_empty() {
            let mut merged = overlay.inference_rules.clone();
            merged.extend(self.inference_rules.clone());
            self.inference_rules = merged;
        }

        if !overlay.tier_rules.is_empty() {
            let mut merged = overlay.tier_rules.clone();
            merged.extend(self.tier_rules.clone());
            self.tier_rules = merged;
        }

        // Only replace the tier default when the overlay changed it from the
        // serde default, so an untouched overlay table doesn't reset ours.
        if overlay.tier_defaults.default != default_mid() {
            self.tier_defaults = overlay.tier_defaults.clone();
        }

        // Per-pattern parameter maps merge key-wise, overlay values winning.
        for (pattern, defaults) in &overlay.model_defaults {
            self.model_defaults
                .entry(pattern.clone())
                .or_default()
                .extend(defaults.clone());
        }
    }
}
86
/// Transport, auth, and economics definition for one LLM provider endpoint.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Human-readable provider name for display purposes.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Display icon identifier.
    #[serde(default)]
    pub icon: Option<String>,
    /// API base URL, e.g. "https://api.openai.com/v1".
    pub base_url: String,
    /// Env var that overrides `base_url` when set (see `resolve_base_url`).
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme: "bearer" (default), "header", or "none".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name carrying the credential, e.g. "x-api-key".
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var name(s) holding the API key; see `AuthEnv`.
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Additional static headers sent with requests.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint path appended to the base URL, e.g. "/chat/completions".
    #[serde(default)]
    pub chat_endpoint: String,
    /// Optional legacy text-completion endpoint path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional liveness-probe definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Legacy provider-level feature flags, e.g. "native_tools".
    #[serde(default)]
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
135impl Default for ProviderDef {
136    fn default() -> Self {
137        Self {
138            display_name: None,
139            icon: None,
140            base_url: String::new(),
141            base_url_env: None,
142            auth_style: default_bearer(),
143            auth_header: None,
144            auth_env: AuthEnv::None,
145            extra_headers: BTreeMap::new(),
146            chat_endpoint: String::new(),
147            completion_endpoint: None,
148            healthcheck: None,
149            features: Vec::new(),
150            fallback: None,
151            retry_count: None,
152            retry_delay_ms: None,
153            rpm: None,
154            cost_per_1k_in: None,
155            cost_per_1k_out: None,
156            latency_p50_ms: None,
157        }
158    }
159}
160
/// Serde default for `ProviderDef::auth_style`: bearer-token auth.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No env var configured; the provider needs no key.
    #[default]
    None,
    /// A single env var name.
    Single(String),
    /// Several candidate env var names, tried in order.
    Multiple(Vec<String>),
}
175
/// How to probe a provider endpoint for liveness.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method, e.g. "GET" or "POST".
    pub method: String,
    /// Path appended to the provider base URL.
    #[serde(default)]
    pub path: Option<String>,
    // NOTE(review): presumably an absolute URL used instead of base_url+path
    // when set — confirm against the healthcheck consumer.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body sent with the probe.
    #[serde(default)]
    pub body: Option<String>,
}
186
/// A short model alias bound to a concrete provider model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Provider-native model id the alias resolves to.
    pub id: String,
    /// Provider name that serves this model.
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Static catalog pricing in USD per million tokens.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// USD per 1M input tokens.
    pub input_per_mtok: f64,
    /// USD per 1M output tokens.
    pub output_per_mtok: f64,
    /// USD per 1M cached-read tokens, when the provider discounts them.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// USD per 1M cache-write tokens, when the provider charges them.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog entry describing one model's identity, capacity, and pricing.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Model name as listed in the catalog.
    pub name: String,
    /// Provider serving this model.
    pub provider: String,
    /// Maximum context window in tokens.
    pub context_window: u64,
    // NOTE(review): units not shown here — presumably seconds; confirm
    // against the streaming consumer.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Legacy capability tags; recomputed from the capability matrix by
    /// `with_effective_capability_tags`, so TOML values are display-only.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Static pricing; `None` means unknown (callers fall back elsewhere).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
221
/// Fully-resolved model identity returned by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Provider-native model id (transport prefixes stripped).
    pub id: String,
    /// Alias-declared or inferred provider name.
    pub provider: String,
    /// The alias/selector that produced this resolution, when one matched.
    pub alias: Option<String>,
    /// Tool-calling format: "native" or "text".
    pub tool_format: String,
    /// Capability tier, e.g. "small", "mid", "frontier".
    pub tier: String,
}
230
/// Rule mapping model ids to a provider. Any of the three matchers that is
/// set can trigger the rule; rules are evaluated in order, first match wins.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider to select when the rule matches.
    pub provider: String,
}
241
/// Rule mapping model ids to a capability tier. Any of the three matchers
/// that is set can trigger the rule; rules are evaluated in order.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier to assign when the rule matches.
    pub tier: String,
}
252
/// Fallback tier configuration (the `[tier_defaults]` table).
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Tier used when no rule or heuristic classifies a model ("mid").
    #[serde(default = "default_mid")]
    pub default: String,
}
258
259impl Default for TierDefaults {
260    fn default() -> Self {
261        Self {
262            default: default_mid(),
263        }
264    }
265}
266
/// Serde default for `TierDefaults::default`: the "mid" tier.
fn default_mid() -> String {
    String::from("mid")
}
270
/// Load and cache the providers config. Called once at VM startup.
///
/// Precedence: built-in defaults, overlaid with the file named by
/// `HARN_PROVIDERS_CONFIG` if it loads, else `~/.config/harn/providers.toml`
/// if it loads. The winning file's path is recorded in `CONFIG_PATH` for
/// `loaded_config_path()`.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        let mut config = default_config();
        // Extra stderr diagnostics are opt-in via either env toggle.
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        // An explicit path from the environment wins over the per-user file.
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
                config.merge_from(&overlay);
                // `set` only errs if already initialized; nothing to do then.
                let _ = CONFIG_PATH.set(path);
                return config;
            }
        }
        // Fall back to the per-user config. Read errors stay silent here
        // (verbose=false) because the file is optional.
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            if let Some(overlay) = read_external_config(&path, false) {
                config.merge_from(&overlay);
                let _ = CONFIG_PATH.set(path);
                return config;
            }
        }
        config
    })
}
300
301fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
302    match std::fs::read_to_string(path) {
303        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
304            Ok(config) => {
305                if verbose {
306                    eprintln!(
307                        "[llm_config] Loaded {} providers, {} aliases from {}",
308                        config.providers.len(),
309                        config.aliases.len(),
310                        path
311                    );
312                }
313                Some(config)
314            }
315            Err(error) => {
316                eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
317                None
318            }
319        },
320        Err(error) => {
321            if verbose {
322                eprintln!("[llm_config] Cannot read {}: {}", path, error);
323            }
324            None
325        }
326    }
327}
328
329/// Returns the filesystem path of the currently-loaded providers config, if
330/// any. Returns `None` when built-in defaults are active.
331pub fn loaded_config_path() -> Option<std::path::PathBuf> {
332    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
333    let _ = load_config();
334    CONFIG_PATH.get().map(std::path::PathBuf::from)
335}
336
337/// Install per-run provider config overlays. The overlay uses the same shape as
338/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
339/// manifests. Passing `None` clears the overlay.
340pub fn set_user_overrides(config: Option<ProvidersConfig>) {
341    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
342}
343
/// Clear per-run provider config overlays.
///
/// Shorthand for `set_user_overrides(None)`; affects only the current thread
/// since the overlay storage is thread-local.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
348
349fn effective_config() -> ProvidersConfig {
350    let mut merged = load_config().clone();
351    USER_OVERRIDES.with(|cell| {
352        if let Some(overlay) = cell.borrow().as_ref() {
353            merged.merge_from(overlay);
354        }
355    });
356    merged
357}
358
359/// Resolve a model alias to (model_id, provider_name).
360pub fn resolve_model(alias: &str) -> (String, Option<String>) {
361    let config = effective_config();
362    if let Some(a) = config.aliases.get(alias) {
363        return (a.id.clone(), Some(a.provider.clone()));
364    }
365    (normalize_model_id(alias), None)
366}
367
/// Strip host/provider selector prefixes that identify transport, not the
/// provider-native model id. This mirrors Burin's existing normalization so
/// `ollama:qwen3:30b` reaches Ollama as `qwen3:30b` instead of an invalid
/// model named `ollama`.
pub fn normalize_model_id(raw: &str) -> String {
    const TRANSPORT_PREFIXES: [&str; 4] = ["ollama:", "local:", "huggingface:", "hf:"];
    TRANSPORT_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
380
381/// Resolve an alias or selector into the complete catalog identity hosts need:
382/// provider inference, prefix-normalized model id, default tool format, and tier.
383pub fn resolve_model_info(selector: &str) -> ResolvedModel {
384    let config = effective_config();
385    if let Some(alias) = config.aliases.get(selector) {
386        let id = alias.id.clone();
387        let provider = alias.provider.clone();
388        let tool_format = alias
389            .tool_format
390            .clone()
391            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
392        return ResolvedModel {
393            tier: model_tier_with_config(&config, &id),
394            id,
395            provider,
396            alias: Some(selector.to_string()),
397            tool_format,
398        };
399    }
400
401    let provider = infer_provider_with_config(&config, selector).provider;
402    let id = normalize_model_id(selector);
403    let tool_format = default_tool_format_with_config(&config, &id, &provider);
404    let tier = model_tier_with_config(&config, &id);
405    ResolvedModel {
406        id,
407        provider,
408        alias: None,
409        tool_format,
410        tier,
411    }
412}
413
/// Infer provider from a model ID using inference rules.
///
/// Convenience wrapper over `infer_provider_detail` that keeps only the
/// provider name.
pub fn infer_provider(model_id: &str) -> String {
    infer_provider_detail(model_id).provider
}
418
/// Infer provider from a model ID and retain whether the configured default was used.
pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
    // Snapshot the merged config so rules and overlays are both honored.
    let config = effective_config();
    infer_provider_with_config(&config, model_id)
}
424
425fn infer_provider_with_config(
426    config: &ProvidersConfig,
427    model_id: &str,
428) -> crate::llm::provider::ProviderInference {
429    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
430        return crate::llm::provider::ProviderInference::builtin("ollama");
431    }
432    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
433        return crate::llm::provider::ProviderInference::builtin("huggingface");
434    }
435    for rule in &config.inference_rules {
436        if let Some(exact) = &rule.exact {
437            if model_id == exact {
438                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
439            }
440        }
441        if let Some(pattern) = &rule.pattern {
442            if glob_match(pattern, model_id) {
443                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
444            }
445        }
446        if let Some(substr) = &rule.contains {
447            if model_id.contains(substr.as_str()) {
448                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
449            }
450        }
451    }
452    crate::llm::provider::infer_provider_from_model_id(
453        model_id,
454        &default_provider_with_config(config),
455    )
456}
457
/// Name of the provider to use when nothing selects one explicitly.
///
/// `HARN_DEFAULT_PROVIDER` wins over the configured `default_provider`;
/// blank or "auto" values are skipped; final fallback is "anthropic".
pub fn default_provider() -> String {
    let config = effective_config();
    default_provider_with_config(&config)
}
462
463fn default_provider_with_config(config: &ProvidersConfig) -> String {
464    std::env::var("HARN_DEFAULT_PROVIDER")
465        .ok()
466        .map(|value| value.trim().to_string())
467        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
468        .or_else(|| {
469            config
470                .default_provider
471                .as_deref()
472                .map(str::trim)
473                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
474                .map(str::to_string)
475        })
476        .unwrap_or_else(|| "anthropic".to_string())
477}
478
/// Get model tier ("small", "mid", "frontier").
///
/// Consults configured tier rules first, then built-in heuristics, then the
/// configured tier default.
pub fn model_tier(model_id: &str) -> String {
    let config = effective_config();
    model_tier_with_config(&config, model_id)
}
484
485fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
486    for rule in &config.tier_rules {
487        if let Some(exact) = &rule.exact {
488            if model_id == exact {
489                return rule.tier.clone();
490            }
491        }
492        if let Some(pattern) = &rule.pattern {
493            if glob_match(pattern, model_id) {
494                return rule.tier.clone();
495            }
496        }
497        if let Some(substr) = &rule.contains {
498            if model_id.contains(substr.as_str()) {
499                return rule.tier.clone();
500            }
501        }
502    }
503    let lower = model_id.to_lowercase();
504    if lower.contains("9b") || lower.contains("a3b") {
505        return "small".to_string();
506    }
507    if lower.starts_with("claude-") || lower == "gpt-4o" {
508        return "frontier".to_string();
509    }
510    config.tier_defaults.default.clone()
511}
512
/// Get provider config for resolving base_url, auth, etc.
///
/// Returns an owned clone from the effective (overlay-merged) config.
pub fn provider_config(name: &str) -> Option<ProviderDef> {
    effective_config().providers.get(name).cloned()
}
517
518/// Get model-specific default parameters (temperature, etc.).
519/// Matches glob patterns in model_defaults keys.
520pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
521    let config = effective_config();
522    let mut params = BTreeMap::new();
523    for (pattern, defaults) in &config.model_defaults {
524        if glob_match(pattern, model_id) {
525            for (k, v) in defaults {
526                params.insert(k.clone(), v.clone());
527            }
528        }
529    }
530    params
531}
532
/// Get list of configured provider names.
///
/// Ascending key order, since the backing map is a `BTreeMap`.
pub fn provider_names() -> Vec<String> {
    effective_config().providers.keys().cloned().collect()
}
537
/// Return every configured alias name, sorted deterministically.
///
/// Order comes for free from the `BTreeMap` backing the alias table.
pub fn known_model_names() -> Vec<String> {
    effective_config().aliases.keys().cloned().collect()
}
542
/// Alias name -> definition pairs from the effective config, in key order.
pub fn alias_entries() -> Vec<(String, AliasDef)> {
    effective_config().aliases.into_iter().collect()
}
546
547/// Return every configured model-catalog entry, sorted by provider then id.
548pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
549    let mut entries: Vec<_> = effective_config()
550        .models
551        .into_iter()
552        .map(|(id, model)| {
553            let provider = model.provider.clone();
554            (
555                id.clone(),
556                with_effective_capability_tags(id, provider, model),
557            )
558        })
559        .collect();
560    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
561        model_a
562            .provider
563            .cmp(&model_b.provider)
564            .then_with(|| id_a.cmp(id_b))
565    });
566    entries
567}
568
569pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
570    effective_config()
571        .models
572        .get(model_id)
573        .cloned()
574        .map(|model| {
575            let provider = model.provider.clone();
576            with_effective_capability_tags(model_id.to_string(), provider, model)
577        })
578}
579
/// Default model id for QC passes with `provider`.
///
/// The `BURIN_QC_MODEL` env var wins over the config's `qc_defaults` table,
/// which is keyed by lower-cased provider name.
/// NOTE(review): env name still uses the Burin-era `BURIN_` prefix while the
/// rest of this file uses `HARN_` — confirm this is intentional legacy.
pub fn qc_default_model(provider: &str) -> Option<String> {
    std::env::var("BURIN_QC_MODEL")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .or_else(|| {
            effective_config()
                .qc_defaults
                .get(&provider.to_lowercase())
                .cloned()
        })
}
591
/// Built-in default model id for a provider name.
///
/// "local" and "mlx" honor env overrides before their baked-in fallbacks;
/// unrecognized providers get the Anthropic default.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        "local" => std::env::var("LOCAL_LLM_MODEL")
            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
            .unwrap_or_else(|_| String::from("gpt-4o")),
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| String::from("unsloth/Qwen3.6-27B-UD-MLX-4bit")),
        "openai" => String::from("gpt-4o"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-20250514"),
    }
}
605
/// Owned copy of the effective `qc_defaults` table (provider -> model id).
pub fn qc_defaults() -> BTreeMap<String, String> {
    effective_config().qc_defaults
}
609
/// Static per-million-token pricing for `model_id` from the model catalog,
/// when the catalog entry exists and declares pricing.
pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
    effective_config()
        .models
        .get(model_id)
        .and_then(|model| model.pricing.clone())
}
616
/// Pricing in USD per 1k tokens, as `(input, output)`.
///
/// Model-catalog per-Mtok pricing (scaled down by 1000) wins; otherwise the
/// provider-level economics are used, and only when both input and output
/// costs are present there. `provider` is consulted only on the fallback path.
pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
    model_pricing_per_mtok(model_id)
        .map(|pricing| {
            (
                pricing.input_per_mtok / 1000.0,
                pricing.output_per_mtok / 1000.0,
            )
        })
        .or_else(|| {
            let (input, output, _) = provider_economics(provider);
            match (input, output) {
                (Some(input), Some(output)) => Some((input, output)),
                _ => None,
            }
        })
}
633
634pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
635    match auth_env {
636        AuthEnv::None => Vec::new(),
637        AuthEnv::Single(name) => vec![name.clone()],
638        AuthEnv::Multiple(names) => names.clone(),
639    }
640}
641
/// Whether credentials for `provider` appear to be available.
///
/// Unknown providers count as unavailable, except "ollama", which needs no
/// key. Known providers need no key when `auth_style` is "none" or no auth
/// env is configured; otherwise at least one configured env var must hold a
/// non-blank value.
pub fn provider_key_available(provider: &str) -> bool {
    let Some(pdef) = provider_config(provider) else {
        return provider == "ollama";
    };
    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
        return true;
    }
    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
        std::env::var(env_name)
            .ok()
            .is_some_and(|value| !value.trim().is_empty())
    })
}
655
/// Configured provider names whose credentials look usable right now
/// (see `provider_key_available`).
pub fn available_provider_names() -> Vec<String> {
    provider_names()
        .into_iter()
        .filter(|provider| provider_key_available(provider))
        .collect()
}
662
663/// Check if a provider advertises a legacy provider-level feature.
664pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
665    provider_config(provider)
666        .map(|p| p.features.iter().any(|f| f == feature))
667        .unwrap_or(false)
668}
669
670/// Provider-level catalog pricing/latency. Model-specific static pricing in
671/// `llm::cost` still wins when available; this is the adapter-level fallback
672/// used by routing and portal summaries.
673pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
674    provider_config(provider)
675        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
676        .unwrap_or((None, None, None))
677}
678
/// Resolve the default tool format for a model+provider combination.
/// Priority: alias `tool_format` (matched by model ID) > provider/model
/// capability matrix > legacy provider feature > "text".
pub fn default_tool_format(model: &str, provider: &str) -> String {
    let config = effective_config();
    default_tool_format_with_config(&config, model, provider)
}
686
687fn default_tool_format_with_config(
688    config: &ProvidersConfig,
689    model: &str,
690    provider: &str,
691) -> String {
692    // Aliases match by model ID + provider, or by alias name.
693    for (name, alias) in &config.aliases {
694        let matches = (alias.id == model && alias.provider == provider) || name == model;
695        if matches {
696            if let Some(ref fmt) = alias.tool_format {
697                return fmt.clone();
698            }
699        }
700    }
701    let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
702    let legacy_provider_native = config
703        .providers
704        .get(provider)
705        .map(|p| p.features.iter().any(|f| f == "native_tools"))
706        .unwrap_or(false);
707    if capability_matrix_native || legacy_provider_native {
708        "native".to_string()
709    } else {
710        "text".to_string()
711    }
712}
713
/// Replace `model.capabilities` with tags derived from the canonical
/// capability matrix, returning the updated entry.
fn with_effective_capability_tags(
    model_id: String,
    provider: String,
    mut model: ModelDef,
) -> ModelDef {
    model.capabilities = effective_model_capability_tags(&provider, &model_id);
    model
}
722
723/// Legacy display tags derived from the canonical provider/model capability
724/// matrix. The matrix is the source of truth; `models.*.capabilities` in
725/// providers.toml is accepted only for backwards-compatible parsing.
726pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
727    let caps = crate::llm::capabilities::lookup(provider, model_id);
728    let mut tags = Vec::new();
729    // Today all Harn chat providers expose streaming. Keep this as a
730    // transport baseline rather than a duplicated per-model declaration.
731    tags.push("streaming".to_string());
732    if caps.native_tools {
733        tags.push("tools".to_string());
734    }
735    if !caps.tool_search.is_empty() {
736        tags.push("tool_search".to_string());
737    }
738    if caps.vision || caps.vision_supported {
739        tags.push("vision".to_string());
740    }
741    if caps.audio {
742        tags.push("audio".to_string());
743    }
744    if caps.pdf {
745        tags.push("pdf".to_string());
746    }
747    if caps.files_api_supported {
748        tags.push("files".to_string());
749    }
750    if caps.prompt_caching {
751        tags.push("prompt_caching".to_string());
752    }
753    if !caps.thinking_modes.is_empty() {
754        tags.push("thinking".to_string());
755    }
756    if caps.interleaved_thinking_supported
757        || caps
758            .thinking_modes
759            .iter()
760            .any(|mode| mode == "adaptive" || mode == "effort")
761    {
762        tags.push("extended_thinking".to_string());
763    }
764    if caps.json_schema.is_some() {
765        tags.push("structured_output".to_string());
766    }
767    tags
768}
769
770/// Resolve a tier or alias into a concrete model/provider pair.
771pub fn resolve_tier_model(
772    target: &str,
773    preferred_provider: Option<&str>,
774) -> Option<(String, String)> {
775    let config = effective_config();
776
777    if let Some(alias) = config.aliases.get(target) {
778        return Some((alias.id.clone(), alias.provider.clone()));
779    }
780
781    let candidate_aliases = if let Some(provider) = preferred_provider {
782        vec![
783            format!("{provider}/{target}"),
784            format!("{provider}:{target}"),
785            format!("tier/{target}"),
786            target.to_string(),
787        ]
788    } else {
789        vec![format!("tier/{target}"), target.to_string()]
790    };
791
792    for alias_name in candidate_aliases {
793        if let Some(alias) = config.aliases.get(&alias_name) {
794            return Some((alias.id.clone(), alias.provider.clone()));
795        }
796    }
797
798    None
799}
800
801/// Return all configured alias-backed model/provider pairs whose resolved
802/// model falls into the requested capability tier. The result is de-duplicated
803/// and sorted deterministically by provider then model id.
804pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
805    let config = effective_config();
806    let mut seen = std::collections::BTreeSet::new();
807    let mut candidates = Vec::new();
808
809    for alias in config.aliases.values() {
810        let pair = (alias.id.clone(), alias.provider.clone());
811        if seen.contains(&pair) {
812            continue;
813        }
814        if model_tier(&alias.id) == target {
815            seen.insert(pair.clone());
816            candidates.push(pair);
817        }
818    }
819
820    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
821        provider_a
822            .cmp(provider_b)
823            .then_with(|| model_a.cmp(model_b))
824    });
825    candidates
826}
827
828/// Return all configured alias-backed model/provider pairs. Used by routing
829/// policies that need to compare alternatives across tiers.
830pub fn all_model_candidates() -> Vec<(String, String)> {
831    let config = effective_config();
832    let mut seen = std::collections::BTreeSet::new();
833    let mut candidates = Vec::new();
834
835    for alias in config.aliases.values() {
836        let pair = (alias.id.clone(), alias.provider.clone());
837        if seen.insert(pair.clone()) {
838            candidates.push(pair);
839        }
840    }
841
842    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
843        provider_a
844            .cmp(provider_b)
845            .then_with(|| model_a.cmp(model_b))
846    });
847    candidates
848}
849
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
///
/// `*` matches any (possibly empty) run of characters and may appear any
/// number of times; all other characters match literally. Fixes two defects
/// of the earlier prefix/suffix special-casing: patterns such as "*x*" no
/// longer require a literal `*` in the input, and segments cannot overlap
/// (so "ab*ba" does not match "aba").
fn glob_match(pattern: &str, input: &str) -> bool {
    // Fast path: no wildcard means exact comparison.
    if !pattern.contains('*') {
        return input == pattern;
    }
    let mut segments = pattern.split('*');
    // `split` always yields at least one (possibly empty) leading segment.
    let first = segments.next().unwrap_or("");
    let mut rest = match input.strip_prefix(first) {
        Some(rest) => rest,
        None => return false,
    };
    let mut middle: Vec<&str> = segments.collect();
    // The final segment must anchor at the end of the remaining input.
    let last = middle.pop().unwrap_or("");
    for segment in middle {
        if segment.is_empty() {
            continue; // "**" collapses to "*"
        }
        match rest.find(segment) {
            Some(idx) => rest = &rest[idx + segment.len()..],
            None => return false,
        }
    }
    rest.ends_with(last)
}
867
/// Home directory from `$HOME`; minimal stand-in for a `dirs`-style crate.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
871
872/// Resolve the effective base URL for a provider, checking the `base_url_env`
873/// override first, then falling back to the configured `base_url`.
874pub fn resolve_base_url(pdef: &ProviderDef) -> String {
875    if let Some(env_name) = &pdef.base_url_env {
876        if let Ok(val) = std::env::var(env_name) {
877            // Strip surrounding quotes that some .env parsers leave intact.
878            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
879            if !trimmed.is_empty() {
880                return trimmed.to_string();
881            }
882        }
883    }
884    pdef.base_url.clone()
885}
886
/// Build the compiled-in provider catalog that user config files overlay:
/// provider endpoint/auth definitions, model→provider inference rules,
/// tier rules and defaults, aliases, per-provider QC models, and known-model
/// metadata. Cost/latency figures here are static hints, not live data.
fn default_config() -> ProvidersConfig {
    let mut config = ProvidersConfig {
        default_provider: Some("anthropic".to_string()),
        ..Default::default()
    };

    // Anthropic
    config.providers.insert(
        "anthropic".to_string(),
        ProviderDef {
            base_url: "https://api.anthropic.com/v1".to_string(),
            auth_style: "header".to_string(),
            auth_header: Some("x-api-key".to_string()),
            auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
            extra_headers: BTreeMap::from([(
                "anthropic-version".to_string(),
                "2023-06-01".to_string(),
            )]),
            chat_endpoint: "/messages".to_string(),
            completion_endpoint: None,
            // count_tokens is a cheap authenticated POST, so it doubles as a
            // credential check without generating any completion.
            healthcheck: Some(HealthcheckDef {
                method: "POST".to_string(),
                path: Some("/messages/count_tokens".to_string()),
                url: None,
                body: Some(
                    r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
                        .to_string(),
                ),
            }),
            features: vec!["prompt_caching".to_string(), "thinking".to_string()],
            cost_per_1k_in: Some(0.003),
            cost_per_1k_out: Some(0.015),
            latency_p50_ms: Some(2500),
            ..Default::default()
        },
    );

    // OpenAI
    config.providers.insert(
        "openai".to_string(),
        ProviderDef {
            base_url: "https://api.openai.com/v1".to_string(),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0025),
            cost_per_1k_out: Some(0.010),
            latency_p50_ms: Some(1800),
            ..Default::default()
        },
    );

    // OpenRouter
    config.providers.insert(
        "openrouter".to_string(),
        ProviderDef {
            base_url: "https://openrouter.ai/api/v1".to_string(),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/auth/key".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.003),
            cost_per_1k_out: Some(0.015),
            latency_p50_ms: Some(2200),
            ..Default::default()
        },
    );

    // HuggingFace
    config.providers.insert(
        "huggingface".to_string(),
        ProviderDef {
            base_url: "https://router.huggingface.co/v1".to_string(),
            auth_style: "bearer".to_string(),
            // Two env names are accepted; AuthEnv::Multiple is checked in order.
            auth_env: AuthEnv::Multiple(vec![
                "HF_TOKEN".to_string(),
                "HUGGINGFACE_API_KEY".to_string(),
            ]),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            // Healthcheck uses an absolute `url` (whoami lives on a different
            // host than the inference router), so `path` stays None here.
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                url: Some("https://huggingface.co/api/whoami-v2".to_string()),
                path: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0002),
            cost_per_1k_out: Some(0.0006),
            latency_p50_ms: Some(2400),
            ..Default::default()
        },
    );

    // Ollama default. Hosts can override this to `/v1/chat/completions`
    // via a bundled `providers.toml` (loaded by setting
    // `HARN_PROVIDERS_CONFIG` in the host process). The OpenAI-compat
    // path bypasses Ollama's per-model tool-call post-processors
    // (qwen3coder.go, qwen35.go) which raise HTTP 500s on text-mode
    // responses for the Qwen3.5 family. The default here stays on
    // `/api/chat` so the harn-vm test stub keeps working with Ollama's
    // native NDJSON wire format.
    config.providers.insert(
        "ollama".to_string(),
        ProviderDef {
            base_url: "http://localhost:11434".to_string(),
            base_url_env: Some("OLLAMA_HOST".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/api/chat".to_string(),
            completion_endpoint: Some("/api/generate".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/api/tags".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(1200),
            ..Default::default()
        },
    );

    // Google Gemini native API.
    config.providers.insert(
        "gemini".to_string(),
        ProviderDef {
            base_url: "https://generativelanguage.googleapis.com".to_string(),
            base_url_env: Some("GEMINI_BASE_URL".to_string()),
            auth_style: "header".to_string(),
            auth_header: Some("x-goog-api-key".to_string()),
            auth_env: AuthEnv::Multiple(vec![
                "GEMINI_API_KEY".to_string(),
                "GOOGLE_API_KEY".to_string(),
            ]),
            chat_endpoint: "/v1beta/models".to_string(),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1beta/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.00125),
            cost_per_1k_out: Some(0.005),
            latency_p50_ms: Some(1800),
            ..Default::default()
        },
    );

    // Together AI (OpenAI-compatible)
    config.providers.insert(
        "together".to_string(),
        ProviderDef {
            base_url: "https://api.together.xyz/v1".to_string(),
            base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0002),
            cost_per_1k_out: Some(0.0006),
            latency_p50_ms: Some(1600),
            ..Default::default()
        },
    );

    // Groq (OpenAI-compatible)
    config.providers.insert(
        "groq".to_string(),
        ProviderDef {
            base_url: "https://api.groq.com/openai/v1".to_string(),
            base_url_env: Some("GROQ_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0001),
            cost_per_1k_out: Some(0.0003),
            latency_p50_ms: Some(450),
            ..Default::default()
        },
    );

    // DeepSeek (OpenAI-compatible)
    config.providers.insert(
        "deepseek".to_string(),
        ProviderDef {
            base_url: "https://api.deepseek.com/v1".to_string(),
            base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.00014),
            cost_per_1k_out: Some(0.00028),
            latency_p50_ms: Some(1800),
            ..Default::default()
        },
    );

    // Fireworks (OpenAI-compatible open-weight hosting)
    config.providers.insert(
        "fireworks".to_string(),
        ProviderDef {
            base_url: "https://api.fireworks.ai/inference/v1".to_string(),
            base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0002),
            cost_per_1k_out: Some(0.0006),
            latency_p50_ms: Some(1400),
            ..Default::default()
        },
    );

    // Alibaba DashScope (OpenAI-compatible Qwen host)
    config.providers.insert(
        "dashscope".to_string(),
        ProviderDef {
            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
            base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0003),
            cost_per_1k_out: Some(0.0012),
            latency_p50_ms: Some(1600),
            ..Default::default()
        },
    );

    // AWS Bedrock Runtime. The provider shim resolves AWS credentials through
    // env vars, the selected/default profile, container credentials, or EC2
    // instance profile credentials, then signs Converse API calls with SigV4.
    config.providers.insert(
        "bedrock".to_string(),
        ProviderDef {
            // No static base URL: region-specific, so it comes from the env
            // override or is constructed by the shim.
            base_url: String::new(),
            base_url_env: Some("BEDROCK_BASE_URL".to_string()),
            auth_style: "aws_sigv4".to_string(),
            auth_env: AuthEnv::None,
            chat_endpoint: "/model/{model}/converse".to_string(),
            features: vec!["native_tools".to_string()],
            latency_p50_ms: Some(2600),
            ..Default::default()
        },
    );

    // Azure OpenAI. The deployment name is routed in the URL; callers can
    // use the Harn model field as the deployment name or set
    // AZURE_OPENAI_DEPLOYMENT.
    config.providers.insert(
        "azure_openai".to_string(),
        ProviderDef {
            base_url: "https://{resource}.openai.azure.com".to_string(),
            base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
            auth_style: "azure_openai".to_string(),
            auth_env: AuthEnv::Multiple(vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]),
            chat_endpoint:
                "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
                    .to_string(),
            features: vec!["native_tools".to_string()],
            cost_per_1k_in: Some(0.0025),
            cost_per_1k_out: Some(0.010),
            latency_p50_ms: Some(1900),
            ..Default::default()
        },
    );

    // Google Vertex AI Gemini.
    config.providers.insert(
        "vertex".to_string(),
        ProviderDef {
            base_url: "https://aiplatform.googleapis.com/v1".to_string(),
            base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Multiple(vec![
                "VERTEX_AI_ACCESS_TOKEN".to_string(),
                "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
                "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
            ]),
            chat_endpoint:
                "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
                    .to_string(),
            features: vec!["native_tools".to_string()],
            cost_per_1k_in: Some(0.00125),
            cost_per_1k_out: Some(0.005),
            latency_p50_ms: Some(2100),
            ..Default::default()
        },
    );

    // Local OpenAI-compatible server
    config.providers.insert(
        "local".to_string(),
        ProviderDef {
            base_url: "http://localhost:8000".to_string(),
            base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(900),
            ..Default::default()
        },
    );

    // Apple Silicon MLX OpenAI-compatible server. Harn owns readiness
    // probing; hosts that want script-based auto-start should launch the
    // process first, then call Harn again to verify readiness.
    config.providers.insert(
        "mlx".to_string(),
        ProviderDef {
            base_url: "http://127.0.0.1:8002".to_string(),
            base_url_env: Some("MLX_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(900),
            ..Default::default()
        },
    );

    // vLLM OpenAI-compatible server.
    config.providers.insert(
        "vllm".to_string(),
        ProviderDef {
            base_url: "http://localhost:8000".to_string(),
            base_url_env: Some("VLLM_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(800),
            ..Default::default()
        },
    );

    // HuggingFace Text Generation Inference OpenAI-compatible endpoint.
    config.providers.insert(
        "tgi".to_string(),
        ProviderDef {
            base_url: "http://localhost:8080".to_string(),
            base_url_env: Some("TGI_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/health".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(950),
            ..Default::default()
        },
    );

    // Default inference rules
    config.inference_rules = vec![
        InferenceRule {
            pattern: Some("claude-*".to_string()),
            contains: None,
            exact: None,
            provider: "anthropic".to_string(),
        },
        InferenceRule {
            pattern: Some("gpt-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o1*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o3*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o4*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        // Bedrock model ids are vendor-prefixed (e.g. `anthropic.claude-…`).
        InferenceRule {
            pattern: Some("anthropic.claude-*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("meta.llama*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("amazon.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("mistral.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("cohere.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("gemini-*".to_string()),
            contains: None,
            exact: None,
            provider: "gemini".to_string(),
        },
    ];

    // Default tier rules
    config.tier_rules = vec![
        TierRule {
            contains: Some("9b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("a3b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-e2b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-e4b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-26b".to_string()),
            pattern: None,
            exact: None,
            tier: "mid".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-31b".to_string()),
            pattern: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            contains: Some("gemma4:26b".to_string()),
            pattern: None,
            exact: None,
            tier: "mid".to_string(),
        },
        TierRule {
            contains: Some("gemma4:31b".to_string()),
            pattern: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            pattern: Some("claude-*".to_string()),
            contains: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            exact: Some("gpt-4o".to_string()),
            contains: None,
            pattern: None,
            tier: "frontier".to_string(),
        },
    ];

    // Models matching no tier rule fall back to "mid".
    config.tier_defaults = TierDefaults {
        default: "mid".to_string(),
    };

    // Built-in aliases. Each tier has both a bare name ("frontier") and a
    // namespaced form ("tier/frontier") pointing at the same target.
    config.aliases.insert(
        "frontier".to_string(),
        AliasDef {
            id: "claude-sonnet-4-20250514".to_string(),
            provider: "anthropic".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/frontier".to_string(),
        AliasDef {
            id: "claude-sonnet-4-20250514".to_string(),
            provider: "anthropic".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "mid".to_string(),
        AliasDef {
            id: "gpt-4o-mini".to_string(),
            provider: "openai".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/mid".to_string(),
        AliasDef {
            id: "gpt-4o-mini".to_string(),
            provider: "openai".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "small".to_string(),
        AliasDef {
            id: "Qwen/Qwen3.5-9B".to_string(),
            provider: "openrouter".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/small".to_string(),
        AliasDef {
            id: "Qwen/Qwen3.5-9B".to_string(),
            provider: "openrouter".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4".to_string(),
        AliasDef {
            id: "gemma-4-26b-a4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-26b".to_string(),
        AliasDef {
            id: "gemma-4-26b-a4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-31b".to_string(),
        AliasDef {
            id: "gemma-4-31b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-e4b".to_string(),
        AliasDef {
            id: "gemma-4-e4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-e2b".to_string(),
        AliasDef {
            id: "gemma-4-e2b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "mlx-qwen36-27b".to_string(),
        AliasDef {
            id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
            provider: "mlx".to_string(),
            tool_format: None,
        },
    );

    // Per-provider quick-check (QC) model defaults: cheap models used for
    // lightweight checks against each provider.
    config.qc_defaults.extend(BTreeMap::from([
        (
            "anthropic".to_string(),
            "claude-3-5-haiku-20241022".to_string(),
        ),
        ("openai".to_string(), "gpt-4o-mini".to_string()),
        (
            "openrouter".to_string(),
            "google/gemini-2.5-flash".to_string(),
        ),
        ("ollama".to_string(), "llama3.2".to_string()),
        ("local".to_string(), "gpt-4o".to_string()),
    ]));

    // Known-model metadata: display names, context windows, capabilities,
    // and (where stable) pricing per million tokens.
    config.models.extend(BTreeMap::from([
        (
            "claude-sonnet-4-20250514".to_string(),
            ModelDef {
                name: "Claude Sonnet 4".to_string(),
                provider: "anthropic".to_string(),
                context_window: 200_000,
                stream_timeout: None,
                capabilities: vec![
                    "tools".to_string(),
                    "streaming".to_string(),
                    "prompt_caching".to_string(),
                    "thinking".to_string(),
                ],
                pricing: Some(ModelPricing {
                    input_per_mtok: 3.0,
                    output_per_mtok: 15.0,
                    cache_read_per_mtok: Some(0.3),
                    cache_write_per_mtok: Some(3.75),
                }),
            },
        ),
        (
            "gpt-4o-mini".to_string(),
            ModelDef {
                name: "GPT-4o Mini".to_string(),
                provider: "openai".to_string(),
                context_window: 128_000,
                stream_timeout: None,
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 0.15,
                    output_per_mtok: 0.60,
                    cache_read_per_mtok: None,
                    cache_write_per_mtok: None,
                }),
            },
        ),
        (
            "Qwen/Qwen3.5-9B".to_string(),
            ModelDef {
                name: "Qwen3.5 9B".to_string(),
                provider: "openrouter".to_string(),
                context_window: 131_072,
                stream_timeout: None,
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: None,
            },
        ),
        (
            "llama3.2".to_string(),
            ModelDef {
                name: "Llama 3.2".to_string(),
                provider: "ollama".to_string(),
                context_window: 32_000,
                stream_timeout: Some(300.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: None,
            },
        ),
    ]));

    config
}
1630
/// Test-only helper: overlay the given config onto the built-in defaults
/// and return the merged result.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
1637
1638#[cfg(test)]
1639mod tests {
1640    use super::*;
1641
    /// Test helper: drop any thread-local provider overrides so a test
    /// starts from the built-in defaults.
    fn reset_overrides() {
        clear_user_overrides();
    }
1645
1646    #[test]
1647    fn test_glob_match_prefix() {
1648        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
1649        assert!(glob_match("gpt-*", "gpt-4o"));
1650        assert!(!glob_match("claude-*", "gpt-4o"));
1651    }
1652
1653    #[test]
1654    fn test_glob_match_suffix() {
1655        assert!(glob_match("*-latest", "llama3.2-latest"));
1656        assert!(!glob_match("*-latest", "llama3.2"));
1657    }
1658
1659    #[test]
1660    fn test_glob_match_middle() {
1661        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
1662        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
1663    }
1664
1665    #[test]
1666    fn test_glob_match_exact() {
1667        assert!(glob_match("gpt-4o", "gpt-4o"));
1668        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
1669    }
1670
    #[test]
    fn test_infer_provider_from_defaults() {
        // Serialize env mutation across tests, then clear the override so the
        // built-in inference rules are what actually gets exercised.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        // No rule matches: falls back to the built-in default provider.
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        // Restore the prior env state so later tests see what they expect.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
1696
1697    #[test]
1698    fn test_infer_provider_prefix_rules() {
1699        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
1700        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
1701        // Even when the id also contains `/`, the local transport prefix wins.
1702        assert_eq!(infer_provider("local:owner/model"), "ollama");
1703        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
1704    }
1705
    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        // Serialize env mutation and clear the override so the slash-count
        // heuristic (not an env-pinned provider) decides the outcome.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        // Exactly one `/` reads as `org/model` → openrouter; two or more
        // slashes do not, so the id falls back to the default provider.
        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        // Put the env back the way it was for subsequent tests.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
1724
1725    #[test]
1726    fn test_resolve_model_info_normalizes_provider_prefixes() {
1727        let local = resolve_model_info("local:gemma-4-e4b-it");
1728        assert_eq!(local.id, "gemma-4-e4b-it");
1729        assert_eq!(local.provider, "ollama");
1730
1731        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
1732        assert_eq!(ollama.id, "qwen3:30b-a3b");
1733        assert_eq!(ollama.provider, "ollama");
1734
1735        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
1736        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
1737        assert_eq!(hf.provider, "huggingface");
1738    }
1739
1740    #[test]
1741    fn test_model_tier_from_defaults() {
1742        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
1743        assert_eq!(model_tier("gpt-4o"), "frontier");
1744        assert_eq!(model_tier("Qwen3.5-9B"), "small");
1745        assert_eq!(model_tier("deepseek-v3"), "mid");
1746    }
1747
1748    #[test]
1749    fn test_resolve_model_unknown_alias() {
1750        let (id, provider) = resolve_model("gpt-4o");
1751        assert_eq!(id, "gpt-4o");
1752        assert!(provider.is_none());
1753    }
1754
1755    #[test]
1756    fn test_provider_names() {
1757        let names = provider_names();
1758        assert!(names.len() >= 7);
1759        assert!(names.contains(&"anthropic".to_string()));
1760        assert!(names.contains(&"together".to_string()));
1761        assert!(names.contains(&"local".to_string()));
1762        assert!(names.contains(&"mlx".to_string()));
1763        assert!(names.contains(&"openai".to_string()));
1764        assert!(names.contains(&"ollama".to_string()));
1765        assert!(names.contains(&"bedrock".to_string()));
1766        assert!(names.contains(&"azure_openai".to_string()));
1767        assert!(names.contains(&"vertex".to_string()));
1768    }
1769
1770    #[test]
1771    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
1772        let mut overlay = ProvidersConfig {
1773            default_provider: Some("ollama".to_string()),
1774            ..Default::default()
1775        };
1776        overlay.aliases.insert(
1777            "quickstart".to_string(),
1778            AliasDef {
1779                id: "llama3.2".to_string(),
1780                provider: "ollama".to_string(),
1781                tool_format: None,
1782            },
1783        );
1784
1785        let merged = merge_global_config(overlay);
1786
1787        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
1788        assert!(merged.providers.contains_key("anthropic"));
1789        assert!(merged.providers.contains_key("ollama"));
1790        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
1791    }
1792
1793    #[test]
1794    fn test_resolve_tier_model_default_aliases() {
1795        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
1796        assert_eq!(model, "claude-sonnet-4-20250514");
1797        assert_eq!(provider, "anthropic");
1798
1799        let (model, provider) = resolve_tier_model("small", None).unwrap();
1800        assert_eq!(model, "Qwen/Qwen3.5-9B");
1801        assert_eq!(provider, "openrouter");
1802    }
1803
1804    #[test]
1805    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
1806        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
1807        assert_eq!(model, "gpt-4o-mini");
1808        assert_eq!(provider, "openai");
1809    }
1810
1811    #[test]
1812    fn test_provider_config_anthropic() {
1813        let pdef = provider_config("anthropic").unwrap();
1814        assert_eq!(pdef.auth_style, "header");
1815        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
1816    }
1817
1818    #[test]
1819    fn test_provider_config_mlx() {
1820        let pdef = provider_config("mlx").unwrap();
1821        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
1822        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
1823        assert_eq!(
1824            pdef.healthcheck.unwrap().path.as_deref(),
1825            Some("/v1/models")
1826        );
1827
1828        let (model, provider) = resolve_model("mlx-qwen36-27b");
1829        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
1830        assert_eq!(provider.as_deref(), Some("mlx"));
1831    }
1832
1833    #[test]
1834    fn test_enterprise_provider_defaults_and_inference() {
1835        let bedrock = provider_config("bedrock").unwrap();
1836        assert_eq!(bedrock.auth_style, "aws_sigv4");
1837        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
1838        assert_eq!(
1839            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
1840            "bedrock"
1841        );
1842        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
1843
1844        let azure = provider_config("azure_openai").unwrap();
1845        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
1846        assert_eq!(
1847            auth_env_names(&azure.auth_env),
1848            vec![
1849                "AZURE_OPENAI_API_KEY".to_string(),
1850                "AZURE_OPENAI_AD_TOKEN".to_string(),
1851                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1852            ]
1853        );
1854
1855        let vertex = provider_config("vertex").unwrap();
1856        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
1857        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
1858    }
1859
1860    #[test]
1861    fn test_default_provider_env_override_for_unknown_model() {
1862        let _guard = crate::llm::env_lock().lock().expect("env lock");
1863        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1864        unsafe {
1865            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
1866        }
1867
1868        let inference = infer_provider_detail("unknown-model");
1869
1870        unsafe {
1871            match prev_default_provider {
1872                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1873                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1874            }
1875        }
1876
1877        assert_eq!(inference.provider, "openai");
1878        assert_eq!(
1879            inference.source,
1880            crate::llm::provider::ProviderInferenceSource::DefaultFallback
1881        );
1882    }
1883
1884    #[test]
1885    fn test_resolve_base_url_no_env() {
1886        let pdef = ProviderDef {
1887            base_url: "https://example.com".to_string(),
1888            ..Default::default()
1889        };
1890        assert_eq!(resolve_base_url(&pdef), "https://example.com");
1891    }
1892
1893    #[test]
1894    fn test_default_config_roundtrip() {
1895        let config = default_config();
1896        assert!(!config.providers.is_empty());
1897        assert!(!config.inference_rules.is_empty());
1898        assert!(!config.tier_rules.is_empty());
1899        assert_eq!(config.tier_defaults.default, "mid");
1900    }
1901
1902    #[test]
1903    fn test_external_config_overlays_default_catalog() {
1904        let mut config = default_config();
1905        let mut overlay = ProvidersConfig {
1906            default_provider: Some("ollama".to_string()),
1907            ..Default::default()
1908        };
1909        overlay.providers.insert(
1910            "custom".to_string(),
1911            ProviderDef {
1912                base_url: "https://llm.example.test/v1".to_string(),
1913                chat_endpoint: "/chat/completions".to_string(),
1914                ..Default::default()
1915            },
1916        );
1917
1918        config.merge_from(&overlay);
1919
1920        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
1921        assert!(config.providers.contains_key("custom"));
1922        assert!(config.providers.contains_key("anthropic"));
1923        assert!(config.providers.contains_key("ollama"));
1924    }
1925
1926    #[test]
1927    fn test_model_params_empty() {
1928        let params = model_params("claude-sonnet-4-20250514");
1929        assert!(params.is_empty());
1930    }
1931
1932    #[test]
1933    fn test_user_overrides_add_provider_and_alias() {
1934        reset_overrides();
1935        let mut overlay = ProvidersConfig::default();
1936        overlay.providers.insert(
1937            "acme".to_string(),
1938            ProviderDef {
1939                base_url: "https://llm.acme.test/v1".to_string(),
1940                chat_endpoint: "/chat/completions".to_string(),
1941                ..Default::default()
1942            },
1943        );
1944        overlay.aliases.insert(
1945            "acme-fast".to_string(),
1946            AliasDef {
1947                id: "acme/model-fast".to_string(),
1948                provider: "acme".to_string(),
1949                tool_format: Some("native".to_string()),
1950            },
1951        );
1952        set_user_overrides(Some(overlay));
1953
1954        let (model, provider) = resolve_model("acme-fast");
1955        assert_eq!(model, "acme/model-fast");
1956        assert_eq!(provider.as_deref(), Some("acme"));
1957        assert!(provider_names().contains(&"acme".to_string()));
1958        assert_eq!(
1959            provider_config("acme").map(|provider| provider.base_url),
1960            Some("https://llm.acme.test/v1".to_string())
1961        );
1962
1963        reset_overrides();
1964    }
1965
1966    #[test]
1967    fn test_default_tool_format_uses_capability_matrix() {
1968        reset_overrides();
1969
1970        assert_eq!(
1971            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
1972            "native"
1973        );
1974        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
1975    }
1976
1977    #[test]
1978    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
1979        reset_overrides();
1980        let mut overlay = ProvidersConfig::default();
1981        overlay.models.insert(
1982            "acme/model-fast".to_string(),
1983            ModelDef {
1984                name: "Acme Fast".to_string(),
1985                provider: "acme".to_string(),
1986                context_window: 65_536,
1987                stream_timeout: Some(42.0),
1988                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1989                pricing: Some(ModelPricing {
1990                    input_per_mtok: 1.25,
1991                    output_per_mtok: 2.5,
1992                    cache_read_per_mtok: Some(0.25),
1993                    cache_write_per_mtok: None,
1994                }),
1995            },
1996        );
1997        overlay
1998            .qc_defaults
1999            .insert("acme".to_string(), "acme/model-cheap".to_string());
2000        set_user_overrides(Some(overlay));
2001
2002        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
2003        assert_eq!(entry.context_window, 65_536);
2004        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
2005        assert_eq!(
2006            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
2007            Some(1.25)
2008        );
2009        assert_eq!(
2010            pricing_per_1k_for("acme", "acme/model-fast"),
2011            Some((0.00125, 0.0025))
2012        );
2013        assert_eq!(
2014            qc_default_model("acme").as_deref(),
2015            Some("acme/model-cheap")
2016        );
2017
2018        reset_overrides();
2019    }
2020
2021    #[test]
2022    fn test_user_overrides_prepend_inference_rules() {
2023        reset_overrides();
2024        let mut overlay = ProvidersConfig::default();
2025        overlay.inference_rules.push(InferenceRule {
2026            pattern: Some("internal-*".to_string()),
2027            contains: None,
2028            exact: None,
2029            provider: "openai".to_string(),
2030        });
2031        set_user_overrides(Some(overlay));
2032
2033        assert_eq!(infer_provider("internal-foo"), "openai");
2034
2035        reset_overrides();
2036    }
2037}