//! harn_vm/llm_config.rs — provider/model configuration: loading, overlays,
//! alias resolution, tiering, and pricing lookups.
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::sync::OnceLock;

/// Process-wide providers config, initialized exactly once by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path the config file was loaded from; never set when built-in defaults won.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
8
thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    /// `None` means no overlay is active; see `set_user_overrides`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root providers configuration, deserialized from `providers.toml` (or the
/// `[llm]` table of `harn.toml` when used as an overlay).
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider used when inference fails; `None` falls back to the
    /// `HARN_DEFAULT_PROVIDER` env var, then "anthropic".
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider name -> transport/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias name -> concrete model/provider pair.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id -> catalog entry (context window, pricing, ...).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lower-cased provider name -> default QC model id.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered rules mapping model ids to providers; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered rules mapping model ids to capability tiers; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier used when no tier rule or heuristic matches.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Glob pattern -> default request parameters for matching models.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
39impl ProvidersConfig {
40    pub fn is_empty(&self) -> bool {
41        self.default_provider.is_none()
42            && self.providers.is_empty()
43            && self.aliases.is_empty()
44            && self.models.is_empty()
45            && self.qc_defaults.is_empty()
46            && self.inference_rules.is_empty()
47            && self.tier_rules.is_empty()
48            && self.model_defaults.is_empty()
49            && self.tier_defaults.default == default_mid()
50    }
51
52    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
53        self.providers.extend(overlay.providers.clone());
54        self.aliases.extend(overlay.aliases.clone());
55        self.models.extend(overlay.models.clone());
56        self.qc_defaults.extend(overlay.qc_defaults.clone());
57
58        if overlay.default_provider.is_some() {
59            self.default_provider = overlay.default_provider.clone();
60        }
61
62        if !overlay.inference_rules.is_empty() {
63            let mut merged = overlay.inference_rules.clone();
64            merged.extend(self.inference_rules.clone());
65            self.inference_rules = merged;
66        }
67
68        if !overlay.tier_rules.is_empty() {
69            let mut merged = overlay.tier_rules.clone();
70            merged.extend(self.tier_rules.clone());
71            self.tier_rules = merged;
72        }
73
74        if overlay.tier_defaults.default != default_mid() {
75            self.tier_defaults = overlay.tier_defaults.clone();
76        }
77
78        for (pattern, defaults) in &overlay.model_defaults {
79            self.model_defaults
80                .entry(pattern.clone())
81                .or_default()
82                .extend(defaults.clone());
83        }
84    }
85}
86
/// Transport, auth, and economics definition for one LLM provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Human-friendly name for UIs.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon identifier for UIs.
    #[serde(default)]
    pub icon: Option<String>,
    /// API base URL; may be overridden at runtime via `base_url_env`.
    pub base_url: String,
    /// Env var that, when set non-empty, replaces `base_url`
    /// (see `resolve_base_url`).
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme: "bearer" (default), "header", or "none".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name for "header"-style auth (e.g. "x-api-key").
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var(s) holding the API key; see `AuthEnv`.
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Static headers added to every request.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat-completions path, relative to `base_url`.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Optional legacy text-completions path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional healthcheck request description.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Legacy provider-level feature flags (e.g. "native_tools").
    #[serde(default)]
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
135impl Default for ProviderDef {
136    fn default() -> Self {
137        Self {
138            display_name: None,
139            icon: None,
140            base_url: String::new(),
141            base_url_env: None,
142            auth_style: default_bearer(),
143            auth_header: None,
144            auth_env: AuthEnv::None,
145            extra_headers: BTreeMap::new(),
146            chat_endpoint: String::new(),
147            completion_endpoint: None,
148            healthcheck: None,
149            features: Vec::new(),
150            fallback: None,
151            retry_count: None,
152            retry_delay_ms: None,
153            rpm: None,
154            cost_per_1k_in: None,
155            cost_per_1k_out: None,
156            latency_p50_ms: None,
157        }
158    }
159}
160
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No auth env var configured.
    #[default]
    None,
    /// One env var holding the API key.
    Single(String),
    /// Several env vars; see `auth_env_names` for the flattened order.
    Multiple(Vec<String>),
}
175
/// HTTP healthcheck request description for a provider.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method, e.g. "GET" or "POST".
    pub method: String,
    /// Relative path — presumably resolved against the provider base URL;
    /// confirm against the healthcheck caller.
    #[serde(default)]
    pub path: Option<String>,
    /// Absolute URL alternative to `path`; precedence is enforced by the
    /// caller, not visible here.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body for POST-style checks.
    #[serde(default)]
    pub body: Option<String>,
}
186
/// Maps a human-friendly alias to a concrete model/provider pair.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Provider-native model id the alias resolves to.
    pub id: String,
    /// Provider key into `ProvidersConfig::providers`.
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Static per-model pricing, expressed in USD per million tokens
/// (`pricing_per_1k_for` scales these down by 1000).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// USD per 1M input tokens.
    pub input_per_mtok: f64,
    /// USD per 1M output tokens.
    pub output_per_mtok: f64,
    /// USD per 1M cache-read tokens, when billed separately.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// USD per 1M cache-write tokens, when billed separately.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog entry describing a single model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Canonical model name.
    pub name: String,
    /// Provider key into `ProvidersConfig::providers`.
    pub provider: String,
    /// Context window size in tokens.
    pub context_window: u64,
    /// Streaming timeout — units not visible here (seconds presumed, f64);
    /// confirm with the transport code.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Legacy capability tags; superseded by the capability matrix
    /// (see `effective_model_capability_tags`), accepted only for parsing.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Static pricing, per million tokens.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
221
/// Fully-resolved model identity produced by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Provider-native model id (transport prefixes stripped).
    pub id: String,
    /// Provider name serving this model.
    pub provider: String,
    /// Alias the selector matched, when resolution went through one.
    pub alias: Option<String>,
    /// Tool-calling format: "native" or "text".
    pub tool_format: String,
    /// Capability tier, e.g. "small", "mid", "frontier".
    pub tier: String,
}
230
/// Maps a model id to a provider. Matchers are tested in the order:
/// exact, glob pattern, substring (see `infer_provider_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern, e.g. "claude-*" (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider chosen when the rule matches.
    pub provider: String,
}
241
/// Maps a model id to a capability tier. Matchers are tested in the order:
/// exact, glob pattern, substring (see `model_tier_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier assigned when the rule matches.
    pub tier: String,
}
252
/// Fallback tier configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Tier assigned when no rule or heuristic matches; defaults to "mid".
    #[serde(default = "default_mid")]
    pub default: String,
}
258
259impl Default for TierDefaults {
260    fn default() -> Self {
261        Self {
262            default: default_mid(),
263        }
264    }
265}
266
/// Serde default for `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
270
/// Load and cache the providers config. Called once at VM startup.
///
/// Resolution order: the file named by `HARN_PROVIDERS_CONFIG` (read/parse
/// errors are logged to stderr and fall through), then
/// `~/.config/harn/providers.toml` (errors silently ignored), then the
/// built-in `default_config()`. When a file wins, its path is recorded in
/// `CONFIG_PATH` for `loaded_config_path()`.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        // Verbose logging is opt-in via either env flag.
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            match std::fs::read_to_string(&path) {
                Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
                    Ok(config) => {
                        if verbose_config_logging {
                            eprintln!(
                                "[llm_config] Loaded {} providers, {} aliases from {}",
                                config.providers.len(),
                                config.aliases.len(),
                                path
                            );
                        }
                        // `set` can only fail if already set; safe to ignore.
                        let _ = CONFIG_PATH.set(path);
                        return config;
                    }
                    Err(e) => eprintln!("[llm_config] TOML parse error in {}: {}", path, e),
                },
                Err(e) => eprintln!("[llm_config] Cannot read {}: {}", path, e),
            }
        }
        // Per-user fallback: any read or parse failure drops to defaults.
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            if let Ok(content) = std::fs::read_to_string(&path) {
                if let Ok(config) = toml::from_str::<ProvidersConfig>(&content) {
                    let _ = CONFIG_PATH.set(path);
                    return config;
                }
            }
        }
        default_config()
    })
}
313
314/// Returns the filesystem path of the currently-loaded providers config, if
315/// any. Returns `None` when built-in defaults are active.
316pub fn loaded_config_path() -> Option<std::path::PathBuf> {
317    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
318    let _ = load_config();
319    CONFIG_PATH.get().map(std::path::PathBuf::from)
320}
321
322/// Install per-run provider config overlays. The overlay uses the same shape as
323/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
324/// manifests. Passing `None` clears the overlay.
325pub fn set_user_overrides(config: Option<ProvidersConfig>) {
326    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
327}
328
/// Clear per-run provider config overlays
/// (equivalent to `set_user_overrides(None)`).
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
333
334fn effective_config() -> ProvidersConfig {
335    let mut merged = load_config().clone();
336    USER_OVERRIDES.with(|cell| {
337        if let Some(overlay) = cell.borrow().as_ref() {
338            merged.merge_from(overlay);
339        }
340    });
341    merged
342}
343
344/// Resolve a model alias to (model_id, provider_name).
345pub fn resolve_model(alias: &str) -> (String, Option<String>) {
346    let config = effective_config();
347    if let Some(a) = config.aliases.get(alias) {
348        return (a.id.clone(), Some(a.provider.clone()));
349    }
350    (normalize_model_id(alias), None)
351}
352
/// Strip host/provider selector prefixes that identify transport, not the
/// provider-native model id. This mirrors Burin's existing normalization so
/// `ollama:qwen3:30b` reaches Ollama as `qwen3:30b` instead of an invalid
/// model named `ollama`. Only the first matching prefix is removed.
pub fn normalize_model_id(raw: &str) -> String {
    const TRANSPORT_PREFIXES: [&str; 4] = ["ollama:", "local:", "huggingface:", "hf:"];
    TRANSPORT_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
365
366/// Resolve an alias or selector into the complete catalog identity hosts need:
367/// provider inference, prefix-normalized model id, default tool format, and tier.
368pub fn resolve_model_info(selector: &str) -> ResolvedModel {
369    let config = effective_config();
370    if let Some(alias) = config.aliases.get(selector) {
371        let id = alias.id.clone();
372        let provider = alias.provider.clone();
373        let tool_format = alias
374            .tool_format
375            .clone()
376            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
377        return ResolvedModel {
378            tier: model_tier_with_config(&config, &id),
379            id,
380            provider,
381            alias: Some(selector.to_string()),
382            tool_format,
383        };
384    }
385
386    let provider = infer_provider_with_config(&config, selector).provider;
387    let id = normalize_model_id(selector);
388    let tool_format = default_tool_format_with_config(&config, &id, &provider);
389    let tier = model_tier_with_config(&config, &id);
390    ResolvedModel {
391        id,
392        provider,
393        alias: None,
394        tool_format,
395        tier,
396    }
397}
398
/// Infer provider from a model ID using inference rules.
/// Convenience wrapper that drops the `ProviderInference` detail.
pub fn infer_provider(model_id: &str) -> String {
    infer_provider_detail(model_id).provider
}
403
/// Infer provider from a model ID and retain whether the configured default was used.
/// Evaluates against the effective (overlay-merged) config.
pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
    let config = effective_config();
    infer_provider_with_config(&config, model_id)
}
409
410fn infer_provider_with_config(
411    config: &ProvidersConfig,
412    model_id: &str,
413) -> crate::llm::provider::ProviderInference {
414    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
415        return crate::llm::provider::ProviderInference::builtin("ollama");
416    }
417    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
418        return crate::llm::provider::ProviderInference::builtin("huggingface");
419    }
420    for rule in &config.inference_rules {
421        if let Some(exact) = &rule.exact {
422            if model_id == exact {
423                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
424            }
425        }
426        if let Some(pattern) = &rule.pattern {
427            if glob_match(pattern, model_id) {
428                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
429            }
430        }
431        if let Some(substr) = &rule.contains {
432            if model_id.contains(substr.as_str()) {
433                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
434            }
435        }
436    }
437    crate::llm::provider::infer_provider_from_model_id(
438        model_id,
439        &default_provider_with_config(config),
440    )
441}
442
/// Effective default provider: `HARN_DEFAULT_PROVIDER` env var, then the
/// config's `default_provider`, then "anthropic". "auto" and blanks are
/// treated as unset.
pub fn default_provider() -> String {
    let config = effective_config();
    default_provider_with_config(&config)
}
447
448fn default_provider_with_config(config: &ProvidersConfig) -> String {
449    std::env::var("HARN_DEFAULT_PROVIDER")
450        .ok()
451        .map(|value| value.trim().to_string())
452        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
453        .or_else(|| {
454            config
455                .default_provider
456                .as_deref()
457                .map(str::trim)
458                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
459                .map(str::to_string)
460        })
461        .unwrap_or_else(|| "anthropic".to_string())
462}
463
/// Get model tier ("small", "mid", "frontier").
/// Evaluates against the effective (overlay-merged) config.
pub fn model_tier(model_id: &str) -> String {
    let config = effective_config();
    model_tier_with_config(&config, model_id)
}
469
470fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
471    for rule in &config.tier_rules {
472        if let Some(exact) = &rule.exact {
473            if model_id == exact {
474                return rule.tier.clone();
475            }
476        }
477        if let Some(pattern) = &rule.pattern {
478            if glob_match(pattern, model_id) {
479                return rule.tier.clone();
480            }
481        }
482        if let Some(substr) = &rule.contains {
483            if model_id.contains(substr.as_str()) {
484                return rule.tier.clone();
485            }
486        }
487    }
488    let lower = model_id.to_lowercase();
489    if lower.contains("9b") || lower.contains("a3b") {
490        return "small".to_string();
491    }
492    if lower.starts_with("claude-") || lower == "gpt-4o" {
493        return "frontier".to_string();
494    }
495    config.tier_defaults.default.clone()
496}
497
/// Get provider config for resolving base_url, auth, etc.
/// Returns an owned clone from the effective (overlay-merged) config.
pub fn provider_config(name: &str) -> Option<ProviderDef> {
    effective_config().providers.get(name).cloned()
}
502
503/// Get model-specific default parameters (temperature, etc.).
504/// Matches glob patterns in model_defaults keys.
505pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
506    let config = effective_config();
507    let mut params = BTreeMap::new();
508    for (pattern, defaults) in &config.model_defaults {
509        if glob_match(pattern, model_id) {
510            for (k, v) in defaults {
511                params.insert(k.clone(), v.clone());
512            }
513        }
514    }
515    params
516}
517
518/// Get list of configured provider names.
519pub fn provider_names() -> Vec<String> {
520    effective_config().providers.keys().cloned().collect()
521}
522
/// Return every configured alias name, sorted deterministically
/// (BTreeMap iteration yields keys in ascending order).
pub fn known_model_names() -> Vec<String> {
    effective_config().aliases.keys().cloned().collect()
}
527
/// Return every (alias name, definition) pair, in ascending key order.
pub fn alias_entries() -> Vec<(String, AliasDef)> {
    effective_config().aliases.into_iter().collect()
}
531
532/// Return every configured model-catalog entry, sorted by provider then id.
533pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
534    let mut entries: Vec<_> = effective_config()
535        .models
536        .into_iter()
537        .map(|(id, model)| {
538            let provider = model.provider.clone();
539            (
540                id.clone(),
541                with_effective_capability_tags(id, provider, model),
542            )
543        })
544        .collect();
545    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
546        model_a
547            .provider
548            .cmp(&model_b.provider)
549            .then_with(|| id_a.cmp(id_b))
550    });
551    entries
552}
553
554pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
555    effective_config()
556        .models
557        .get(model_id)
558        .cloned()
559        .map(|model| {
560            let provider = model.provider.clone();
561            with_effective_capability_tags(model_id.to_string(), provider, model)
562        })
563}
564
565pub fn qc_default_model(provider: &str) -> Option<String> {
566    std::env::var("BURIN_QC_MODEL")
567        .ok()
568        .filter(|value| !value.trim().is_empty())
569        .or_else(|| {
570            effective_config()
571                .qc_defaults
572                .get(&provider.to_lowercase())
573                .cloned()
574        })
575}
576
/// Hard-coded default model per provider; "local" and "mlx" honor env
/// overrides, unknown providers fall back to a Claude Sonnet pin.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        // LOCAL_LLM_MODEL wins, then HARN_LLM_MODEL, then the OpenAI default.
        "local" => std::env::var("LOCAL_LLM_MODEL")
            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
            .unwrap_or_else(|_| String::from("gpt-4o")),
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| String::from("unsloth/Qwen3.6-27B-UD-MLX-4bit")),
        "openai" => String::from("gpt-4o"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-20250514"),
    }
}
590
/// Return the full lower-cased-provider -> QC model map from the effective config.
pub fn qc_defaults() -> BTreeMap<String, String> {
    effective_config().qc_defaults
}
594
595pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
596    effective_config()
597        .models
598        .get(model_id)
599        .and_then(|model| model.pricing.clone())
600}
601
602pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
603    model_pricing_per_mtok(model_id)
604        .map(|pricing| {
605            (
606                pricing.input_per_mtok / 1000.0,
607                pricing.output_per_mtok / 1000.0,
608            )
609        })
610        .or_else(|| {
611            let (input, output, _) = provider_economics(provider);
612            match (input, output) {
613                (Some(input), Some(output)) => Some((input, output)),
614                _ => None,
615            }
616        })
617}
618
619pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
620    match auth_env {
621        AuthEnv::None => Vec::new(),
622        AuthEnv::Single(name) => vec![name.clone()],
623        AuthEnv::Multiple(names) => names.clone(),
624    }
625}
626
627pub fn provider_key_available(provider: &str) -> bool {
628    let Some(pdef) = provider_config(provider) else {
629        return provider == "ollama";
630    };
631    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
632        return true;
633    }
634    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
635        std::env::var(env_name)
636            .ok()
637            .is_some_and(|value| !value.trim().is_empty())
638    })
639}
640
641pub fn available_provider_names() -> Vec<String> {
642    provider_names()
643        .into_iter()
644        .filter(|provider| provider_key_available(provider))
645        .collect()
646}
647
648/// Check if a provider advertises a legacy provider-level feature.
649pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
650    provider_config(provider)
651        .map(|p| p.features.iter().any(|f| f == feature))
652        .unwrap_or(false)
653}
654
655/// Provider-level catalog pricing/latency. Model-specific static pricing in
656/// `llm::cost` still wins when available; this is the adapter-level fallback
657/// used by routing and portal summaries.
658pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
659    provider_config(provider)
660        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
661        .unwrap_or((None, None, None))
662}
663
/// Resolve the default tool format for a model+provider combination.
/// Priority: alias `tool_format` (matched by model ID) > provider/model
/// capability matrix > legacy provider feature > "text".
pub fn default_tool_format(model: &str, provider: &str) -> String {
    let config = effective_config();
    default_tool_format_with_config(&config, model, provider)
}
671
672fn default_tool_format_with_config(
673    config: &ProvidersConfig,
674    model: &str,
675    provider: &str,
676) -> String {
677    // Aliases match by model ID + provider, or by alias name.
678    for (name, alias) in &config.aliases {
679        let matches = (alias.id == model && alias.provider == provider) || name == model;
680        if matches {
681            if let Some(ref fmt) = alias.tool_format {
682                return fmt.clone();
683            }
684        }
685    }
686    let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
687    let legacy_provider_native = config
688        .providers
689        .get(provider)
690        .map(|p| p.features.iter().any(|f| f == "native_tools"))
691        .unwrap_or(false);
692    if capability_matrix_native || legacy_provider_native {
693        "native".to_string()
694    } else {
695        "text".to_string()
696    }
697}
698
/// Overwrite `model.capabilities` with tags derived from the canonical
/// capability matrix (see `effective_model_capability_tags`).
fn with_effective_capability_tags(
    model_id: String,
    provider: String,
    mut model: ModelDef,
) -> ModelDef {
    model.capabilities = effective_model_capability_tags(&provider, &model_id);
    model
}
707
708/// Legacy display tags derived from the canonical provider/model capability
709/// matrix. The matrix is the source of truth; `models.*.capabilities` in
710/// providers.toml is accepted only for backwards-compatible parsing.
711pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
712    let caps = crate::llm::capabilities::lookup(provider, model_id);
713    let mut tags = Vec::new();
714    // Today all Harn chat providers expose streaming. Keep this as a
715    // transport baseline rather than a duplicated per-model declaration.
716    tags.push("streaming".to_string());
717    if caps.native_tools {
718        tags.push("tools".to_string());
719    }
720    if !caps.tool_search.is_empty() {
721        tags.push("tool_search".to_string());
722    }
723    if caps.vision || caps.vision_supported {
724        tags.push("vision".to_string());
725    }
726    if caps.audio {
727        tags.push("audio".to_string());
728    }
729    if caps.pdf {
730        tags.push("pdf".to_string());
731    }
732    if caps.files_api_supported {
733        tags.push("files".to_string());
734    }
735    if caps.prompt_caching {
736        tags.push("prompt_caching".to_string());
737    }
738    if !caps.thinking_modes.is_empty() {
739        tags.push("thinking".to_string());
740    }
741    if caps.interleaved_thinking_supported
742        || caps
743            .thinking_modes
744            .iter()
745            .any(|mode| mode == "adaptive" || mode == "effort")
746    {
747        tags.push("extended_thinking".to_string());
748    }
749    if caps.json_schema.is_some() {
750        tags.push("structured_output".to_string());
751    }
752    tags
753}
754
755/// Resolve a tier or alias into a concrete model/provider pair.
756pub fn resolve_tier_model(
757    target: &str,
758    preferred_provider: Option<&str>,
759) -> Option<(String, String)> {
760    let config = effective_config();
761
762    if let Some(alias) = config.aliases.get(target) {
763        return Some((alias.id.clone(), alias.provider.clone()));
764    }
765
766    let candidate_aliases = if let Some(provider) = preferred_provider {
767        vec![
768            format!("{provider}/{target}"),
769            format!("{provider}:{target}"),
770            format!("tier/{target}"),
771            target.to_string(),
772        ]
773    } else {
774        vec![format!("tier/{target}"), target.to_string()]
775    };
776
777    for alias_name in candidate_aliases {
778        if let Some(alias) = config.aliases.get(&alias_name) {
779            return Some((alias.id.clone(), alias.provider.clone()));
780        }
781    }
782
783    None
784}
785
786/// Return all configured alias-backed model/provider pairs whose resolved
787/// model falls into the requested capability tier. The result is de-duplicated
788/// and sorted deterministically by provider then model id.
789pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
790    let config = effective_config();
791    let mut seen = std::collections::BTreeSet::new();
792    let mut candidates = Vec::new();
793
794    for alias in config.aliases.values() {
795        let pair = (alias.id.clone(), alias.provider.clone());
796        if seen.contains(&pair) {
797            continue;
798        }
799        if model_tier(&alias.id) == target {
800            seen.insert(pair.clone());
801            candidates.push(pair);
802        }
803    }
804
805    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
806        provider_a
807            .cmp(provider_b)
808            .then_with(|| model_a.cmp(model_b))
809    });
810    candidates
811}
812
813/// Return all configured alias-backed model/provider pairs. Used by routing
814/// policies that need to compare alternatives across tiers.
815pub fn all_model_candidates() -> Vec<(String, String)> {
816    let config = effective_config();
817    let mut seen = std::collections::BTreeSet::new();
818    let mut candidates = Vec::new();
819
820    for alias in config.aliases.values() {
821        let pair = (alias.id.clone(), alias.provider.clone());
822        if seen.insert(pair.clone()) {
823            candidates.push(pair);
824        }
825    }
826
827    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
828        provider_a
829            .cmp(provider_b)
830            .then_with(|| model_a.cmp(model_b))
831    });
832    candidates
833}
834
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
/// Supports a single `*` wildcard (prefix, suffix, or interior position);
/// patterns containing more than one `*` fall back to literal comparison.
fn glob_match(pattern: &str, input: &str) -> bool {
    if let Some(prefix) = pattern.strip_suffix('*') {
        input.starts_with(prefix)
    } else if let Some(suffix) = pattern.strip_prefix('*') {
        input.ends_with(suffix)
    } else if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            // Length guard keeps the prefix and suffix from overlapping:
            // without it "ab*ba" would incorrectly match "aba".
            input.len() >= parts[0].len() + parts[1].len()
                && input.starts_with(parts[0])
                && input.ends_with(parts[1])
        } else {
            input == pattern
        }
    } else {
        input == pattern
    }
}
852
/// Best-effort home directory lookup via `$HOME` (no directory crates).
fn dirs_or_home() -> Option<String> {
    std::env::var("HOME").ok()
}
856
857/// Resolve the effective base URL for a provider, checking the `base_url_env`
858/// override first, then falling back to the configured `base_url`.
859pub fn resolve_base_url(pdef: &ProviderDef) -> String {
860    if let Some(env_name) = &pdef.base_url_env {
861        if let Ok(val) = std::env::var(env_name) {
862            // Strip surrounding quotes that some .env parsers leave intact.
863            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
864            if !trimmed.is_empty() {
865                return trimmed.to_string();
866            }
867        }
868    }
869    pdef.base_url.clone()
870}
871
/// Build the built-in baseline configuration: the bundled provider table,
/// model-id inference rules, tier rules and the tier fallback, tier and
/// convenience aliases, per-provider qc defaults, and curated model
/// metadata. User/package overlays are merged on top of this baseline.
fn default_config() -> ProvidersConfig {
    let mut config = ProvidersConfig {
        default_provider: Some("anthropic".to_string()),
        ..Default::default()
    };

    // Anthropic native Messages API (key sent via the x-api-key header).
    config.providers.insert(
        "anthropic".to_string(),
        ProviderDef {
            base_url: "https://api.anthropic.com/v1".to_string(),
            auth_style: "header".to_string(),
            auth_header: Some("x-api-key".to_string()),
            auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
            extra_headers: BTreeMap::from([(
                "anthropic-version".to_string(),
                "2023-06-01".to_string(),
            )]),
            chat_endpoint: "/messages".to_string(),
            completion_endpoint: None,
            healthcheck: Some(HealthcheckDef {
                method: "POST".to_string(),
                path: Some("/messages/count_tokens".to_string()),
                url: None,
                body: Some(
                    r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
                        .to_string(),
                ),
            }),
            features: vec!["prompt_caching".to_string(), "thinking".to_string()],
            cost_per_1k_in: Some(0.003),
            cost_per_1k_out: Some(0.015),
            latency_p50_ms: Some(2500),
            ..Default::default()
        },
    );

    // OpenAI
    config.providers.insert(
        "openai".to_string(),
        ProviderDef {
            base_url: "https://api.openai.com/v1".to_string(),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0025),
            cost_per_1k_out: Some(0.010),
            latency_p50_ms: Some(1800),
            ..Default::default()
        },
    );

    // OpenRouter
    config.providers.insert(
        "openrouter".to_string(),
        ProviderDef {
            base_url: "https://openrouter.ai/api/v1".to_string(),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/auth/key".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.003),
            cost_per_1k_out: Some(0.015),
            latency_p50_ms: Some(2200),
            ..Default::default()
        },
    );

    // HuggingFace
    config.providers.insert(
        "huggingface".to_string(),
        ProviderDef {
            base_url: "https://router.huggingface.co/v1".to_string(),
            auth_style: "bearer".to_string(),
            // Two accepted token env vars; checked in this order.
            auth_env: AuthEnv::Multiple(vec![
                "HF_TOKEN".to_string(),
                "HUGGINGFACE_API_KEY".to_string(),
            ]),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                // Absolute URL: the whoami endpoint lives on a different host
                // than the inference router base_url.
                url: Some("https://huggingface.co/api/whoami-v2".to_string()),
                path: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0002),
            cost_per_1k_out: Some(0.0006),
            latency_p50_ms: Some(2400),
            ..Default::default()
        },
    );

    // Ollama default. Hosts can override this to `/v1/chat/completions`
    // via a bundled `providers.toml` (loaded by setting
    // `HARN_PROVIDERS_CONFIG` in the host process). The OpenAI-compat
    // path bypasses Ollama's per-model tool-call post-processors
    // (qwen3coder.go, qwen35.go) which raise HTTP 500s on text-mode
    // responses for the Qwen3.5 family. The default here stays on
    // `/api/chat` so the harn-vm test stub keeps working with Ollama's
    // native NDJSON wire format.
    config.providers.insert(
        "ollama".to_string(),
        ProviderDef {
            base_url: "http://localhost:11434".to_string(),
            base_url_env: Some("OLLAMA_HOST".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/api/chat".to_string(),
            completion_endpoint: Some("/api/generate".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/api/tags".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(1200),
            ..Default::default()
        },
    );

    // Google Gemini native API.
    config.providers.insert(
        "gemini".to_string(),
        ProviderDef {
            base_url: "https://generativelanguage.googleapis.com".to_string(),
            base_url_env: Some("GEMINI_BASE_URL".to_string()),
            auth_style: "header".to_string(),
            auth_header: Some("x-goog-api-key".to_string()),
            auth_env: AuthEnv::Multiple(vec![
                "GEMINI_API_KEY".to_string(),
                "GOOGLE_API_KEY".to_string(),
            ]),
            chat_endpoint: "/v1beta/models".to_string(),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1beta/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.00125),
            cost_per_1k_out: Some(0.005),
            latency_p50_ms: Some(1800),
            ..Default::default()
        },
    );

    // Together AI (OpenAI-compatible)
    config.providers.insert(
        "together".to_string(),
        ProviderDef {
            base_url: "https://api.together.xyz/v1".to_string(),
            base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0002),
            cost_per_1k_out: Some(0.0006),
            latency_p50_ms: Some(1600),
            ..Default::default()
        },
    );

    // Groq (OpenAI-compatible)
    config.providers.insert(
        "groq".to_string(),
        ProviderDef {
            base_url: "https://api.groq.com/openai/v1".to_string(),
            base_url_env: Some("GROQ_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0001),
            cost_per_1k_out: Some(0.0003),
            latency_p50_ms: Some(450),
            ..Default::default()
        },
    );

    // DeepSeek (OpenAI-compatible)
    config.providers.insert(
        "deepseek".to_string(),
        ProviderDef {
            base_url: "https://api.deepseek.com/v1".to_string(),
            base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.00014),
            cost_per_1k_out: Some(0.00028),
            latency_p50_ms: Some(1800),
            ..Default::default()
        },
    );

    // Fireworks (OpenAI-compatible open-weight hosting)
    config.providers.insert(
        "fireworks".to_string(),
        ProviderDef {
            base_url: "https://api.fireworks.ai/inference/v1".to_string(),
            base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0002),
            cost_per_1k_out: Some(0.0006),
            latency_p50_ms: Some(1400),
            ..Default::default()
        },
    );

    // Alibaba DashScope (OpenAI-compatible Qwen host)
    config.providers.insert(
        "dashscope".to_string(),
        ProviderDef {
            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
            base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0003),
            cost_per_1k_out: Some(0.0012),
            latency_p50_ms: Some(1600),
            ..Default::default()
        },
    );

    // AWS Bedrock Runtime. The provider shim resolves AWS credentials through
    // env vars, the selected/default profile, container credentials, or EC2
    // instance profile credentials, then signs Converse API calls with SigV4.
    config.providers.insert(
        "bedrock".to_string(),
        ProviderDef {
            // Empty base_url: the endpoint is region-dependent; override via
            // BEDROCK_BASE_URL or the SigV4 shim.
            base_url: String::new(),
            base_url_env: Some("BEDROCK_BASE_URL".to_string()),
            auth_style: "aws_sigv4".to_string(),
            auth_env: AuthEnv::None,
            chat_endpoint: "/model/{model}/converse".to_string(),
            features: vec!["native_tools".to_string()],
            latency_p50_ms: Some(2600),
            ..Default::default()
        },
    );

    // Azure OpenAI. The deployment name is routed in the URL; callers can
    // use the Harn model field as the deployment name or set
    // AZURE_OPENAI_DEPLOYMENT.
    config.providers.insert(
        "azure_openai".to_string(),
        ProviderDef {
            base_url: "https://{resource}.openai.azure.com".to_string(),
            base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
            auth_style: "azure_openai".to_string(),
            auth_env: AuthEnv::Multiple(vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]),
            chat_endpoint:
                "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
                    .to_string(),
            features: vec!["native_tools".to_string()],
            cost_per_1k_in: Some(0.0025),
            cost_per_1k_out: Some(0.010),
            latency_p50_ms: Some(1900),
            ..Default::default()
        },
    );

    // Google Vertex AI Gemini.
    config.providers.insert(
        "vertex".to_string(),
        ProviderDef {
            base_url: "https://aiplatform.googleapis.com/v1".to_string(),
            base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Multiple(vec![
                "VERTEX_AI_ACCESS_TOKEN".to_string(),
                "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
                "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
            ]),
            chat_endpoint:
                "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
                    .to_string(),
            features: vec!["native_tools".to_string()],
            cost_per_1k_in: Some(0.00125),
            cost_per_1k_out: Some(0.005),
            latency_p50_ms: Some(2100),
            ..Default::default()
        },
    );

    // Local OpenAI-compatible server
    config.providers.insert(
        "local".to_string(),
        ProviderDef {
            base_url: "http://localhost:8000".to_string(),
            base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(900),
            ..Default::default()
        },
    );

    // Apple Silicon MLX OpenAI-compatible server. Harn owns readiness
    // probing; hosts that want script-based auto-start should launch the
    // process first, then call Harn again to verify readiness.
    config.providers.insert(
        "mlx".to_string(),
        ProviderDef {
            base_url: "http://127.0.0.1:8002".to_string(),
            base_url_env: Some("MLX_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(900),
            ..Default::default()
        },
    );

    // vLLM OpenAI-compatible server.
    config.providers.insert(
        "vllm".to_string(),
        ProviderDef {
            base_url: "http://localhost:8000".to_string(),
            base_url_env: Some("VLLM_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(800),
            ..Default::default()
        },
    );

    // HuggingFace Text Generation Inference OpenAI-compatible endpoint.
    config.providers.insert(
        "tgi".to_string(),
        ProviderDef {
            base_url: "http://localhost:8080".to_string(),
            base_url_env: Some("TGI_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/health".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(950),
            ..Default::default()
        },
    );

    // Default inference rules
    config.inference_rules = vec![
        InferenceRule {
            pattern: Some("claude-*".to_string()),
            contains: None,
            exact: None,
            provider: "anthropic".to_string(),
        },
        InferenceRule {
            pattern: Some("gpt-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o1*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o3*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o4*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        // Bedrock model ids carry a vendor prefix (e.g. "anthropic.claude-…").
        InferenceRule {
            pattern: Some("anthropic.claude-*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("meta.llama*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("amazon.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("mistral.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("cohere.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("gemini-*".to_string()),
            contains: None,
            exact: None,
            provider: "gemini".to_string(),
        },
    ];

    // Default tier rules
    config.tier_rules = vec![
        TierRule {
            contains: Some("9b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("a3b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-e2b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-e4b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-26b".to_string()),
            pattern: None,
            exact: None,
            tier: "mid".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-31b".to_string()),
            pattern: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            contains: Some("gemma4:26b".to_string()),
            pattern: None,
            exact: None,
            tier: "mid".to_string(),
        },
        TierRule {
            contains: Some("gemma4:31b".to_string()),
            pattern: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            pattern: Some("claude-*".to_string()),
            contains: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            exact: Some("gpt-4o".to_string()),
            contains: None,
            pattern: None,
            tier: "frontier".to_string(),
        },
    ];

    // Fallback tier for models that match none of the rules above.
    config.tier_defaults = TierDefaults {
        default: "mid".to_string(),
    };

    // Built-in aliases: tier names (both bare and "tier/"-prefixed) plus
    // convenience ids for local and MLX models.
    config.aliases.insert(
        "frontier".to_string(),
        AliasDef {
            id: "claude-sonnet-4-20250514".to_string(),
            provider: "anthropic".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/frontier".to_string(),
        AliasDef {
            id: "claude-sonnet-4-20250514".to_string(),
            provider: "anthropic".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "mid".to_string(),
        AliasDef {
            id: "gpt-4o-mini".to_string(),
            provider: "openai".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/mid".to_string(),
        AliasDef {
            id: "gpt-4o-mini".to_string(),
            provider: "openai".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "small".to_string(),
        AliasDef {
            id: "Qwen/Qwen3.5-9B".to_string(),
            provider: "openrouter".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/small".to_string(),
        AliasDef {
            id: "Qwen/Qwen3.5-9B".to_string(),
            provider: "openrouter".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4".to_string(),
        AliasDef {
            id: "gemma-4-26b-a4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-26b".to_string(),
        AliasDef {
            id: "gemma-4-26b-a4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-31b".to_string(),
        AliasDef {
            id: "gemma-4-31b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-e4b".to_string(),
        AliasDef {
            id: "gemma-4-e4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-e2b".to_string(),
        AliasDef {
            id: "gemma-4-e2b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "mlx-qwen36-27b".to_string(),
        AliasDef {
            id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
            provider: "mlx".to_string(),
            tool_format: None,
        },
    );

    // Per-provider default model ids for QC use. NOTE(review): semantics
    // inferred from the field name; confirm against qc_defaults consumers.
    config.qc_defaults.extend(BTreeMap::from([
        (
            "anthropic".to_string(),
            "claude-3-5-haiku-20241022".to_string(),
        ),
        ("openai".to_string(), "gpt-4o-mini".to_string()),
        (
            "openrouter".to_string(),
            "google/gemini-2.5-flash".to_string(),
        ),
        ("ollama".to_string(), "llama3.2".to_string()),
        ("local".to_string(), "gpt-4o".to_string()),
    ]));

    // Curated metadata for well-known models: display name, context window,
    // capabilities, and (where known) pricing per MTok.
    config.models.extend(BTreeMap::from([
        (
            "claude-sonnet-4-20250514".to_string(),
            ModelDef {
                name: "Claude Sonnet 4".to_string(),
                provider: "anthropic".to_string(),
                context_window: 200_000,
                stream_timeout: None,
                capabilities: vec![
                    "tools".to_string(),
                    "streaming".to_string(),
                    "prompt_caching".to_string(),
                    "thinking".to_string(),
                ],
                pricing: Some(ModelPricing {
                    input_per_mtok: 3.0,
                    output_per_mtok: 15.0,
                    cache_read_per_mtok: Some(0.3),
                    cache_write_per_mtok: Some(3.75),
                }),
            },
        ),
        (
            "gpt-4o-mini".to_string(),
            ModelDef {
                name: "GPT-4o Mini".to_string(),
                provider: "openai".to_string(),
                context_window: 128_000,
                stream_timeout: None,
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 0.15,
                    output_per_mtok: 0.60,
                    cache_read_per_mtok: None,
                    cache_write_per_mtok: None,
                }),
            },
        ),
        (
            "Qwen/Qwen3.5-9B".to_string(),
            ModelDef {
                name: "Qwen3.5 9B".to_string(),
                provider: "openrouter".to_string(),
                context_window: 131_072,
                stream_timeout: None,
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: None,
            },
        ),
        (
            "llama3.2".to_string(),
            ModelDef {
                name: "Llama 3.2".to_string(),
                provider: "ollama".to_string(),
                context_window: 32_000,
                // Local models can stream slowly; allow a generous timeout.
                stream_timeout: Some(300.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: None,
            },
        ),
    ]));

    config
}
1615
1616#[cfg(test)]
1617mod tests {
1618    use super::*;
1619
    /// Test helper: drop any thread-local provider overrides installed by a
    /// previous test so each test starts from the built-in defaults.
    fn reset_overrides() {
        clear_user_overrides();
    }
1623
1624    #[test]
1625    fn test_glob_match_prefix() {
1626        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
1627        assert!(glob_match("gpt-*", "gpt-4o"));
1628        assert!(!glob_match("claude-*", "gpt-4o"));
1629    }
1630
1631    #[test]
1632    fn test_glob_match_suffix() {
1633        assert!(glob_match("*-latest", "llama3.2-latest"));
1634        assert!(!glob_match("*-latest", "llama3.2"));
1635    }
1636
1637    #[test]
1638    fn test_glob_match_middle() {
1639        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
1640        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
1641    }
1642
1643    #[test]
1644    fn test_glob_match_exact() {
1645        assert!(glob_match("gpt-4o", "gpt-4o"));
1646        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
1647    }
1648
    #[test]
    fn test_infer_provider_from_defaults() {
        // Serialize process-env mutation across tests via the shared lock.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        // Snapshot and clear HARN_DEFAULT_PROVIDER so the built-in fallback
        // ("anthropic") is what unrecognized ids resolve to.
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        // Restore the variable (or its absence) before releasing the lock.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
1674
1675    #[test]
1676    fn test_infer_provider_prefix_rules() {
1677        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
1678        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
1679        // Even when the id also contains `/`, the local transport prefix wins.
1680        assert_eq!(infer_provider("local:owner/model"), "ollama");
1681        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
1682    }
1683
    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        // Serialize process-env mutation across tests via the shared lock.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        // Snapshot and clear HARN_DEFAULT_PROVIDER so the fallback path
        // ("anthropic") is observable for multi-slash ids.
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        // Exactly one slash looks like an openrouter "org/model" id; two or
        // more slashes do not, so the default provider applies.
        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        // Restore the variable (or its absence) before releasing the lock.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
1702
1703    #[test]
1704    fn test_resolve_model_info_normalizes_provider_prefixes() {
1705        let local = resolve_model_info("local:gemma-4-e4b-it");
1706        assert_eq!(local.id, "gemma-4-e4b-it");
1707        assert_eq!(local.provider, "ollama");
1708
1709        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
1710        assert_eq!(ollama.id, "qwen3:30b-a3b");
1711        assert_eq!(ollama.provider, "ollama");
1712
1713        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
1714        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
1715        assert_eq!(hf.provider, "huggingface");
1716    }
1717
1718    #[test]
1719    fn test_model_tier_from_defaults() {
1720        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
1721        assert_eq!(model_tier("gpt-4o"), "frontier");
1722        assert_eq!(model_tier("Qwen3.5-9B"), "small");
1723        assert_eq!(model_tier("deepseek-v3"), "mid");
1724    }
1725
1726    #[test]
1727    fn test_resolve_model_unknown_alias() {
1728        let (id, provider) = resolve_model("gpt-4o");
1729        assert_eq!(id, "gpt-4o");
1730        assert!(provider.is_none());
1731    }
1732
1733    #[test]
1734    fn test_provider_names() {
1735        let names = provider_names();
1736        assert!(names.len() >= 7);
1737        assert!(names.contains(&"anthropic".to_string()));
1738        assert!(names.contains(&"together".to_string()));
1739        assert!(names.contains(&"local".to_string()));
1740        assert!(names.contains(&"mlx".to_string()));
1741        assert!(names.contains(&"openai".to_string()));
1742        assert!(names.contains(&"ollama".to_string()));
1743        assert!(names.contains(&"bedrock".to_string()));
1744        assert!(names.contains(&"azure_openai".to_string()));
1745        assert!(names.contains(&"vertex".to_string()));
1746    }
1747
1748    #[test]
1749    fn test_resolve_tier_model_default_aliases() {
1750        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
1751        assert_eq!(model, "claude-sonnet-4-20250514");
1752        assert_eq!(provider, "anthropic");
1753
1754        let (model, provider) = resolve_tier_model("small", None).unwrap();
1755        assert_eq!(model, "Qwen/Qwen3.5-9B");
1756        assert_eq!(provider, "openrouter");
1757    }
1758
1759    #[test]
1760    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
1761        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
1762        assert_eq!(model, "gpt-4o-mini");
1763        assert_eq!(provider, "openai");
1764    }
1765
1766    #[test]
1767    fn test_provider_config_anthropic() {
1768        let pdef = provider_config("anthropic").unwrap();
1769        assert_eq!(pdef.auth_style, "header");
1770        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
1771    }
1772
1773    #[test]
1774    fn test_provider_config_mlx() {
1775        let pdef = provider_config("mlx").unwrap();
1776        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
1777        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
1778        assert_eq!(
1779            pdef.healthcheck.unwrap().path.as_deref(),
1780            Some("/v1/models")
1781        );
1782
1783        let (model, provider) = resolve_model("mlx-qwen36-27b");
1784        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
1785        assert_eq!(provider.as_deref(), Some("mlx"));
1786    }
1787
1788    #[test]
1789    fn test_enterprise_provider_defaults_and_inference() {
1790        let bedrock = provider_config("bedrock").unwrap();
1791        assert_eq!(bedrock.auth_style, "aws_sigv4");
1792        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
1793        assert_eq!(
1794            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
1795            "bedrock"
1796        );
1797        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
1798
1799        let azure = provider_config("azure_openai").unwrap();
1800        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
1801        assert_eq!(
1802            auth_env_names(&azure.auth_env),
1803            vec![
1804                "AZURE_OPENAI_API_KEY".to_string(),
1805                "AZURE_OPENAI_AD_TOKEN".to_string(),
1806                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1807            ]
1808        );
1809
1810        let vertex = provider_config("vertex").unwrap();
1811        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
1812        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
1813    }
1814
1815    #[test]
1816    fn test_default_provider_env_override_for_unknown_model() {
1817        let _guard = crate::llm::env_lock().lock().expect("env lock");
1818        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1819        unsafe {
1820            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
1821        }
1822
1823        let inference = infer_provider_detail("unknown-model");
1824
1825        unsafe {
1826            match prev_default_provider {
1827                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1828                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1829            }
1830        }
1831
1832        assert_eq!(inference.provider, "openai");
1833        assert_eq!(
1834            inference.source,
1835            crate::llm::provider::ProviderInferenceSource::DefaultFallback
1836        );
1837    }
1838
1839    #[test]
1840    fn test_resolve_base_url_no_env() {
1841        let pdef = ProviderDef {
1842            base_url: "https://example.com".to_string(),
1843            ..Default::default()
1844        };
1845        assert_eq!(resolve_base_url(&pdef), "https://example.com");
1846    }
1847
1848    #[test]
1849    fn test_default_config_roundtrip() {
1850        let config = default_config();
1851        assert!(!config.providers.is_empty());
1852        assert!(!config.inference_rules.is_empty());
1853        assert!(!config.tier_rules.is_empty());
1854        assert_eq!(config.tier_defaults.default, "mid");
1855    }
1856
1857    #[test]
1858    fn test_model_params_empty() {
1859        let params = model_params("claude-sonnet-4-20250514");
1860        assert!(params.is_empty());
1861    }
1862
1863    #[test]
1864    fn test_user_overrides_add_provider_and_alias() {
1865        reset_overrides();
1866        let mut overlay = ProvidersConfig::default();
1867        overlay.providers.insert(
1868            "acme".to_string(),
1869            ProviderDef {
1870                base_url: "https://llm.acme.test/v1".to_string(),
1871                chat_endpoint: "/chat/completions".to_string(),
1872                ..Default::default()
1873            },
1874        );
1875        overlay.aliases.insert(
1876            "acme-fast".to_string(),
1877            AliasDef {
1878                id: "acme/model-fast".to_string(),
1879                provider: "acme".to_string(),
1880                tool_format: Some("native".to_string()),
1881            },
1882        );
1883        set_user_overrides(Some(overlay));
1884
1885        let (model, provider) = resolve_model("acme-fast");
1886        assert_eq!(model, "acme/model-fast");
1887        assert_eq!(provider.as_deref(), Some("acme"));
1888        assert!(provider_names().contains(&"acme".to_string()));
1889        assert_eq!(
1890            provider_config("acme").map(|provider| provider.base_url),
1891            Some("https://llm.acme.test/v1".to_string())
1892        );
1893
1894        reset_overrides();
1895    }
1896
1897    #[test]
1898    fn test_default_tool_format_uses_capability_matrix() {
1899        reset_overrides();
1900
1901        assert_eq!(
1902            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
1903            "native"
1904        );
1905        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
1906    }
1907
1908    #[test]
1909    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
1910        reset_overrides();
1911        let mut overlay = ProvidersConfig::default();
1912        overlay.models.insert(
1913            "acme/model-fast".to_string(),
1914            ModelDef {
1915                name: "Acme Fast".to_string(),
1916                provider: "acme".to_string(),
1917                context_window: 65_536,
1918                stream_timeout: Some(42.0),
1919                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1920                pricing: Some(ModelPricing {
1921                    input_per_mtok: 1.25,
1922                    output_per_mtok: 2.5,
1923                    cache_read_per_mtok: Some(0.25),
1924                    cache_write_per_mtok: None,
1925                }),
1926            },
1927        );
1928        overlay
1929            .qc_defaults
1930            .insert("acme".to_string(), "acme/model-cheap".to_string());
1931        set_user_overrides(Some(overlay));
1932
1933        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
1934        assert_eq!(entry.context_window, 65_536);
1935        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
1936        assert_eq!(
1937            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
1938            Some(1.25)
1939        );
1940        assert_eq!(
1941            pricing_per_1k_for("acme", "acme/model-fast"),
1942            Some((0.00125, 0.0025))
1943        );
1944        assert_eq!(
1945            qc_default_model("acme").as_deref(),
1946            Some("acme/model-cheap")
1947        );
1948
1949        reset_overrides();
1950    }
1951
1952    #[test]
1953    fn test_user_overrides_prepend_inference_rules() {
1954        reset_overrides();
1955        let mut overlay = ProvidersConfig::default();
1956        overlay.inference_rules.push(InferenceRule {
1957            pattern: Some("internal-*".to_string()),
1958            contains: None,
1959            exact: None,
1960            provider: "openai".to_string(),
1961        });
1962        set_user_overrides(Some(overlay));
1963
1964        assert_eq!(infer_provider("internal-foo"), "openai");
1965
1966        reset_overrides();
1967    }
1968}