//! `harn_vm/llm_config.rs` — LLM provider/model configuration: lazy loading
//! of `providers.toml`, per-run thread-local overlays, alias/tier/provider
//! resolution, and provider metadata lookups.
1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide providers config, initialized lazily on first access.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Filesystem path the config was loaded from; left unset when the built-in
/// defaults are active (see `loaded_config_path`).
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
8
thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    /// `None` means "no overlay installed"; see `set_user_overrides`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Top-level providers configuration, deserialized from `providers.toml`
/// (or used as an overlay shape for `[llm]` sections — see `merge_from`).
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider name → transport/auth/pricing definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias name → concrete model/provider binding.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id → catalog entry (context window, capabilities, pricing).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lowercased provider name → default QC model id (see `qc_default_model`).
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered model-id → provider rules; first matching rule wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered model-id → tier rules; first matching rule wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Fallback tier used when no tier rule or heuristic matches.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Model-id glob pattern → default request parameters (see `model_params`).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
36
37impl ProvidersConfig {
38    pub fn is_empty(&self) -> bool {
39        self.providers.is_empty()
40            && self.aliases.is_empty()
41            && self.models.is_empty()
42            && self.qc_defaults.is_empty()
43            && self.inference_rules.is_empty()
44            && self.tier_rules.is_empty()
45            && self.model_defaults.is_empty()
46            && self.tier_defaults.default == default_mid()
47    }
48
49    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
50        self.providers.extend(overlay.providers.clone());
51        self.aliases.extend(overlay.aliases.clone());
52        self.models.extend(overlay.models.clone());
53        self.qc_defaults.extend(overlay.qc_defaults.clone());
54
55        if !overlay.inference_rules.is_empty() {
56            let mut merged = overlay.inference_rules.clone();
57            merged.extend(self.inference_rules.clone());
58            self.inference_rules = merged;
59        }
60
61        if !overlay.tier_rules.is_empty() {
62            let mut merged = overlay.tier_rules.clone();
63            merged.extend(self.tier_rules.clone());
64            self.tier_rules = merged;
65        }
66
67        if overlay.tier_defaults.default != default_mid() {
68            self.tier_defaults = overlay.tier_defaults.clone();
69        }
70
71        for (pattern, defaults) in &overlay.model_defaults {
72            self.model_defaults
73                .entry(pattern.clone())
74                .or_default()
75                .extend(defaults.clone());
76        }
77    }
78}
79
/// Transport, auth, and catalog metadata for a single LLM provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Optional human-readable display name.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon identifier.
    #[serde(default)]
    pub icon: Option<String>,
    /// Default API base URL; `resolve_base_url` may override via env.
    pub base_url: String,
    /// Env var that, when set and non-empty, overrides `base_url`.
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme: "bearer" (default), "header", or "none".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name used with the "header" auth style (e.g. `x-api-key`).
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var name(s) holding the API key; see [`AuthEnv`].
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Extra HTTP headers to send with requests.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat-completion endpoint path, joined to the base URL.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Optional plain-completion endpoint path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional liveness-probe request definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Advertised feature flags (e.g. "native_tools", "thinking").
    #[serde(default)]
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
127
128impl Default for ProviderDef {
129    fn default() -> Self {
130        Self {
131            display_name: None,
132            icon: None,
133            base_url: String::new(),
134            base_url_env: None,
135            auth_style: default_bearer(),
136            auth_header: None,
137            auth_env: AuthEnv::None,
138            extra_headers: BTreeMap::new(),
139            chat_endpoint: String::new(),
140            completion_endpoint: None,
141            healthcheck: None,
142            features: Vec::new(),
143            fallback: None,
144            retry_count: None,
145            retry_delay_ms: None,
146            rpm: None,
147            cost_per_1k_in: None,
148            cost_per_1k_out: None,
149            latency_p50_ms: None,
150        }
151    }
152}
153
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
157
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No key env var configured.
    #[default]
    None,
    /// A single env var name.
    Single(String),
    /// Several candidate env var names, tried in order.
    Multiple(Vec<String>),
}
168
/// Definition of a provider liveness-probe request.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method, e.g. "GET" or "POST".
    pub method: String,
    /// Path relative to the provider base URL.
    #[serde(default)]
    pub path: Option<String>,
    /// Absolute URL, for probes that live off the provider base URL.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<String>,
}
179
/// A named shortcut binding an alias to a concrete model id + provider pair.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Provider-native model id the alias resolves to.
    pub id: String,
    /// Provider name (key into `ProvidersConfig::providers`).
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
191
/// Static catalog pricing, expressed in USD per million tokens (MTok).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// USD per MTok of input (prompt) tokens.
    pub input_per_mtok: f64,
    /// USD per MTok of output (completion) tokens.
    pub output_per_mtok: f64,
    /// USD per MTok for cache reads, when the provider supports caching.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// USD per MTok for cache writes, when the provider supports caching.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
201
/// Catalog entry describing a concrete model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Provider-native model name.
    pub name: String,
    /// Provider that serves this model.
    pub provider: String,
    /// Maximum context window in tokens.
    pub context_window: u64,
    /// Optional stream timeout (assumed seconds — TODO confirm unit at callers).
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Free-form capability tags.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Optional static pricing; see `model_pricing_per_mtok`.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
214
/// Fully-resolved model identity produced by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Prefix-normalized, provider-native model id.
    pub id: String,
    /// Provider serving the model.
    pub provider: String,
    /// The alias the selector matched, if any.
    pub alias: Option<String>,
    /// Tool-calling format: "native" or "text".
    pub tool_format: String,
    /// Capability tier (e.g. "small", "mid", "frontier").
    pub tier: String,
}
223
/// Maps a model id to a provider. Within a rule, `exact`, then glob
/// `pattern`, then substring `contains` are tested; the first rule with any
/// hit wins.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider selected when this rule matches.
    pub provider: String,
}
234
/// Maps a model id to a capability tier; same matcher semantics as
/// `InferenceRule`.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier assigned when this rule matches.
    pub tier: String,
}
245
/// Fallback tier configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Tier used when no tier rule or heuristic matches (default "mid").
    #[serde(default = "default_mid")]
    pub default: String,
}
251
252impl Default for TierDefaults {
253    fn default() -> Self {
254        Self {
255            default: default_mid(),
256        }
257    }
258}
259
/// Serde default for `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
263
/// Load and cache the providers config. Called once at VM startup.
///
/// Resolution order:
/// 1. the file named by `HARN_PROVIDERS_CONFIG` (read/parse errors reported
///    on stderr, then fall through);
/// 2. `~/.config/harn/providers.toml` (silent best-effort);
/// 3. the built-in `default_config()`.
///
/// When a file is loaded its path is recorded in `CONFIG_PATH` so
/// `loaded_config_path()` can report it.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        // Diagnostics are opt-in via either verbosity env var.
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            match std::fs::read_to_string(&path) {
                Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
                    Ok(config) => {
                        if verbose_config_logging {
                            eprintln!(
                                "[llm_config] Loaded {} providers, {} aliases from {}",
                                config.providers.len(),
                                config.aliases.len(),
                                path
                            );
                        }
                        // `set` only fails if already initialized; ignore.
                        let _ = CONFIG_PATH.set(path);
                        return config;
                    }
                    // An explicitly-requested config that fails is reported
                    // loudly before falling through to the next source.
                    Err(e) => eprintln!("[llm_config] TOML parse error in {}: {}", path, e),
                },
                Err(e) => eprintln!("[llm_config] Cannot read {}: {}", path, e),
            }
        }
        // Home-directory config is silent best-effort: a missing or broken
        // user file simply falls through to defaults.
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            if let Ok(content) = std::fs::read_to_string(&path) {
                if let Ok(config) = toml::from_str::<ProvidersConfig>(&content) {
                    let _ = CONFIG_PATH.set(path);
                    return config;
                }
            }
        }
        default_config()
    })
}
306
307/// Returns the filesystem path of the currently-loaded providers config, if
308/// any. Returns `None` when built-in defaults are active.
309pub fn loaded_config_path() -> Option<std::path::PathBuf> {
310    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
311    let _ = load_config();
312    CONFIG_PATH.get().map(std::path::PathBuf::from)
313}
314
315/// Install per-run provider config overlays. The overlay uses the same shape as
316/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
317/// manifests. Passing `None` clears the overlay.
318pub fn set_user_overrides(config: Option<ProvidersConfig>) {
319    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
320}
321
322/// Clear per-run provider config overlays.
323pub fn clear_user_overrides() {
324    set_user_overrides(None);
325}
326
327fn effective_config() -> ProvidersConfig {
328    let mut merged = load_config().clone();
329    USER_OVERRIDES.with(|cell| {
330        if let Some(overlay) = cell.borrow().as_ref() {
331            merged.merge_from(overlay);
332        }
333    });
334    merged
335}
336
337/// Resolve a model alias to (model_id, provider_name).
338pub fn resolve_model(alias: &str) -> (String, Option<String>) {
339    let config = effective_config();
340    if let Some(a) = config.aliases.get(alias) {
341        return (a.id.clone(), Some(a.provider.clone()));
342    }
343    (normalize_model_id(alias), None)
344}
345
/// Strip host/provider selector prefixes that identify transport, not the
/// provider-native model id. This mirrors Burin's existing normalization so
/// `ollama:qwen3:30b` reaches Ollama as `qwen3:30b` instead of an invalid
/// model named `ollama`.
pub fn normalize_model_id(raw: &str) -> String {
    const TRANSPORT_PREFIXES: [&str; 4] = ["ollama:", "local:", "huggingface:", "hf:"];
    TRANSPORT_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
358
359/// Resolve an alias or selector into the complete catalog identity hosts need:
360/// provider inference, prefix-normalized model id, default tool format, and tier.
361pub fn resolve_model_info(selector: &str) -> ResolvedModel {
362    let config = effective_config();
363    if let Some(alias) = config.aliases.get(selector) {
364        let id = alias.id.clone();
365        let provider = alias.provider.clone();
366        let tool_format = alias
367            .tool_format
368            .clone()
369            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
370        return ResolvedModel {
371            tier: model_tier_with_config(&config, &id),
372            id,
373            provider,
374            alias: Some(selector.to_string()),
375            tool_format,
376        };
377    }
378
379    let provider = infer_provider_with_config(&config, selector);
380    let id = normalize_model_id(selector);
381    let tool_format = default_tool_format_with_config(&config, &id, &provider);
382    let tier = model_tier_with_config(&config, &id);
383    ResolvedModel {
384        id,
385        provider,
386        alias: None,
387        tool_format,
388        tier,
389    }
390}
391
392/// Infer provider from a model ID using inference rules.
393pub fn infer_provider(model_id: &str) -> String {
394    let config = effective_config();
395    infer_provider_with_config(&config, model_id)
396}
397
398fn infer_provider_with_config(config: &ProvidersConfig, model_id: &str) -> String {
399    if model_id.starts_with("local:") {
400        return "local".to_string();
401    }
402    if model_id.starts_with("ollama:") {
403        return "ollama".to_string();
404    }
405    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
406        return "huggingface".to_string();
407    }
408    for rule in &config.inference_rules {
409        if let Some(exact) = &rule.exact {
410            if model_id == exact {
411                return rule.provider.clone();
412            }
413        }
414        if let Some(pattern) = &rule.pattern {
415            if glob_match(pattern, model_id) {
416                return rule.provider.clone();
417            }
418        }
419        if let Some(substr) = &rule.contains {
420            if model_id.contains(substr.as_str()) {
421                return rule.provider.clone();
422            }
423        }
424    }
425    // Fallback to hardcoded inference.
426    // Order matters: `local:` must beat the generic `:` → ollama rule, and
427    // any prefix-based rule must beat the generic `/` → openrouter rule for
428    // ids like `local:owner/model`.
429    if model_id.starts_with("claude-") {
430        return "anthropic".to_string();
431    }
432    if model_id.to_lowercase().starts_with("or-") {
433        return "openrouter".to_string();
434    }
435    if model_id.starts_with("gpt-") || model_id.starts_with("o1") || model_id.starts_with("o3") {
436        return "openai".to_string();
437    }
438    if model_id.contains('/') {
439        return "openrouter".to_string();
440    }
441    if model_id.contains(':') {
442        return "ollama".to_string();
443    }
444    "anthropic".to_string()
445}
446
447/// Get model tier ("small", "mid", "frontier").
448pub fn model_tier(model_id: &str) -> String {
449    let config = effective_config();
450    model_tier_with_config(&config, model_id)
451}
452
453fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
454    for rule in &config.tier_rules {
455        if let Some(exact) = &rule.exact {
456            if model_id == exact {
457                return rule.tier.clone();
458            }
459        }
460        if let Some(pattern) = &rule.pattern {
461            if glob_match(pattern, model_id) {
462                return rule.tier.clone();
463            }
464        }
465        if let Some(substr) = &rule.contains {
466            if model_id.contains(substr.as_str()) {
467                return rule.tier.clone();
468            }
469        }
470    }
471    let lower = model_id.to_lowercase();
472    if lower.contains("9b") || lower.contains("a3b") {
473        return "small".to_string();
474    }
475    if lower.starts_with("claude-") || lower == "gpt-4o" {
476        return "frontier".to_string();
477    }
478    config.tier_defaults.default.clone()
479}
480
481/// Get provider config for resolving base_url, auth, etc.
482pub fn provider_config(name: &str) -> Option<ProviderDef> {
483    effective_config().providers.get(name).cloned()
484}
485
486/// Get model-specific default parameters (temperature, etc.).
487/// Matches glob patterns in model_defaults keys.
488pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
489    let config = effective_config();
490    let mut params = BTreeMap::new();
491    for (pattern, defaults) in &config.model_defaults {
492        if glob_match(pattern, model_id) {
493            for (k, v) in defaults {
494                params.insert(k.clone(), v.clone());
495            }
496        }
497    }
498    params
499}
500
501/// Get list of configured provider names.
502pub fn provider_names() -> Vec<String> {
503    effective_config().providers.keys().cloned().collect()
504}
505
506/// Return every configured alias name, sorted deterministically.
507pub fn known_model_names() -> Vec<String> {
508    effective_config().aliases.keys().cloned().collect()
509}
510
511pub fn alias_entries() -> Vec<(String, AliasDef)> {
512    effective_config().aliases.into_iter().collect()
513}
514
515/// Return every configured model-catalog entry, sorted by provider then id.
516pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
517    let mut entries: Vec<_> = effective_config().models.into_iter().collect();
518    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
519        model_a
520            .provider
521            .cmp(&model_b.provider)
522            .then_with(|| id_a.cmp(id_b))
523    });
524    entries
525}
526
527pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
528    effective_config().models.get(model_id).cloned()
529}
530
531pub fn qc_default_model(provider: &str) -> Option<String> {
532    std::env::var("BURIN_QC_MODEL")
533        .ok()
534        .filter(|value| !value.trim().is_empty())
535        .or_else(|| {
536            effective_config()
537                .qc_defaults
538                .get(&provider.to_lowercase())
539                .cloned()
540        })
541}
542
543pub fn qc_defaults() -> BTreeMap<String, String> {
544    effective_config().qc_defaults
545}
546
547pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
548    effective_config()
549        .models
550        .get(model_id)
551        .and_then(|model| model.pricing.clone())
552}
553
554pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
555    model_pricing_per_mtok(model_id)
556        .map(|pricing| {
557            (
558                pricing.input_per_mtok / 1000.0,
559                pricing.output_per_mtok / 1000.0,
560            )
561        })
562        .or_else(|| {
563            let (input, output, _) = provider_economics(provider);
564            match (input, output) {
565                (Some(input), Some(output)) => Some((input, output)),
566                _ => None,
567            }
568        })
569}
570
571pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
572    match auth_env {
573        AuthEnv::None => Vec::new(),
574        AuthEnv::Single(name) => vec![name.clone()],
575        AuthEnv::Multiple(names) => names.clone(),
576    }
577}
578
579pub fn provider_key_available(provider: &str) -> bool {
580    let Some(pdef) = provider_config(provider) else {
581        return provider == "ollama";
582    };
583    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
584        return true;
585    }
586    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
587        std::env::var(env_name)
588            .ok()
589            .is_some_and(|value| !value.trim().is_empty())
590    })
591}
592
593pub fn available_provider_names() -> Vec<String> {
594    provider_names()
595        .into_iter()
596        .filter(|provider| provider_key_available(provider))
597        .collect()
598}
599
600/// Check if a provider advertises a feature (e.g., "native_tools").
601pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
602    provider_config(provider)
603        .map(|p| p.features.iter().any(|f| f == feature))
604        .unwrap_or(false)
605}
606
607/// Provider-level catalog pricing/latency. Model-specific static pricing in
608/// `llm::cost` still wins when available; this is the adapter-level fallback
609/// used by routing and portal summaries.
610pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
611    provider_config(provider)
612        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
613        .unwrap_or((None, None, None))
614}
615
616/// Resolve the default tool format for a model+provider combination.
617/// Priority: alias `tool_format` (matched by model ID) > provider feature > "text".
618pub fn default_tool_format(model: &str, provider: &str) -> String {
619    let config = effective_config();
620    default_tool_format_with_config(&config, model, provider)
621}
622
623fn default_tool_format_with_config(
624    config: &ProvidersConfig,
625    model: &str,
626    provider: &str,
627) -> String {
628    // Aliases match by model ID + provider, or by alias name.
629    for (name, alias) in &config.aliases {
630        let matches = (alias.id == model && alias.provider == provider) || name == model;
631        if matches {
632            if let Some(ref fmt) = alias.tool_format {
633                return fmt.clone();
634            }
635        }
636    }
637    if config
638        .providers
639        .get(provider)
640        .map(|p| p.features.iter().any(|f| f == "native_tools"))
641        .unwrap_or(false)
642    {
643        "native".to_string()
644    } else {
645        "text".to_string()
646    }
647}
648
649/// Resolve a tier or alias into a concrete model/provider pair.
650pub fn resolve_tier_model(
651    target: &str,
652    preferred_provider: Option<&str>,
653) -> Option<(String, String)> {
654    let config = effective_config();
655
656    if let Some(alias) = config.aliases.get(target) {
657        return Some((alias.id.clone(), alias.provider.clone()));
658    }
659
660    let candidate_aliases = if let Some(provider) = preferred_provider {
661        vec![
662            format!("{provider}/{target}"),
663            format!("{provider}:{target}"),
664            format!("tier/{target}"),
665            target.to_string(),
666        ]
667    } else {
668        vec![format!("tier/{target}"), target.to_string()]
669    };
670
671    for alias_name in candidate_aliases {
672        if let Some(alias) = config.aliases.get(&alias_name) {
673            return Some((alias.id.clone(), alias.provider.clone()));
674        }
675    }
676
677    None
678}
679
680/// Return all configured alias-backed model/provider pairs whose resolved
681/// model falls into the requested capability tier. The result is de-duplicated
682/// and sorted deterministically by provider then model id.
683pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
684    let config = effective_config();
685    let mut seen = std::collections::BTreeSet::new();
686    let mut candidates = Vec::new();
687
688    for alias in config.aliases.values() {
689        let pair = (alias.id.clone(), alias.provider.clone());
690        if seen.contains(&pair) {
691            continue;
692        }
693        if model_tier(&alias.id) == target {
694            seen.insert(pair.clone());
695            candidates.push(pair);
696        }
697    }
698
699    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
700        provider_a
701            .cmp(provider_b)
702            .then_with(|| model_a.cmp(model_b))
703    });
704    candidates
705}
706
707/// Return all configured alias-backed model/provider pairs. Used by routing
708/// policies that need to compare alternatives across tiers.
709pub fn all_model_candidates() -> Vec<(String, String)> {
710    let config = effective_config();
711    let mut seen = std::collections::BTreeSet::new();
712    let mut candidates = Vec::new();
713
714    for alias in config.aliases.values() {
715        let pair = (alias.id.clone(), alias.provider.clone());
716        if seen.insert(pair.clone()) {
717            candidates.push(pair);
718        }
719    }
720
721    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
722        provider_a
723            .cmp(provider_b)
724            .then_with(|| model_a.cmp(model_b))
725    });
726    candidates
727}
728
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
///
/// `*` matches any run of characters (including empty) and may appear
/// anywhere, any number of times: the literal before the first `*` must be a
/// prefix, the literal after the last `*` a non-overlapping suffix, and any
/// interior literals must appear in order between them. A pattern without
/// `*` requires an exact match.
///
/// Fixes the previous implementation, where `"*abc*"` hit the
/// trailing-`*` branch and tested `starts_with("*abc")` (so it almost never
/// matched), patterns with more than two segments fell back to exact
/// equality, and `"a*a"` matched `"a"` via an overlapping prefix/suffix.
fn glob_match(pattern: &str, input: &str) -> bool {
    // No wildcard: exact comparison.
    if !pattern.contains('*') {
        return input == pattern;
    }
    // `split('*')` yields at least two parts when '*' is present.
    let parts: Vec<&str> = pattern.split('*').collect();
    let first = parts[0];
    let last = parts[parts.len() - 1];

    // The literal prefix and suffix must both fit without overlapping.
    let Some(without_prefix) = input.strip_prefix(first) else {
        return false;
    };
    let Some(mut remaining) = without_prefix.strip_suffix(last) else {
        return false;
    };
    // Each interior literal segment must appear, in order, between them.
    for segment in &parts[1..parts.len() - 1] {
        match remaining.find(segment) {
            Some(idx) => remaining = &remaining[idx + segment.len()..],
            None => return false,
        }
    }
    true
}
746
/// Best-effort home-directory lookup via `$HOME` (no extra crates).
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
750
751/// Resolve the effective base URL for a provider, checking the `base_url_env`
752/// override first, then falling back to the configured `base_url`.
753pub fn resolve_base_url(pdef: &ProviderDef) -> String {
754    if let Some(env_name) = &pdef.base_url_env {
755        if let Ok(val) = std::env::var(env_name) {
756            // Strip surrounding quotes that some .env parsers leave intact.
757            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
758            if !trimmed.is_empty() {
759                return trimmed.to_string();
760            }
761        }
762    }
763    pdef.base_url.clone()
764}
765
766fn default_config() -> ProvidersConfig {
767    let mut config = ProvidersConfig::default();
768
769    config.providers.insert(
770        "anthropic".to_string(),
771        ProviderDef {
772            base_url: "https://api.anthropic.com/v1".to_string(),
773            auth_style: "header".to_string(),
774            auth_header: Some("x-api-key".to_string()),
775            auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
776            extra_headers: BTreeMap::from([(
777                "anthropic-version".to_string(),
778                "2023-06-01".to_string(),
779            )]),
780            chat_endpoint: "/messages".to_string(),
781            completion_endpoint: None,
782            healthcheck: Some(HealthcheckDef {
783                method: "POST".to_string(),
784                path: Some("/messages/count_tokens".to_string()),
785                url: None,
786                body: Some(
787                    r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
788                        .to_string(),
789                ),
790            }),
791            features: vec!["prompt_caching".to_string(), "thinking".to_string()],
792            cost_per_1k_in: Some(0.003),
793            cost_per_1k_out: Some(0.015),
794            latency_p50_ms: Some(2500),
795            ..Default::default()
796        },
797    );
798
799    // OpenAI
800    config.providers.insert(
801        "openai".to_string(),
802        ProviderDef {
803            base_url: "https://api.openai.com/v1".to_string(),
804            auth_style: "bearer".to_string(),
805            auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
806            chat_endpoint: "/chat/completions".to_string(),
807            completion_endpoint: Some("/completions".to_string()),
808            healthcheck: Some(HealthcheckDef {
809                method: "GET".to_string(),
810                path: Some("/models".to_string()),
811                url: None,
812                body: None,
813            }),
814            cost_per_1k_in: Some(0.0025),
815            cost_per_1k_out: Some(0.010),
816            latency_p50_ms: Some(1800),
817            ..Default::default()
818        },
819    );
820
821    // OpenRouter
822    config.providers.insert(
823        "openrouter".to_string(),
824        ProviderDef {
825            base_url: "https://openrouter.ai/api/v1".to_string(),
826            auth_style: "bearer".to_string(),
827            auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
828            chat_endpoint: "/chat/completions".to_string(),
829            completion_endpoint: Some("/completions".to_string()),
830            healthcheck: Some(HealthcheckDef {
831                method: "GET".to_string(),
832                path: Some("/auth/key".to_string()),
833                url: None,
834                body: None,
835            }),
836            cost_per_1k_in: Some(0.003),
837            cost_per_1k_out: Some(0.015),
838            latency_p50_ms: Some(2200),
839            ..Default::default()
840        },
841    );
842
843    // HuggingFace
844    config.providers.insert(
845        "huggingface".to_string(),
846        ProviderDef {
847            base_url: "https://router.huggingface.co/v1".to_string(),
848            auth_style: "bearer".to_string(),
849            auth_env: AuthEnv::Multiple(vec![
850                "HF_TOKEN".to_string(),
851                "HUGGINGFACE_API_KEY".to_string(),
852            ]),
853            chat_endpoint: "/chat/completions".to_string(),
854            completion_endpoint: Some("/completions".to_string()),
855            healthcheck: Some(HealthcheckDef {
856                method: "GET".to_string(),
857                url: Some("https://huggingface.co/api/whoami-v2".to_string()),
858                path: None,
859                body: None,
860            }),
861            cost_per_1k_in: Some(0.0002),
862            cost_per_1k_out: Some(0.0006),
863            latency_p50_ms: Some(2400),
864            ..Default::default()
865        },
866    );
867
868    // Ollama default. Hosts can override this to `/v1/chat/completions`
869    // via a bundled `providers.toml` (loaded by setting
870    // `HARN_PROVIDERS_CONFIG` in the host process). The OpenAI-compat
871    // path bypasses Ollama's per-model tool-call post-processors
872    // (qwen3coder.go, qwen35.go) which raise HTTP 500s on text-mode
873    // responses for the Qwen3.5 family. The default here stays on
874    // `/api/chat` so the harn-vm test stub keeps working with Ollama's
875    // native NDJSON wire format.
876    config.providers.insert(
877        "ollama".to_string(),
878        ProviderDef {
879            base_url: "http://localhost:11434".to_string(),
880            base_url_env: Some("OLLAMA_HOST".to_string()),
881            auth_style: "none".to_string(),
882            chat_endpoint: "/api/chat".to_string(),
883            completion_endpoint: Some("/api/generate".to_string()),
884            healthcheck: Some(HealthcheckDef {
885                method: "GET".to_string(),
886                path: Some("/api/tags".to_string()),
887                url: None,
888                body: None,
889            }),
890            cost_per_1k_in: Some(0.0),
891            cost_per_1k_out: Some(0.0),
892            latency_p50_ms: Some(1200),
893            ..Default::default()
894        },
895    );
896
897    // Together AI (OpenAI-compatible)
898    config.providers.insert(
899        "together".to_string(),
900        ProviderDef {
901            base_url: "https://api.together.xyz/v1".to_string(),
902            base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
903            auth_style: "bearer".to_string(),
904            auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
905            chat_endpoint: "/chat/completions".to_string(),
906            completion_endpoint: Some("/completions".to_string()),
907            healthcheck: Some(HealthcheckDef {
908                method: "GET".to_string(),
909                path: Some("/models".to_string()),
910                url: None,
911                body: None,
912            }),
913            cost_per_1k_in: Some(0.0002),
914            cost_per_1k_out: Some(0.0006),
915            latency_p50_ms: Some(1600),
916            ..Default::default()
917        },
918    );
919
920    // Groq (OpenAI-compatible)
921    config.providers.insert(
922        "groq".to_string(),
923        ProviderDef {
924            base_url: "https://api.groq.com/openai/v1".to_string(),
925            base_url_env: Some("GROQ_BASE_URL".to_string()),
926            auth_style: "bearer".to_string(),
927            auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
928            chat_endpoint: "/chat/completions".to_string(),
929            completion_endpoint: Some("/completions".to_string()),
930            healthcheck: Some(HealthcheckDef {
931                method: "GET".to_string(),
932                path: Some("/models".to_string()),
933                url: None,
934                body: None,
935            }),
936            cost_per_1k_in: Some(0.0001),
937            cost_per_1k_out: Some(0.0003),
938            latency_p50_ms: Some(450),
939            ..Default::default()
940        },
941    );
942
943    // DeepSeek (OpenAI-compatible)
944    config.providers.insert(
945        "deepseek".to_string(),
946        ProviderDef {
947            base_url: "https://api.deepseek.com/v1".to_string(),
948            base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
949            auth_style: "bearer".to_string(),
950            auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
951            chat_endpoint: "/chat/completions".to_string(),
952            completion_endpoint: Some("/completions".to_string()),
953            healthcheck: Some(HealthcheckDef {
954                method: "GET".to_string(),
955                path: Some("/models".to_string()),
956                url: None,
957                body: None,
958            }),
959            cost_per_1k_in: Some(0.00014),
960            cost_per_1k_out: Some(0.00028),
961            latency_p50_ms: Some(1800),
962            ..Default::default()
963        },
964    );
965
966    // Fireworks (OpenAI-compatible open-weight hosting)
967    config.providers.insert(
968        "fireworks".to_string(),
969        ProviderDef {
970            base_url: "https://api.fireworks.ai/inference/v1".to_string(),
971            base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
972            auth_style: "bearer".to_string(),
973            auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
974            chat_endpoint: "/chat/completions".to_string(),
975            completion_endpoint: Some("/completions".to_string()),
976            healthcheck: Some(HealthcheckDef {
977                method: "GET".to_string(),
978                path: Some("/models".to_string()),
979                url: None,
980                body: None,
981            }),
982            cost_per_1k_in: Some(0.0002),
983            cost_per_1k_out: Some(0.0006),
984            latency_p50_ms: Some(1400),
985            ..Default::default()
986        },
987    );
988
989    // Alibaba DashScope (OpenAI-compatible Qwen host)
990    config.providers.insert(
991        "dashscope".to_string(),
992        ProviderDef {
993            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
994            base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
995            auth_style: "bearer".to_string(),
996            auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
997            chat_endpoint: "/chat/completions".to_string(),
998            completion_endpoint: Some("/completions".to_string()),
999            healthcheck: Some(HealthcheckDef {
1000                method: "GET".to_string(),
1001                path: Some("/models".to_string()),
1002                url: None,
1003                body: None,
1004            }),
1005            cost_per_1k_in: Some(0.0003),
1006            cost_per_1k_out: Some(0.0012),
1007            latency_p50_ms: Some(1600),
1008            ..Default::default()
1009        },
1010    );
1011
1012    // Local OpenAI-compatible server
1013    config.providers.insert(
1014        "local".to_string(),
1015        ProviderDef {
1016            base_url: "http://localhost:8000".to_string(),
1017            base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1018            auth_style: "none".to_string(),
1019            chat_endpoint: "/v1/chat/completions".to_string(),
1020            completion_endpoint: Some("/v1/completions".to_string()),
1021            healthcheck: Some(HealthcheckDef {
1022                method: "GET".to_string(),
1023                path: Some("/v1/models".to_string()),
1024                url: None,
1025                body: None,
1026            }),
1027            cost_per_1k_in: Some(0.0),
1028            cost_per_1k_out: Some(0.0),
1029            latency_p50_ms: Some(900),
1030            ..Default::default()
1031        },
1032    );
1033
1034    // vLLM OpenAI-compatible server.
1035    config.providers.insert(
1036        "vllm".to_string(),
1037        ProviderDef {
1038            base_url: "http://localhost:8000".to_string(),
1039            base_url_env: Some("VLLM_BASE_URL".to_string()),
1040            auth_style: "none".to_string(),
1041            chat_endpoint: "/v1/chat/completions".to_string(),
1042            completion_endpoint: Some("/v1/completions".to_string()),
1043            healthcheck: Some(HealthcheckDef {
1044                method: "GET".to_string(),
1045                path: Some("/v1/models".to_string()),
1046                url: None,
1047                body: None,
1048            }),
1049            cost_per_1k_in: Some(0.0),
1050            cost_per_1k_out: Some(0.0),
1051            latency_p50_ms: Some(800),
1052            ..Default::default()
1053        },
1054    );
1055
1056    // HuggingFace Text Generation Inference OpenAI-compatible endpoint.
1057    config.providers.insert(
1058        "tgi".to_string(),
1059        ProviderDef {
1060            base_url: "http://localhost:8080".to_string(),
1061            base_url_env: Some("TGI_BASE_URL".to_string()),
1062            auth_style: "none".to_string(),
1063            chat_endpoint: "/v1/chat/completions".to_string(),
1064            completion_endpoint: Some("/v1/completions".to_string()),
1065            healthcheck: Some(HealthcheckDef {
1066                method: "GET".to_string(),
1067                path: Some("/health".to_string()),
1068                url: None,
1069                body: None,
1070            }),
1071            cost_per_1k_in: Some(0.0),
1072            cost_per_1k_out: Some(0.0),
1073            latency_p50_ms: Some(950),
1074            ..Default::default()
1075        },
1076    );
1077
1078    // Default inference rules
1079    config.inference_rules = vec![
1080        InferenceRule {
1081            pattern: Some("claude-*".to_string()),
1082            contains: None,
1083            exact: None,
1084            provider: "anthropic".to_string(),
1085        },
1086        InferenceRule {
1087            pattern: Some("gpt-*".to_string()),
1088            contains: None,
1089            exact: None,
1090            provider: "openai".to_string(),
1091        },
1092        InferenceRule {
1093            pattern: Some("o1*".to_string()),
1094            contains: None,
1095            exact: None,
1096            provider: "openai".to_string(),
1097        },
1098        InferenceRule {
1099            pattern: Some("o3*".to_string()),
1100            contains: None,
1101            exact: None,
1102            provider: "openai".to_string(),
1103        },
1104        InferenceRule {
1105            pattern: Some("local:*".to_string()),
1106            contains: None,
1107            exact: None,
1108            provider: "local".to_string(),
1109        },
1110        InferenceRule {
1111            pattern: None,
1112            contains: Some("/".to_string()),
1113            exact: None,
1114            provider: "openrouter".to_string(),
1115        },
1116        InferenceRule {
1117            pattern: None,
1118            contains: Some(":".to_string()),
1119            exact: None,
1120            provider: "ollama".to_string(),
1121        },
1122    ];
1123
1124    // Default tier rules
1125    config.tier_rules = vec![
1126        TierRule {
1127            contains: Some("9b".to_string()),
1128            pattern: None,
1129            exact: None,
1130            tier: "small".to_string(),
1131        },
1132        TierRule {
1133            contains: Some("a3b".to_string()),
1134            pattern: None,
1135            exact: None,
1136            tier: "small".to_string(),
1137        },
1138        TierRule {
1139            contains: Some("gemma-4-e2b".to_string()),
1140            pattern: None,
1141            exact: None,
1142            tier: "small".to_string(),
1143        },
1144        TierRule {
1145            contains: Some("gemma-4-e4b".to_string()),
1146            pattern: None,
1147            exact: None,
1148            tier: "small".to_string(),
1149        },
1150        TierRule {
1151            contains: Some("gemma-4-26b".to_string()),
1152            pattern: None,
1153            exact: None,
1154            tier: "mid".to_string(),
1155        },
1156        TierRule {
1157            contains: Some("gemma-4-31b".to_string()),
1158            pattern: None,
1159            exact: None,
1160            tier: "frontier".to_string(),
1161        },
1162        TierRule {
1163            contains: Some("gemma4:26b".to_string()),
1164            pattern: None,
1165            exact: None,
1166            tier: "mid".to_string(),
1167        },
1168        TierRule {
1169            contains: Some("gemma4:31b".to_string()),
1170            pattern: None,
1171            exact: None,
1172            tier: "frontier".to_string(),
1173        },
1174        TierRule {
1175            pattern: Some("claude-*".to_string()),
1176            contains: None,
1177            exact: None,
1178            tier: "frontier".to_string(),
1179        },
1180        TierRule {
1181            exact: Some("gpt-4o".to_string()),
1182            contains: None,
1183            pattern: None,
1184            tier: "frontier".to_string(),
1185        },
1186    ];
1187
1188    config.tier_defaults = TierDefaults {
1189        default: "mid".to_string(),
1190    };
1191
1192    config.aliases.insert(
1193        "frontier".to_string(),
1194        AliasDef {
1195            id: "claude-sonnet-4-20250514".to_string(),
1196            provider: "anthropic".to_string(),
1197            tool_format: None,
1198        },
1199    );
1200    config.aliases.insert(
1201        "tier/frontier".to_string(),
1202        AliasDef {
1203            id: "claude-sonnet-4-20250514".to_string(),
1204            provider: "anthropic".to_string(),
1205            tool_format: None,
1206        },
1207    );
1208    config.aliases.insert(
1209        "mid".to_string(),
1210        AliasDef {
1211            id: "gpt-4o-mini".to_string(),
1212            provider: "openai".to_string(),
1213            tool_format: None,
1214        },
1215    );
1216    config.aliases.insert(
1217        "tier/mid".to_string(),
1218        AliasDef {
1219            id: "gpt-4o-mini".to_string(),
1220            provider: "openai".to_string(),
1221            tool_format: None,
1222        },
1223    );
1224    config.aliases.insert(
1225        "small".to_string(),
1226        AliasDef {
1227            id: "Qwen/Qwen3.5-9B".to_string(),
1228            provider: "openrouter".to_string(),
1229            tool_format: None,
1230        },
1231    );
1232    config.aliases.insert(
1233        "tier/small".to_string(),
1234        AliasDef {
1235            id: "Qwen/Qwen3.5-9B".to_string(),
1236            provider: "openrouter".to_string(),
1237            tool_format: None,
1238        },
1239    );
1240    config.aliases.insert(
1241        "local-gemma4".to_string(),
1242        AliasDef {
1243            id: "gemma-4-26b-a4b-it".to_string(),
1244            provider: "local".to_string(),
1245            tool_format: None,
1246        },
1247    );
1248    config.aliases.insert(
1249        "local-gemma4-26b".to_string(),
1250        AliasDef {
1251            id: "gemma-4-26b-a4b-it".to_string(),
1252            provider: "local".to_string(),
1253            tool_format: None,
1254        },
1255    );
1256    config.aliases.insert(
1257        "local-gemma4-31b".to_string(),
1258        AliasDef {
1259            id: "gemma-4-31b-it".to_string(),
1260            provider: "local".to_string(),
1261            tool_format: None,
1262        },
1263    );
1264    config.aliases.insert(
1265        "local-gemma4-e4b".to_string(),
1266        AliasDef {
1267            id: "gemma-4-e4b-it".to_string(),
1268            provider: "local".to_string(),
1269            tool_format: None,
1270        },
1271    );
1272    config.aliases.insert(
1273        "local-gemma4-e2b".to_string(),
1274        AliasDef {
1275            id: "gemma-4-e2b-it".to_string(),
1276            provider: "local".to_string(),
1277            tool_format: None,
1278        },
1279    );
1280
1281    config.qc_defaults.extend(BTreeMap::from([
1282        (
1283            "anthropic".to_string(),
1284            "claude-3-5-haiku-20241022".to_string(),
1285        ),
1286        ("openai".to_string(), "gpt-4o-mini".to_string()),
1287        (
1288            "openrouter".to_string(),
1289            "google/gemini-2.5-flash".to_string(),
1290        ),
1291        ("ollama".to_string(), "llama3.2".to_string()),
1292        ("local".to_string(), "gpt-4o".to_string()),
1293    ]));
1294
1295    config.models.extend(BTreeMap::from([
1296        (
1297            "claude-sonnet-4-20250514".to_string(),
1298            ModelDef {
1299                name: "Claude Sonnet 4".to_string(),
1300                provider: "anthropic".to_string(),
1301                context_window: 200_000,
1302                stream_timeout: None,
1303                capabilities: vec![
1304                    "tools".to_string(),
1305                    "streaming".to_string(),
1306                    "prompt_caching".to_string(),
1307                    "thinking".to_string(),
1308                ],
1309                pricing: Some(ModelPricing {
1310                    input_per_mtok: 3.0,
1311                    output_per_mtok: 15.0,
1312                    cache_read_per_mtok: Some(0.3),
1313                    cache_write_per_mtok: Some(3.75),
1314                }),
1315            },
1316        ),
1317        (
1318            "gpt-4o-mini".to_string(),
1319            ModelDef {
1320                name: "GPT-4o Mini".to_string(),
1321                provider: "openai".to_string(),
1322                context_window: 128_000,
1323                stream_timeout: None,
1324                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1325                pricing: Some(ModelPricing {
1326                    input_per_mtok: 0.15,
1327                    output_per_mtok: 0.60,
1328                    cache_read_per_mtok: None,
1329                    cache_write_per_mtok: None,
1330                }),
1331            },
1332        ),
1333        (
1334            "Qwen/Qwen3.5-9B".to_string(),
1335            ModelDef {
1336                name: "Qwen3.5 9B".to_string(),
1337                provider: "openrouter".to_string(),
1338                context_window: 131_072,
1339                stream_timeout: None,
1340                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1341                pricing: None,
1342            },
1343        ),
1344        (
1345            "llama3.2".to_string(),
1346            ModelDef {
1347                name: "Llama 3.2".to_string(),
1348                provider: "ollama".to_string(),
1349                context_window: 32_000,
1350                stream_timeout: Some(300.0),
1351                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1352                pricing: None,
1353            },
1354        ),
1355    ]));
1356
1357    config
1358}
1359
#[cfg(test)]
mod tests {
    use super::*;

    /// Clears the thread-local overlay so each test observes only the
    /// built-in default configuration.
    fn reset_overrides() {
        clear_user_overrides();
    }

    #[test]
    fn test_glob_match_prefix() {
        for (pattern, id) in [
            ("claude-*", "claude-sonnet-4-20250514"),
            ("gpt-*", "gpt-4o"),
        ] {
            assert!(glob_match(pattern, id));
        }
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    #[test]
    fn test_glob_match_suffix() {
        for (id, expected) in [("llama3.2-latest", true), ("llama3.2", false)] {
            assert_eq!(glob_match("*-latest", id), expected);
        }
    }

    #[test]
    fn test_glob_match_middle() {
        for (id, expected) in [
            ("claude-sonnet-latest", true),
            ("claude-sonnet-beta", false),
        ] {
            assert_eq!(glob_match("claude-*-latest", id), expected);
        }
    }

    #[test]
    fn test_glob_match_exact() {
        // A pattern without `*` should match only the identical string.
        for (id, expected) in [("gpt-4o", true), ("gpt-4o-mini", false)] {
            assert_eq!(glob_match("gpt-4o", id), expected);
        }
    }

    #[test]
    fn test_infer_provider_from_defaults() {
        let expectations = [
            ("claude-sonnet-4-20250514", "anthropic"),
            ("gpt-4o", "openai"),
            ("o1-preview", "openai"),
            ("o3-mini", "openai"),
            ("qwen/qwen3-coder", "openrouter"),
            ("llama3.2:latest", "ollama"),
            // Ids no rule claims are expected to land on anthropic.
            ("unknown-model", "anthropic"),
        ];
        for (model, provider) in expectations {
            assert_eq!(infer_provider(model), provider);
        }
    }

    #[test]
    fn test_infer_provider_local_prefix() {
        // Ids carrying the `local:` scheme must reach the local
        // OpenAI-compatible provider instead of ollama, whose bare `:` rule
        // would otherwise claim everything containing a colon.
        for id in ["local:gemma-4-e4b-it", "local:qwen2.5"] {
            assert_eq!(infer_provider(id), "local");
        }
        // The `local:` prefix also beats the `/` (openrouter) rule.
        assert_eq!(infer_provider("local:owner/model"), "local");
    }

    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        let cases = [
            ("local:gemma-4-e4b-it", "gemma-4-e4b-it", "local"),
            ("ollama:qwen3:30b-a3b", "qwen3:30b-a3b", "ollama"),
            ("hf:Qwen/Qwen3.6-35B-A3B", "Qwen/Qwen3.6-35B-A3B", "huggingface"),
        ];
        for (raw, id, provider) in cases {
            let info = resolve_model_info(raw);
            assert_eq!(info.id, id);
            assert_eq!(info.provider, provider);
        }
    }

    #[test]
    fn test_model_tier_from_defaults() {
        for (model, tier) in [
            ("claude-sonnet-4-20250514", "frontier"),
            ("gpt-4o", "frontier"),
            ("Qwen3.5-9B", "small"),
            ("deepseek-v3", "mid"),
        ] {
            assert_eq!(model_tier(model), tier);
        }
    }

    #[test]
    fn test_resolve_model_unknown_alias() {
        // An id with no alias entry passes through unchanged and carries no
        // provider hint.
        let resolved = resolve_model("gpt-4o");
        assert_eq!(resolved.0, "gpt-4o");
        assert_eq!(resolved.1, None);
    }

    #[test]
    fn test_provider_names() {
        let names = provider_names();
        assert!(names.len() >= 7);
        for expected in ["anthropic", "together", "local", "openai", "ollama"] {
            assert!(names.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_resolve_tier_model_default_aliases() {
        for (tier, want_model, want_provider) in [
            ("frontier", "claude-sonnet-4-20250514", "anthropic"),
            ("small", "Qwen/Qwen3.5-9B", "openrouter"),
        ] {
            let (model, provider) = resolve_tier_model(tier, None).unwrap();
            assert_eq!(model, want_model);
            assert_eq!(provider, want_provider);
        }
    }

    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
        assert_eq!(model, "gpt-4o-mini");
        assert_eq!(provider, "openai");
    }

    #[test]
    fn test_provider_config_anthropic() {
        let pdef = provider_config("anthropic").expect("anthropic is a default provider");
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    #[test]
    fn test_resolve_base_url_no_env() {
        // With a defaulted (unset) `base_url_env`, the static base_url is
        // expected back verbatim.
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    #[test]
    fn test_default_config_roundtrip() {
        let config = default_config();
        assert_eq!(config.tier_defaults.default, "mid");
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
    }

    #[test]
    fn test_model_params_empty() {
        // The shipped defaults define no `model_defaults` entry for this id,
        // so its params map should come back empty.
        assert!(model_params("claude-sonnet-4-20250514").is_empty());
    }

    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();

        let mut extra = ProvidersConfig::default();
        extra.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        extra.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        set_user_overrides(Some(extra));

        // The overlay's alias, provider name, and provider config must all
        // become visible through the public lookup functions.
        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        let base = provider_config("acme").map(|p| p.base_url);
        assert_eq!(base, Some("https://llm.acme.test/v1".to_string()));

        reset_overrides();
    }

    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();

        let mut extra = ProvidersConfig::default();
        extra.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        extra
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(extra));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        let input_rate = entry.pricing.as_ref().map(|p| p.input_per_mtok);
        assert_eq!(input_rate, Some(1.25));
        // Per-mtok catalog pricing surfaces as per-1k-token rates here.
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }

    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();

        let mut extra = ProvidersConfig::default();
        extra.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        set_user_overrides(Some(extra));

        // The overlay rule must take effect over the built-in defaults.
        assert_eq!(infer_provider("internal-foo"), "openai");

        reset_overrides();
    }
}