Skip to main content

harn_vm/
llm_config.rs

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide providers config. Initialized at most once, by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file that was merged into `CONFIG`. Left unset
/// when `load_config` ends up using the built-in defaults alone.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();

thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Provider/model catalog, deserialized from TOML. Every section is optional
/// and falls back to its `Default` value when absent.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Configured default provider name. Consulted after the
    /// `HARN_DEFAULT_PROVIDER` environment variable; blank values and
    /// `"auto"` are ignored by the resolver.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions keyed by provider name.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Model aliases keyed by alias name.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Per-alias tool-calling records keyed by alias name.
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model catalog rows keyed by model id.
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Model name per provider, looked up by lowercased provider name in
    /// `qc_default_model`.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Provider inference rules, evaluated in order; the first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Tier rules, evaluated in order when a model row declares no tier.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier returned when neither a catalog row nor a tier rule applies.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Default request parameters keyed by a glob pattern over model ids.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
40
41impl ProvidersConfig {
42    pub fn is_empty(&self) -> bool {
43        self.default_provider.is_none()
44            && self.providers.is_empty()
45            && self.aliases.is_empty()
46            && self.alias_tool_calling.is_empty()
47            && self.models.is_empty()
48            && self.qc_defaults.is_empty()
49            && self.inference_rules.is_empty()
50            && self.tier_rules.is_empty()
51            && self.model_defaults.is_empty()
52            && self.tier_defaults.default == default_mid()
53    }
54
55    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
56        self.providers.extend(overlay.providers.clone());
57        self.aliases.extend(overlay.aliases.clone());
58        self.alias_tool_calling
59            .extend(overlay.alias_tool_calling.clone());
60        self.models.extend(overlay.models.clone());
61        self.qc_defaults.extend(overlay.qc_defaults.clone());
62
63        if overlay.default_provider.is_some() {
64            self.default_provider = overlay.default_provider.clone();
65        }
66
67        if !overlay.inference_rules.is_empty() {
68            let mut merged = overlay.inference_rules.clone();
69            merged.extend(self.inference_rules.clone());
70            self.inference_rules = merged;
71        }
72
73        if !overlay.tier_rules.is_empty() {
74            let mut merged = overlay.tier_rules.clone();
75            merged.extend(self.tier_rules.clone());
76            self.tier_rules = merged;
77        }
78
79        if overlay.tier_defaults.default != default_mid() {
80            self.tier_defaults = overlay.tier_defaults.clone();
81        }
82
83        for (pattern, defaults) in &overlay.model_defaults {
84            self.model_defaults
85                .entry(pattern.clone())
86                .or_default()
87                .extend(defaults.clone());
88        }
89    }
90}
91
/// Definition of a single provider. `base_url` is the only required key;
/// every other field has a serde default.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Optional display name. NOTE(review): presumably for UI — confirm.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon identifier. NOTE(review): format not visible here.
    #[serde(default)]
    pub icon: Option<String>,
    /// Base URL of the provider. Required when deserializing.
    pub base_url: String,
    /// NOTE(review): presumably the name of an environment variable that
    /// overrides `base_url` — confirm against the code that reads it.
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth style; defaults to `"bearer"`. `"none"` means no API key is
    /// needed (see `provider_key_available`).
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// NOTE(review): presumably a custom auth header name — confirm.
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Environment variable name(s) that may hold the credential.
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Additional headers as name/value pairs.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint; empty string when not configured.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Optional completion endpoint.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional healthcheck request description.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Provider-level feature names, matched exactly by
    /// `provider_has_feature`.
    #[serde(default)]
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
139
140impl Default for ProviderDef {
141    fn default() -> Self {
142        Self {
143            display_name: None,
144            icon: None,
145            base_url: String::new(),
146            base_url_env: None,
147            auth_style: default_bearer(),
148            auth_header: None,
149            auth_env: AuthEnv::None,
150            extra_headers: BTreeMap::new(),
151            chat_endpoint: String::new(),
152            completion_endpoint: None,
153            healthcheck: None,
154            features: Vec::new(),
155            fallback: None,
156            retry_count: None,
157            retry_delay_ms: None,
158            rpm: None,
159            cost_per_1k_in: None,
160            cost_per_1k_out: None,
161            latency_p50_ms: None,
162        }
163    }
164}
165
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
169
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No environment variable configured (the default).
    /// `provider_key_available` treats this as "no key required".
    #[default]
    None,
    /// A single environment variable name.
    Single(String),
    /// Several candidate environment variable names.
    Multiple(Vec<String>),
}
180
/// Description of a provider healthcheck request. Only `method` is required.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Request method. Required when deserializing.
    pub method: String,
    /// Optional path. NOTE(review): presumably relative to the provider's
    /// base URL — confirm against the code that performs the check.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional URL. NOTE(review): how it interacts with `path` is not
    /// visible here.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<String>,
}
191
/// Target of a model alias: the model id and provider that the alias name
/// resolves to.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id returned when the alias is resolved.
    pub id: String,
    /// Provider name returned when the alias is resolved.
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
203
204#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
205pub struct AliasToolCallingDef {
206    #[serde(default)]
207    #[serde(skip_serializing_if = "Option::is_none")]
208    pub native: Option<String>,
209    #[serde(default)]
210    #[serde(skip_serializing_if = "Option::is_none")]
211    pub text: Option<String>,
212    #[serde(default)]
213    #[serde(skip_serializing_if = "Option::is_none")]
214    pub streaming_native: Option<String>,
215    #[serde(default)]
216    #[serde(skip_serializing_if = "Option::is_none")]
217    pub fallback_mode: Option<String>,
218    #[serde(default)]
219    #[serde(skip_serializing_if = "Option::is_none")]
220    pub failure_reason: Option<String>,
221    #[serde(default)]
222    #[serde(skip_serializing_if = "Option::is_none")]
223    pub last_probe_at: Option<String>,
224}
225
/// Model pricing expressed per million tokens ("MTok"). Input and output
/// rates are required; cache rates are optional.
/// NOTE(review): currency is presumably USD, as for provider pricing — confirm.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// Rate per million input tokens.
    pub input_per_mtok: f64,
    /// Rate per million output tokens.
    pub output_per_mtok: f64,
    /// Rate per million cache-read tokens, when specified.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Rate per million cache-write tokens, when specified.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
235
/// Optional accelerated-serving ("fast mode") tier for a model. Off by
/// default: its presence only *describes* that the provider offers a
/// faster, premium-priced serving path running the same weights — callers
/// must explicitly opt in via the provider's request knob, so nothing here
/// changes default behavior. Deliberately provider-agnostic: Anthropic
/// exposes the tier as `speed = "fast"` (beta-gated), while OpenAI uses
/// `service_tier = "fast"` / `"priority"`. Premium pricing is stored as
/// absolute per-MTok rates rather than a single multiplier because
/// providers price the tier asymmetrically (Anthropic Opus 4.8 is 2x
/// standard; Opus 4.6/4.7 fast mode is 6x).
///
/// `param` and `value` are required when deserializing; every other field
/// is optional and becomes `None` when absent.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    /// Request field that opts into the fast tier (e.g. "speed" for
    /// Anthropic, "service_tier" for OpenAI).
    pub param: String,
    /// Value to send on `param` (e.g. "fast", "priority").
    pub value: String,
    /// Provider beta/feature header required to use the tier, if any
    /// (e.g. Anthropic "fast-mode-2026-02-01").
    #[serde(default)]
    pub beta_header: Option<String>,
    /// Output-tokens-per-second speedup vs standard serving (e.g. 2.5).
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    /// Lifecycle of the fast tier: "ga" | "research_preview" |
    /// "deprecated". None when unspecified.
    #[serde(default)]
    pub status: Option<String>,
    /// Premium pricing charged while the fast tier is active (absolute
    /// per-MTok rates, not a multiplier on standard pricing).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Free-text note: constraints, deprecation timeline, etc.
    #[serde(default)]
    pub note: Option<String>,
}
272
/// One row of the model catalog (`ProvidersConfig::models`, keyed by model
/// id). `name`, `provider` and `context_window` are required when
/// deserializing; everything else has a serde default.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Model name. NOTE(review): presumably human-readable — confirm.
    pub name: String,
    /// Provider that hosts the model. Provider inference treats a catalog
    /// row as more authoritative than any pattern-based inference rule.
    pub provider: String,
    /// Context window size. NOTE(review): presumably in tokens — confirm.
    pub context_window: u64,
    /// Optional runtime context window. NOTE(review): relationship to
    /// `context_window` is not visible here.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    /// Optional stream timeout. NOTE(review): unit not visible here.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags declared on the row.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Per-model pricing; preferred over provider-level pricing by
    /// `pricing_per_1k_for`.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Whether the model is deprecated. Defaults to `false`.
    #[serde(default)]
    pub deprecated: bool,
    /// Free-text deprecation note.
    #[serde(default)]
    pub deprecation_note: Option<String>,
    /// Structured replacement pointer: the catalog id of the model that
    /// supersedes this one (e.g. an older Opus row points at the newest
    /// Opus). Lets release tooling express "migrate to X" in a
    /// machine-readable way instead of burying it in `deprecation_note`
    /// free text. A model may be superseded without being `deprecated`
    /// (a newer option exists but this one is still fully supported);
    /// pair it with `deprecated = true` once a sunset is announced.
    #[serde(default)]
    pub superseded_by: Option<String>,
    /// Accelerated-serving ("fast mode") tier metadata, when the model's
    /// provider offers one. Off by default — see [`FastModeDef`]. None for
    /// models with no faster serving path.
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    /// Quality tags. NOTE(review): vocabulary not visible here.
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Whether the model can be reached over a normal API-key serverless call,
    /// or only via a dedicated/provisioned endpoint that the caller must spin
    /// up out-of-band. Providers like Together list dedicated-only routes
    /// alongside serverless ones in `/v1/models`, so this metadata lets clients
    /// avoid presenting them as one-click options.
    #[serde(default)]
    pub availability: ModelAvailability,
    /// Popular-consensus tier label. Enum-typed string: "small" | "mid" |
    /// "frontier" | "reasoning". Self-declared per model (no pattern-matched
    /// rule table) so the catalog is the single source of truth. When None
    /// the resolver returns the catalog default ("mid"). Use the richer
    /// `strengths` + `benchmarks` fields to pick models for specific
    /// workloads — `tier` exists only as a coarse popular-consensus shortcut.
    #[serde(default)]
    pub tier: Option<String>,
    /// True when the model weights are downloadable / self-hostable
    /// (open-weight / open-source license, regardless of commercial-use
    /// restrictions). False when weights are closed (Anthropic, OpenAI,
    /// Google, etc.). None when the catalog row predates the migration.
    #[serde(default)]
    pub open_weight: Option<bool>,
    /// Workload-shaped strength tags. Conventional values include
    /// `coding`, `summarization`, `long_context`, `tool_use`, `reasoning`,
    /// `vision`, `speed`, `cheap`, `agentic`. Selectors should treat
    /// missing entries as "no claim" rather than "no strength."
    #[serde(default)]
    pub strengths: Vec<String>,
    /// Public benchmark numbers, keyed by a snake_case identifier
    /// (`swe_bench_verified`, `humaneval`, `aa_intelligence_index`, etc.).
    /// Values are the raw published scores. The selector layer is free
    /// to normalize per benchmark; the catalog records the canonical
    /// score so future readers can audit the source.
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
}
341
/// How a cataloged model can be reached. Serialized in `snake_case`
/// ("serverless", "dedicated", "unknown"); `Serverless` is the default.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// Reachable through the provider's normal API-key path with no extra
    /// setup. The default for cataloged hosted/local models: by cataloging a
    /// row we are claiming the route works out of the box.
    #[default]
    Serverless,
    /// Requires the caller to provision a dedicated endpoint before requests
    /// will succeed. The catalog row exists for selection/pricing UI, but
    /// hosts must not auto-route to it.
    Dedicated,
    /// Availability is not known ahead of time. Used for routes that were
    /// surfaced dynamically (e.g. through `/v1/models`) without a static
    /// claim from Harn or the user.
    Unknown,
}
359
360impl ModelAvailability {
361    pub fn as_str(self) -> &'static str {
362        match self {
363            Self::Serverless => "serverless",
364            Self::Dedicated => "dedicated",
365            Self::Unknown => "unknown",
366        }
367    }
368
369    pub fn parse(value: &str) -> Option<Self> {
370        match value {
371            "serverless" => Some(Self::Serverless),
372            "dedicated" => Some(Self::Dedicated),
373            "unknown" => Some(Self::Unknown),
374            _ => None,
375        }
376    }
377}
378
/// Result of `resolve_model_info`: the complete catalog identity of a
/// selector.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Model id: the alias target, or the prefix-normalized selector.
    pub id: String,
    /// Provider name: the alias's provider, or the inferred one.
    pub provider: String,
    /// The selector itself when it matched a configured alias, else `None`.
    pub alias: Option<String>,
    /// Tool format to use for this model/provider pair.
    pub tool_format: String,
    /// Tier label of the model.
    pub tier: String,
}
387
/// Rule mapping model ids to a provider. A rule applies when any matcher
/// that is set matches the model id.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern matched against the model id.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring that the model id must contain.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model id to match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider name returned when the rule applies. Required.
    pub provider: String,
}
398
/// Rule mapping model ids to a tier label. A rule applies when any matcher
/// that is set matches the model id.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern matched against the model id.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring that the model id must contain.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model id to match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier label returned when the rule applies. Required.
    pub tier: String,
}
409
/// Fallback tier settings.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Tier returned when no catalog row or tier rule decides; defaults to
    /// `"mid"`.
    #[serde(default = "default_mid")]
    pub default: String,
}
415
416impl Default for TierDefaults {
417    fn default() -> Self {
418        Self {
419            default: default_mid(),
420        }
421    }
422}
423
/// Built-in fallback tier label.
fn default_mid() -> String {
    String::from("mid")
}
427
428/// Load and cache the providers config. Called once at VM startup.
429pub fn load_config() -> &'static ProvidersConfig {
430    CONFIG.get_or_init(|| {
431        let mut config = default_config();
432        let verbose_config_logging = matches!(
433            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
434            Some("1" | "true" | "TRUE" | "yes" | "YES")
435        ) || matches!(
436            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
437            Some("1" | "true" | "TRUE" | "yes" | "YES")
438        );
439        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
440            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
441                config.merge_from(&overlay);
442                let _ = CONFIG_PATH.set(path);
443                return config;
444            }
445        }
446        if let Some(home) = dirs_or_home() {
447            let path = format!("{home}/.config/harn/providers.toml");
448            if let Some(overlay) = read_external_config(&path, false) {
449                config.merge_from(&overlay);
450                let _ = CONFIG_PATH.set(path);
451                return config;
452            }
453        }
454        config
455    })
456}
457
458fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
459    match std::fs::read_to_string(path) {
460        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
461            Ok(config) => {
462                if verbose {
463                    eprintln!(
464                        "[llm_config] Loaded {} providers, {} aliases from {}",
465                        config.providers.len(),
466                        config.aliases.len(),
467                        path
468                    );
469                }
470                Some(config)
471            }
472            Err(error) => {
473                eprintln!("[llm_config] TOML parse error in {path}: {error}");
474                None
475            }
476        },
477        Err(error) => {
478            if verbose {
479                eprintln!("[llm_config] Cannot read {path}: {error}");
480            }
481            None
482        }
483    }
484}
485
486/// Parse a provider/model catalog overlay in the same shape as
487/// `providers.toml` or `[llm]` package-manifest sections.
488pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
489    toml::from_str::<ProvidersConfig>(src)
490}
491
492/// Returns the filesystem path of the currently-loaded providers config, if
493/// any. Returns `None` when built-in defaults are active.
494pub fn loaded_config_path() -> Option<std::path::PathBuf> {
495    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
496    let _ = load_config();
497    CONFIG_PATH.get().map(std::path::PathBuf::from)
498}
499
500/// Install per-run provider config overlays. The overlay uses the same shape as
501/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
502/// manifests. Passing `None` clears the overlay.
503pub fn set_user_overrides(config: Option<ProvidersConfig>) {
504    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
505}
506
507/// Clear per-run provider config overlays.
508pub fn clear_user_overrides() {
509    set_user_overrides(None);
510}
511
512fn effective_config() -> ProvidersConfig {
513    let mut merged = load_config().clone();
514    USER_OVERRIDES.with(|cell| {
515        if let Some(overlay) = cell.borrow().as_ref() {
516            merged.merge_from(overlay);
517        }
518    });
519    merged
520}
521
522/// Resolve a model alias to (model_id, provider_name).
523pub fn resolve_model(alias: &str) -> (String, Option<String>) {
524    let config = effective_config();
525    if let Some(a) = config.aliases.get(alias) {
526        return (a.id.clone(), Some(a.provider.clone()));
527    }
528    (normalize_model_id(alias), None)
529}
530
/// Removes a leading host/provider selector from `raw`, leaving the
/// provider-native model id. Selectors name the transport, not the model:
/// `ollama:qwen3:30b` must reach Ollama as `qwen3:30b`, not as a model called
/// `ollama` (this mirrors Burin's existing normalization). Cerebras uses a
/// slash (`cerebras/gpt-oss-120b`) because its own /v1/models endpoint
/// returns bare names that overlap OpenAI's families.
///
/// Only the first matching prefix is removed; input without a known prefix
/// is returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .copied()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes recognized by `normalize_model_id`, tried in order.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
548
549/// Resolve an alias or selector into the complete catalog identity hosts need:
550/// provider inference, prefix-normalized model id, default tool format, and tier.
551pub fn resolve_model_info(selector: &str) -> ResolvedModel {
552    let config = effective_config();
553    if let Some(alias) = config.aliases.get(selector) {
554        let id = alias.id.clone();
555        let provider = alias.provider.clone();
556        let tool_format = alias
557            .tool_format
558            .clone()
559            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
560        return ResolvedModel {
561            tier: model_tier_with_config(&config, &id),
562            id,
563            provider,
564            alias: Some(selector.to_string()),
565            tool_format,
566        };
567    }
568
569    let id = normalize_model_id(selector);
570    let provider = infer_provider_with_config(&config, selector).provider;
571    let tool_format = default_tool_format_with_config(&config, &id, &provider);
572    let tier = model_tier_with_config(&config, &id);
573    ResolvedModel {
574        id,
575        provider,
576        alias: None,
577        tool_format,
578        tier,
579    }
580}
581
582/// Infer provider from a model ID using inference rules.
583pub fn infer_provider(model_id: &str) -> String {
584    infer_provider_detail(model_id).provider
585}
586
587/// Infer provider from a model ID and retain whether the configured default was used.
588pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
589    let config = effective_config();
590    infer_provider_with_config(&config, model_id)
591}
592
593fn infer_provider_with_config(
594    config: &ProvidersConfig,
595    model_id: &str,
596) -> crate::llm::provider::ProviderInference {
597    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
598        return crate::llm::provider::ProviderInference::builtin("ollama");
599    }
600    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
601        return crate::llm::provider::ProviderInference::builtin("huggingface");
602    }
603    // Exact catalog rows are the most authoritative declaration of where
604    // a model is hosted: any pattern-based inference rule is necessarily
605    // less specific than `[models."<id>"].provider = "<name>"`. Catalogs
606    // include user overlays, so users can still re-home a model by
607    // setting a catalog entry in their own providers.toml.
608    let normalized_id = normalize_model_id(model_id);
609    if let Some(model) = config
610        .models
611        .get(model_id)
612        .or_else(|| config.models.get(&normalized_id))
613    {
614        return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
615    }
616    for rule in &config.inference_rules {
617        if let Some(exact) = &rule.exact {
618            if model_id == exact {
619                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
620            }
621        }
622        if let Some(pattern) = &rule.pattern {
623            if glob_match(pattern, model_id) {
624                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
625            }
626        }
627        if let Some(substr) = &rule.contains {
628            if model_id.contains(substr.as_str()) {
629                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
630            }
631        }
632    }
633    crate::llm::provider::infer_provider_from_model_id(
634        model_id,
635        &default_provider_with_config(config),
636    )
637}
638
639pub fn default_provider() -> String {
640    let config = effective_config();
641    default_provider_with_config(&config)
642}
643
644fn default_provider_with_config(config: &ProvidersConfig) -> String {
645    std::env::var("HARN_DEFAULT_PROVIDER")
646        .ok()
647        .map(|value| value.trim().to_string())
648        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
649        .or_else(|| {
650            config
651                .default_provider
652                .as_deref()
653                .map(str::trim)
654                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
655                .map(str::to_string)
656        })
657        .unwrap_or_else(|| "anthropic".to_string())
658}
659
660/// Get model tier ("small", "mid", "frontier").
661pub fn model_tier(model_id: &str) -> String {
662    let config = effective_config();
663    model_tier_with_config(&config, model_id)
664}
665
666fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
667    // Per-model self-declared tier wins. This is the only path.
668    if let Some(model) = config.models.get(model_id) {
669        if let Some(tier) = model.tier.as_deref() {
670            let trimmed = tier.trim();
671            if !trimmed.is_empty() {
672                return trimmed.to_string();
673            }
674        }
675    }
676    // Legacy pattern-rules: still consulted while we finish migrating the
677    // long tail of models to per-row `tier = "..."`. Newly added rows
678    // should set `tier` directly; the rule table is a fallback only.
679    for rule in &config.tier_rules {
680        if let Some(exact) = &rule.exact {
681            if model_id == exact {
682                return rule.tier.clone();
683            }
684        }
685        if let Some(pattern) = &rule.pattern {
686            if glob_match(pattern, model_id) {
687                return rule.tier.clone();
688            }
689        }
690        if let Some(substr) = &rule.contains {
691            if model_id.contains(substr.as_str()) {
692                return rule.tier.clone();
693            }
694        }
695    }
696    config.tier_defaults.default.clone()
697}
698
699/// Get provider config for resolving base_url, auth, etc.
700pub fn provider_config(name: &str) -> Option<ProviderDef> {
701    effective_config().providers.get(name).cloned()
702}
703
704/// Get model-specific default parameters (temperature, etc.).
705/// Matches glob patterns in model_defaults keys.
706pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
707    let config = effective_config();
708    let mut params = BTreeMap::new();
709    for (pattern, defaults) in &config.model_defaults {
710        if glob_match(pattern, model_id) {
711            for (k, v) in defaults {
712                params.insert(k.clone(), v.clone());
713            }
714        }
715    }
716    params
717}
718
719/// Get list of configured provider names.
720pub fn provider_names() -> Vec<String> {
721    effective_config().providers.keys().cloned().collect()
722}
723
724/// Return every configured alias name, sorted deterministically.
725pub fn known_model_names() -> Vec<String> {
726    effective_config().aliases.keys().cloned().collect()
727}
728
729pub fn alias_entries() -> Vec<(String, AliasDef)> {
730    effective_config().aliases.into_iter().collect()
731}
732
733pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
734    effective_config().alias_tool_calling.get(alias).cloned()
735}
736
737/// Return every configured model-catalog entry, sorted by provider then id.
738pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
739    let mut entries: Vec<_> = effective_config()
740        .models
741        .into_iter()
742        .map(|(id, model)| {
743            let provider = model.provider.clone();
744            (
745                id.clone(),
746                with_effective_capability_tags(id, provider, model),
747            )
748        })
749        .collect();
750    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
751        model_a
752            .provider
753            .cmp(&model_b.provider)
754            .then_with(|| id_a.cmp(id_b))
755    });
756    entries
757}
758
759pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
760    effective_config()
761        .models
762        .get(model_id)
763        .cloned()
764        .map(|model| {
765            let provider = model.provider.clone();
766            with_effective_capability_tags(model_id.to_string(), provider, model)
767        })
768}
769
770pub fn qc_default_model(provider: &str) -> Option<String> {
771    std::env::var("BURIN_QC_MODEL")
772        .ok()
773        .filter(|value| !value.trim().is_empty())
774        .or_else(|| {
775            effective_config()
776                .qc_defaults
777                .get(&provider.to_lowercase())
778                .cloned()
779        })
780}
781
/// Returns the built-in default model id for `provider`.
///
/// "local" and "mlx" can be overridden through environment variables
/// (`LOCAL_LLM_MODEL` then `HARN_LLM_MODEL`, and `MLX_MODEL_ID`
/// respectively). Any provider without a dedicated entry gets the same
/// catch-all default.
pub fn default_model_for_provider(provider: &str) -> String {
    // First environment variable that is set wins; otherwise the fallback.
    let from_env = |names: &[&str], fallback: &str| -> String {
        names
            .iter()
            .find_map(|name| std::env::var(name).ok())
            .unwrap_or_else(|| fallback.to_string())
    };

    let fixed = match provider {
        "local" => {
            return from_env(
                &["LOCAL_LLM_MODEL", "HARN_LLM_MODEL"],
                "gemma-4-26b-a4b-it",
            )
        }
        "mlx" => return from_env(&["MLX_MODEL_ID"], "unsloth/Qwen3.6-27B-UD-MLX-4bit"),
        "openai" => "gpt-4o-mini",
        "ollama" => "llama3.2",
        "openrouter" => "anthropic/claude-sonnet-4.6",
        _ => "claude-sonnet-4-6",
    };
    fixed.to_string()
}
795
796pub fn qc_defaults() -> BTreeMap<String, String> {
797    effective_config().qc_defaults
798}
799
800pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
801    effective_config()
802        .models
803        .get(model_id)
804        .and_then(|model| model.pricing.clone())
805}
806
807pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
808    model_pricing_per_mtok(model_id)
809        .map(|pricing| {
810            (
811                pricing.input_per_mtok / 1000.0,
812                pricing.output_per_mtok / 1000.0,
813            )
814        })
815        .or_else(|| {
816            let (input, output, _) = provider_economics(provider);
817            match (input, output) {
818                (Some(input), Some(output)) => Some((input, output)),
819                _ => None,
820            }
821        })
822}
823
824pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
825    match auth_env {
826        AuthEnv::None => Vec::new(),
827        AuthEnv::Single(name) => vec![name.clone()],
828        AuthEnv::Multiple(names) => names.clone(),
829    }
830}
831
832pub fn provider_key_available(provider: &str) -> bool {
833    let Some(pdef) = provider_config(provider) else {
834        return provider == "ollama";
835    };
836    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
837        return true;
838    }
839    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
840        std::env::var(env_name)
841            .ok()
842            .is_some_and(|value| !value.trim().is_empty())
843    })
844}
845
846pub fn available_provider_names() -> Vec<String> {
847    provider_names()
848        .into_iter()
849        .filter(|provider| provider_key_available(provider))
850        .collect()
851}
852
853/// Check if a provider advertises a legacy provider-level feature.
854pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
855    provider_config(provider)
856        .map(|p| p.features.iter().any(|f| f == feature))
857        .unwrap_or(false)
858}
859
860/// Provider-level catalog pricing/latency. Model-specific catalog pricing
861/// wins when available; this is the adapter-level fallback used by routing
862/// and portal summaries when a model has no explicit catalog entry.
863pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
864    provider_config(provider)
865        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
866        .unwrap_or((None, None, None))
867}
868
869/// Resolve the default tool format for a model+provider combination.
870/// Priority: alias `tool_format` (matched by model ID) > provider/model
871/// capability matrix > legacy provider feature > "text".
872pub fn default_tool_format(model: &str, provider: &str) -> String {
873    let config = effective_config();
874    default_tool_format_with_config(&config, model, provider)
875}
876
877fn default_tool_format_with_config(
878    config: &ProvidersConfig,
879    model: &str,
880    provider: &str,
881) -> String {
882    // Aliases match by model ID + provider, or by alias name.
883    for (name, alias) in &config.aliases {
884        let matches = (alias.id == model && alias.provider == provider) || name == model;
885        if matches {
886            if let Some(ref fmt) = alias.tool_format {
887                return fmt.clone();
888            }
889        }
890    }
891    let capabilities = crate::llm::capabilities::lookup(provider, model);
892    if let Some(format) = capabilities.preferred_tool_format.as_deref() {
893        if matches!(format, "native" | "text") {
894            return format.to_string();
895        }
896    }
897    let capability_matrix_native = capabilities.native_tools;
898    let legacy_provider_native = config
899        .providers
900        .get(provider)
901        .map(|p| p.features.iter().any(|f| f == "native_tools"))
902        .unwrap_or(false);
903    if capability_matrix_native || legacy_provider_native {
904        "native".to_string()
905    } else {
906        "text".to_string()
907    }
908}
909
910fn with_effective_capability_tags(
911    model_id: String,
912    provider: String,
913    mut model: ModelDef,
914) -> ModelDef {
915    model.capabilities = effective_model_capability_tags(&provider, &model_id);
916    model
917}
918
919/// Legacy display tags derived from the canonical provider/model capability
920/// matrix. The matrix is the source of truth; `models.*.capabilities` in
921/// providers.toml is accepted only for backwards-compatible parsing.
922pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
923    let caps = crate::llm::capabilities::lookup(provider, model_id);
924    let mut tags = Vec::new();
925    // Today all Harn chat providers expose streaming. Keep this as a
926    // transport baseline rather than a duplicated per-model declaration.
927    tags.push("streaming".to_string());
928    if caps.native_tools || caps.text_tool_wire_format_supported {
929        tags.push("tools".to_string());
930    }
931    if !caps.tool_search.is_empty() {
932        tags.push("tool_search".to_string());
933    }
934    if caps.vision || caps.vision_supported {
935        tags.push("vision".to_string());
936    }
937    if caps.audio {
938        tags.push("audio".to_string());
939    }
940    if caps.pdf {
941        tags.push("pdf".to_string());
942    }
943    if caps.files_api_supported {
944        tags.push("files".to_string());
945    }
946    if caps.prompt_caching {
947        tags.push("prompt_caching".to_string());
948    }
949    if !caps.thinking_modes.is_empty() {
950        tags.push("thinking".to_string());
951    }
952    if caps.interleaved_thinking_supported
953        || caps
954            .thinking_modes
955            .iter()
956            .any(|mode| mode == "adaptive" || mode == "effort")
957    {
958        tags.push("extended_thinking".to_string());
959    }
960    if caps.json_schema.is_some() {
961        tags.push("structured_output".to_string());
962    }
963    tags
964}
965
966/// Resolve a tier or alias into a concrete model/provider pair.
967pub fn resolve_tier_model(
968    target: &str,
969    preferred_provider: Option<&str>,
970) -> Option<(String, String)> {
971    let config = effective_config();
972
973    if let Some(alias) = config.aliases.get(target) {
974        return Some((alias.id.clone(), alias.provider.clone()));
975    }
976
977    let candidate_aliases = if let Some(provider) = preferred_provider {
978        vec![
979            format!("{provider}/{target}"),
980            format!("{provider}:{target}"),
981            format!("tier/{target}"),
982            target.to_string(),
983        ]
984    } else {
985        vec![format!("tier/{target}"), target.to_string()]
986    };
987
988    for alias_name in candidate_aliases {
989        if let Some(alias) = config.aliases.get(&alias_name) {
990            return Some((alias.id.clone(), alias.provider.clone()));
991        }
992    }
993
994    None
995}
996
997/// Return all configured alias-backed model/provider pairs whose resolved
998/// model falls into the requested capability tier. The result is de-duplicated
999/// and sorted deterministically by provider then model id.
1000pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
1001    let config = effective_config();
1002    let mut seen = std::collections::BTreeSet::new();
1003    let mut candidates = Vec::new();
1004
1005    for alias in config.aliases.values() {
1006        let pair = (alias.id.clone(), alias.provider.clone());
1007        if seen.contains(&pair) {
1008            continue;
1009        }
1010        if model_tier(&alias.id) == target {
1011            seen.insert(pair.clone());
1012            candidates.push(pair);
1013        }
1014    }
1015
1016    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
1017        provider_a
1018            .cmp(provider_b)
1019            .then_with(|| model_a.cmp(model_b))
1020    });
1021    candidates
1022}
1023
1024/// Return all configured alias-backed model/provider pairs. Used by routing
1025/// policies that need to compare alternatives across tiers.
1026pub fn all_model_candidates() -> Vec<(String, String)> {
1027    let config = effective_config();
1028    let mut seen = std::collections::BTreeSet::new();
1029    let mut candidates = Vec::new();
1030
1031    for alias in config.aliases.values() {
1032        let pair = (alias.id.clone(), alias.provider.clone());
1033        if seen.insert(pair.clone()) {
1034            candidates.push(pair);
1035        }
1036    }
1037
1038    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
1039        provider_a
1040            .cmp(provider_b)
1041            .then_with(|| model_a.cmp(model_b))
1042    });
1043    candidates
1044}
1045
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
///
/// `*` matches any (possibly empty) run of characters; every other character
/// matches itself. Any number of wildcards is supported ("*-coder-*"), and a
/// pattern without `*` must equal `input` exactly. Matching is anchored at
/// both ends, so the literal text before the first `*` and after the last `*`
/// may not overlap inside `input` ("ab*ba" does not match "aba").
fn glob_match(pattern: &str, input: &str) -> bool {
    let mut segments = pattern.split('*');

    // `split` always yields at least one segment: the literal prefix.
    let prefix = segments.next().unwrap_or("");
    let Some(mut rest) = input.strip_prefix(prefix) else {
        return false;
    };

    let mut tail: Vec<&str> = segments.collect();
    let Some(suffix) = tail.pop() else {
        // No `*` in the pattern: the prefix was the whole pattern.
        return rest.is_empty();
    };

    // Middle segments are consumed left-to-right at their earliest match,
    // which leaves the most room for the remaining pieces.
    for segment in tail {
        match rest.find(segment) {
            Some(at) => rest = &rest[at + segment.len()..],
            None => return false,
        }
    }

    rest.ends_with(suffix)
}
1063
/// Value of the `HOME` environment variable, or `None` when it is unset or
/// not valid Unicode.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
1067
1068/// Resolve the effective base URL for a provider, checking the `base_url_env`
1069/// override first, then falling back to the configured `base_url`.
1070pub fn resolve_base_url(pdef: &ProviderDef) -> String {
1071    if let Some(env_name) = &pdef.base_url_env {
1072        if let Ok(val) = std::env::var(env_name) {
1073            // Strip surrounding quotes that some .env parsers leave intact.
1074            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
1075            if !trimmed.is_empty() {
1076                return trimmed.to_string();
1077            }
1078        }
1079    }
1080    pdef.base_url.clone()
1081}
1082
1083/// Embedded copy of `llm/providers.toml`, the single source of truth for
1084/// Harn's bundled provider/model catalog. Edit the TOML, not this string.
1085const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
1086
1087/// Parse the embedded `providers.toml` into the runtime `ProvidersConfig`.
1088///
1089/// Hosts overlay this base via `HARN_PROVIDERS_CONFIG`,
1090/// `~/.config/harn/providers.toml`, `harn.toml`, package-manifest
1091/// `[llm]` sections, and per-run `set_user_overrides(...)`. The same
1092/// Serde shape applies at every layer, so there is exactly one schema to
1093/// keep coherent — no parallel Rust-literal catalog.
1094///
1095/// We `expect` on parse failure because the file is bundled into the
1096/// binary at compile time; a malformed embedded catalog is a build-time
1097/// invariant violation that should fail every test, not silently
1098/// degrade in production.
1099fn default_config() -> ProvidersConfig {
1100    parse_config_toml(EMBEDDED_PROVIDERS_TOML)
1101        .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
1102}
1103
/// Test helper: the built-in default config with `overlay` merged on top.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
1110
1111#[cfg(test)]
1112mod tests {
1113    use super::*;
1114
1115    fn reset_overrides() {
1116        clear_user_overrides();
1117    }
1118
1119    #[test]
1120    fn test_glob_match_prefix() {
1121        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
1122        assert!(glob_match("gpt-*", "gpt-4o"));
1123        assert!(!glob_match("claude-*", "gpt-4o"));
1124    }
1125
1126    #[test]
1127    fn test_glob_match_suffix() {
1128        assert!(glob_match("*-latest", "llama3.2-latest"));
1129        assert!(!glob_match("*-latest", "llama3.2"));
1130    }
1131
1132    #[test]
1133    fn test_glob_match_middle() {
1134        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
1135        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
1136    }
1137
1138    #[test]
1139    fn test_glob_match_exact() {
1140        assert!(glob_match("gpt-4o", "gpt-4o"));
1141        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
1142    }
1143
1144    #[test]
1145    fn test_infer_provider_from_defaults() {
1146        let _guard = crate::llm::env_lock().lock().expect("env lock");
1147        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1148        unsafe {
1149            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1150        }
1151
1152        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
1153        assert_eq!(infer_provider("gpt-4o"), "openai");
1154        assert_eq!(infer_provider("o1-preview"), "openai");
1155        assert_eq!(infer_provider("o3-mini"), "openai");
1156        assert_eq!(infer_provider("o4-mini"), "openai");
1157        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
1158        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
1159        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
1160        assert_eq!(infer_provider("unknown-model"), "anthropic");
1161
1162        unsafe {
1163            match prev_default_provider {
1164                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1165                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1166            }
1167        }
1168    }
1169
1170    #[test]
1171    fn test_infer_provider_prefix_rules() {
1172        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
1173        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
1174        // Even when the id also contains `/`, the local transport prefix wins.
1175        assert_eq!(infer_provider("local:owner/model"), "ollama");
1176        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
1177    }
1178
1179    #[test]
1180    fn test_openrouter_inference_requires_one_slash() {
1181        let _guard = crate::llm::env_lock().lock().expect("env lock");
1182        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1183        unsafe {
1184            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1185        }
1186
1187        assert_eq!(infer_provider("org/model"), "openrouter");
1188        assert_eq!(infer_provider("org/team/model"), "anthropic");
1189
1190        unsafe {
1191            match prev_default_provider {
1192                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1193                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1194            }
1195        }
1196    }
1197
1198    #[test]
1199    fn test_cerebras_inference_beats_openrouter_slash_fallback() {
1200        let _guard = crate::llm::env_lock().lock().expect("env lock");
1201        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1202        unsafe {
1203            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1204        }
1205
1206        assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
1207        assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
1208
1209        unsafe {
1210            match prev_default_provider {
1211                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1212                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1213            }
1214        }
1215    }
1216
1217    #[test]
1218    fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
1219        // Bare model IDs that the embedded catalog hosts on Cerebras must
1220        // not be misrouted by the generic `gpt-*` / single-slash inference
1221        // fallbacks. Regression for harn#2142 (model-info routed
1222        // `gpt-oss-120b` to openai, breaking Burin TUI credential checks).
1223        let _guard = crate::llm::env_lock().lock().expect("env lock");
1224        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1225        unsafe {
1226            std::env::remove_var("HARN_DEFAULT_PROVIDER");
1227        }
1228
1229        for model in ["gpt-oss-120b", "llama-3.3-70b"] {
1230            assert_eq!(
1231                infer_provider(model),
1232                "cerebras",
1233                "{model} should route to its catalog provider"
1234            );
1235            let resolved = resolve_model_info(model);
1236            assert_eq!(resolved.id, model);
1237            assert_eq!(resolved.provider, "cerebras");
1238        }
1239
1240        unsafe {
1241            match prev_default_provider {
1242                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1243                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1244            }
1245        }
1246    }
1247
1248    #[test]
1249    fn test_user_catalog_overlay_re_homes_model_provider() {
1250        // Users can re-home a built-in model by overlaying a catalog row;
1251        // the exact-match catalog lookup must honor overlays as well as the
1252        // embedded TOML.
1253        reset_overrides();
1254        let mut overlay = ProvidersConfig::default();
1255        overlay.models.insert(
1256            "gpt-4o".to_string(),
1257            ModelDef {
1258                name: "GPT-4o via OpenRouter".to_string(),
1259                provider: "openrouter".to_string(),
1260                context_window: 128_000,
1261                runtime_context_window: None,
1262                stream_timeout: None,
1263                capabilities: Vec::new(),
1264                pricing: None,
1265                deprecated: false,
1266                deprecation_note: None,
1267                superseded_by: None,
1268                fast_mode: None,
1269                quality_tags: Vec::new(),
1270                availability: ModelAvailability::default(),
1271                tier: None,
1272                open_weight: None,
1273                strengths: Vec::new(),
1274                benchmarks: std::collections::BTreeMap::new(),
1275            },
1276        );
1277        set_user_overrides(Some(overlay));
1278
1279        assert_eq!(infer_provider("gpt-4o"), "openrouter");
1280
1281        reset_overrides();
1282    }
1283
1284    #[test]
1285    fn test_resolve_model_info_normalizes_provider_prefixes() {
1286        let local = resolve_model_info("local:gemma-4-e4b-it");
1287        assert_eq!(local.id, "gemma-4-e4b-it");
1288        assert_eq!(local.provider, "ollama");
1289
1290        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
1291        assert_eq!(ollama.id, "qwen3:30b-a3b");
1292        assert_eq!(ollama.provider, "ollama");
1293
1294        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
1295        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
1296        assert_eq!(hf.provider, "huggingface");
1297
1298        let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
1299        assert_eq!(cerebras.id, "gpt-oss-120b");
1300        assert_eq!(cerebras.provider, "cerebras");
1301    }
1302
1303    #[test]
1304    fn test_model_tier_from_defaults() {
1305        // Tier is now self-declared per model row in providers.toml.
1306        // Models that match an entry use the declared value; unknown
1307        // model ids fall through to `tier_defaults.default` ("mid").
1308        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
1309        assert_eq!(model_tier("gpt-4o"), "frontier");
1310        assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
1311        assert_eq!(model_tier("deepseek-v4-flash"), "mid");
1312        assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
1313        assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
1314        assert_eq!(model_tier("glm-5.1"), "frontier");
1315        // Unknown ids resolve to the default.
1316        assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
1317    }
1318
1319    #[test]
1320    fn test_resolve_model_unknown_alias() {
1321        let (id, provider) = resolve_model("gpt-4o");
1322        assert_eq!(id, "gpt-4o");
1323        assert!(provider.is_none());
1324    }
1325
1326    #[test]
1327    fn test_provider_names() {
1328        let names = provider_names();
1329        assert!(names.len() >= 7);
1330        assert!(names.contains(&"anthropic".to_string()));
1331        assert!(names.contains(&"together".to_string()));
1332        assert!(names.contains(&"local".to_string()));
1333        assert!(names.contains(&"mlx".to_string()));
1334        assert!(names.contains(&"openai".to_string()));
1335        assert!(names.contains(&"ollama".to_string()));
1336        assert!(names.contains(&"bedrock".to_string()));
1337        assert!(names.contains(&"azure_openai".to_string()));
1338        assert!(names.contains(&"vertex".to_string()));
1339    }
1340
1341    #[test]
1342    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
1343        let mut overlay = ProvidersConfig {
1344            default_provider: Some("ollama".to_string()),
1345            ..Default::default()
1346        };
1347        overlay.aliases.insert(
1348            "quickstart".to_string(),
1349            AliasDef {
1350                id: "llama3.2".to_string(),
1351                provider: "ollama".to_string(),
1352                tool_format: None,
1353            },
1354        );
1355
1356        let merged = merge_global_config(overlay);
1357
1358        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
1359        assert!(merged.providers.contains_key("anthropic"));
1360        assert!(merged.providers.contains_key("ollama"));
1361        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
1362    }
1363
1364    #[test]
1365    fn test_resolve_tier_model_default_aliases() {
1366        // Exercise the alias-resolution machinery, not the specific catalog
1367        // value: the model under each tier alias evolves as the embedded
1368        // providers.toml is updated. The invariants worth pinning are the
1369        // provider routing + catalog-registration of the resolved model.
1370        let (model, provider) = resolve_tier_model("frontier", None)
1371            .expect("frontier alias must resolve from the embedded catalog");
1372        assert_eq!(provider, "anthropic");
1373        assert!(
1374            model_catalog_entry(&model)
1375                .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
1376            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
1377        );
1378
1379        let (model, provider) = resolve_tier_model("small", None)
1380            .expect("small alias must resolve from the embedded catalog");
1381        assert!(
1382            [
1383                "openrouter",
1384                "huggingface",
1385                "local",
1386                "llamacpp",
1387                "mlx",
1388                "ollama"
1389            ]
1390            .contains(&provider.as_str()),
1391            "small tier should resolve to an open-weight provider (got {provider} / {model})"
1392        );
1393    }
1394
1395    #[test]
1396    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
1397        // tier/<provider> takes precedence over generic tier when the
1398        // caller scopes by provider. Don't pin the specific model — the
1399        // catalog evolves.
1400        let (model, provider) = resolve_tier_model("mid", Some("openai"))
1401            .expect("mid tier scoped to openai must resolve");
1402        assert_eq!(provider, "openai");
1403        assert!(
1404            model_catalog_entry(&model).is_some(),
1405            "mid/openai alias must point at a registered model (got {model})"
1406        );
1407    }
1408
1409    #[test]
1410    fn test_provider_config_anthropic() {
1411        let pdef = provider_config("anthropic").unwrap();
1412        assert_eq!(pdef.auth_style, "header");
1413        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
1414    }
1415
1416    #[test]
1417    fn test_provider_config_mlx() {
1418        let pdef = provider_config("mlx").unwrap();
1419        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
1420        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
1421        assert_eq!(
1422            pdef.healthcheck.unwrap().path.as_deref(),
1423            Some("/v1/models")
1424        );
1425
1426        let (model, provider) = resolve_model("mlx-qwen36-27b");
1427        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
1428        assert_eq!(provider.as_deref(), Some("mlx"));
1429    }
1430
1431    #[test]
1432    fn test_enterprise_provider_defaults_and_inference() {
1433        let bedrock = provider_config("bedrock").unwrap();
1434        assert_eq!(bedrock.auth_style, "aws_sigv4");
1435        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
1436        assert_eq!(
1437            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
1438            "bedrock"
1439        );
1440        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
1441
1442        let azure = provider_config("azure_openai").unwrap();
1443        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
1444        assert_eq!(
1445            auth_env_names(&azure.auth_env),
1446            vec![
1447                "AZURE_OPENAI_API_KEY".to_string(),
1448                "AZURE_OPENAI_AD_TOKEN".to_string(),
1449                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1450            ]
1451        );
1452
1453        let vertex = provider_config("vertex").unwrap();
1454        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
1455        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
1456    }
1457
1458    #[test]
1459    fn test_default_provider_env_override_for_unknown_model() {
1460        let _guard = crate::llm::env_lock().lock().expect("env lock");
1461        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
1462        unsafe {
1463            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
1464        }
1465
1466        let inference = infer_provider_detail("unknown-model");
1467
1468        unsafe {
1469            match prev_default_provider {
1470                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
1471                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
1472            }
1473        }
1474
1475        assert_eq!(inference.provider, "openai");
1476        assert_eq!(
1477            inference.source,
1478            crate::llm::provider::ProviderInferenceSource::DefaultFallback
1479        );
1480    }
1481
1482    #[test]
1483    fn test_resolve_base_url_no_env() {
1484        let pdef = ProviderDef {
1485            base_url: "https://example.com".to_string(),
1486            ..Default::default()
1487        };
1488        assert_eq!(resolve_base_url(&pdef), "https://example.com");
1489    }
1490
1491    #[test]
1492    fn test_default_config_roundtrip() {
1493        let config = default_config();
1494        assert!(!config.providers.is_empty());
1495        assert!(!config.inference_rules.is_empty());
1496        // Tier is now declared on each model row; tier_rules is allowed
1497        // to be empty (the rule table is a legacy fallback only).
1498        assert_eq!(config.tier_defaults.default, "mid");
1499        // At least the new open-weight frontiers should have explicit tiers.
1500        let frontiers = config
1501            .models
1502            .iter()
1503            .filter(|(_, m)| m.tier.as_deref() == Some("frontier"))
1504            .count();
1505        assert!(
1506            frontiers >= 4,
1507            "expected at least 4 frontier-tagged models, got {frontiers}"
1508        );
1509    }
1510
1511    #[test]
1512    fn test_local_ollama_catalog_metadata() {
1513        reset_overrides();
1514
1515        let qwen_coding = model_catalog_entry("qwen3.6:35b-a3b-coding-nvfp4")
1516            .expect("qwen3.6 coding catalog entry");
1517        assert_eq!(qwen_coding.context_window, 262_144);
1518        assert!(!qwen_coding.capabilities.iter().any(|cap| cap == "vision"));
1519
1520        let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
1521        assert_eq!(gemma4.context_window, 262_144);
1522        assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
1523    }
1524
1525    #[test]
1526    fn test_external_config_overlays_default_catalog() {
1527        let mut config = default_config();
1528        let mut overlay = ProvidersConfig {
1529            default_provider: Some("ollama".to_string()),
1530            ..Default::default()
1531        };
1532        overlay.providers.insert(
1533            "custom".to_string(),
1534            ProviderDef {
1535                base_url: "https://llm.example.test/v1".to_string(),
1536                chat_endpoint: "/chat/completions".to_string(),
1537                ..Default::default()
1538            },
1539        );
1540
1541        config.merge_from(&overlay);
1542
1543        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
1544        assert!(config.providers.contains_key("custom"));
1545        assert!(config.providers.contains_key("anthropic"));
1546        assert!(config.providers.contains_key("ollama"));
1547    }
1548
1549    #[test]
1550    fn test_model_params_empty() {
1551        let params = model_params("claude-sonnet-4-20250514");
1552        assert!(params.is_empty());
1553    }
1554
1555    #[test]
1556    fn test_user_overrides_add_provider_and_alias() {
1557        reset_overrides();
1558        let mut overlay = ProvidersConfig::default();
1559        overlay.providers.insert(
1560            "acme".to_string(),
1561            ProviderDef {
1562                base_url: "https://llm.acme.test/v1".to_string(),
1563                chat_endpoint: "/chat/completions".to_string(),
1564                ..Default::default()
1565            },
1566        );
1567        overlay.aliases.insert(
1568            "acme-fast".to_string(),
1569            AliasDef {
1570                id: "acme/model-fast".to_string(),
1571                provider: "acme".to_string(),
1572                tool_format: Some("native".to_string()),
1573            },
1574        );
1575        set_user_overrides(Some(overlay));
1576
1577        let (model, provider) = resolve_model("acme-fast");
1578        assert_eq!(model, "acme/model-fast");
1579        assert_eq!(provider.as_deref(), Some("acme"));
1580        assert!(provider_names().contains(&"acme".to_string()));
1581        assert_eq!(
1582            provider_config("acme").map(|provider| provider.base_url),
1583            Some("https://llm.acme.test/v1".to_string())
1584        );
1585
1586        reset_overrides();
1587    }
1588
1589    #[test]
1590    fn test_default_tool_format_uses_capability_matrix() {
1591        reset_overrides();
1592
1593        assert_eq!(
1594            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
1595            "text"
1596        );
1597        assert_eq!(
1598            default_tool_format("devstral-small-2:24b", "ollama"),
1599            "text"
1600        );
1601        assert_eq!(
1602            default_tool_format("ollama-devstral-small-2-native", "ollama"),
1603            "native"
1604        );
1605        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
1606        assert_eq!(
1607            default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
1608            "text"
1609        );
1610        assert_eq!(
1611            default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
1612            "text"
1613        );
1614    }
1615
1616    #[test]
1617    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
1618        reset_overrides();
1619        let mut overlay = ProvidersConfig::default();
1620        overlay.models.insert(
1621            "acme/model-fast".to_string(),
1622            ModelDef {
1623                name: "Acme Fast".to_string(),
1624                provider: "acme".to_string(),
1625                context_window: 65_536,
1626                runtime_context_window: None,
1627                stream_timeout: Some(42.0),
1628                capabilities: vec!["tools".to_string(), "streaming".to_string()],
1629                pricing: Some(ModelPricing {
1630                    input_per_mtok: 1.25,
1631                    output_per_mtok: 2.5,
1632                    cache_read_per_mtok: Some(0.25),
1633                    cache_write_per_mtok: None,
1634                }),
1635                deprecated: false,
1636                deprecation_note: None,
1637                superseded_by: None,
1638                fast_mode: None,
1639                quality_tags: Vec::new(),
1640                availability: ModelAvailability::default(),
1641                tier: None,
1642                open_weight: None,
1643                strengths: Vec::new(),
1644                benchmarks: std::collections::BTreeMap::new(),
1645            },
1646        );
1647        overlay
1648            .qc_defaults
1649            .insert("acme".to_string(), "acme/model-cheap".to_string());
1650        set_user_overrides(Some(overlay));
1651
1652        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
1653        assert_eq!(entry.context_window, 65_536);
1654        assert_eq!(
1655            entry.capabilities,
1656            vec!["streaming".to_string(), "tools".to_string()]
1657        );
1658        assert_eq!(
1659            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
1660            Some(1.25)
1661        );
1662        assert_eq!(
1663            pricing_per_1k_for("acme", "acme/model-fast"),
1664            Some((0.00125, 0.0025))
1665        );
1666        assert_eq!(
1667            qc_default_model("acme").as_deref(),
1668            Some("acme/model-cheap")
1669        );
1670
1671        reset_overrides();
1672    }
1673
1674    #[test]
1675    fn test_user_overrides_prepend_inference_rules() {
1676        reset_overrides();
1677        let mut overlay = ProvidersConfig::default();
1678        overlay.inference_rules.push(InferenceRule {
1679            pattern: Some("internal-*".to_string()),
1680            contains: None,
1681            exact: None,
1682            provider: "openai".to_string(),
1683        });
1684        set_user_overrides(Some(overlay));
1685
1686        assert_eq!(infer_provider("internal-foo"), "openai");
1687
1688        reset_overrides();
1689    }
1690
1691    // ── Embedded providers.toml invariants ───────────────────────────────────
1692    // These tests pin properties of the *system* — TOML parses, every
1693    // alias resolves, every deprecated model has a note — without
1694    // pinning specific catalog values. They survive future catalog
1695    // churn and surface real schema breakage.
1696
1697    #[test]
1698    fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
1699        let config = default_config();
1700        assert!(
1701            config.providers.len() >= 10,
1702            "expected >=10 providers in embedded catalog, got {}",
1703            config.providers.len()
1704        );
1705        assert!(
1706            config.models.len() >= 20,
1707            "expected >=20 models in embedded catalog, got {}",
1708            config.models.len()
1709        );
1710        assert!(
1711            config.aliases.len() >= 15,
1712            "expected >=15 aliases in embedded catalog, got {}",
1713            config.aliases.len()
1714        );
1715        assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
1716    }
1717
1718    #[test]
1719    fn embedded_catalog_every_deprecated_model_has_a_note() {
1720        let config = default_config();
1721        let offenders: Vec<&str> = config
1722            .models
1723            .iter()
1724            .filter(|(_, model)| {
1725                model.deprecated
1726                    && model
1727                        .deprecation_note
1728                        .as_deref()
1729                        .unwrap_or("")
1730                        .trim()
1731                        .is_empty()
1732            })
1733            .map(|(id, _)| id.as_str())
1734            .collect();
1735        assert!(
1736            offenders.is_empty(),
1737            "deprecated models missing a deprecation_note: {offenders:?}"
1738        );
1739    }
1740
1741    #[test]
1742    fn embedded_catalog_every_model_targets_a_registered_provider() {
1743        let config = default_config();
1744        let known: std::collections::BTreeSet<&str> =
1745            config.providers.keys().map(String::as_str).collect();
1746        let orphans: Vec<(&str, &str)> = config
1747            .models
1748            .iter()
1749            .filter(|(_, model)| !known.contains(model.provider.as_str()))
1750            .map(|(id, model)| (id.as_str(), model.provider.as_str()))
1751            .collect();
1752        assert!(
1753            orphans.is_empty(),
1754            "models reference unknown providers: {orphans:?}"
1755        );
1756    }
1757
1758    #[test]
1759    fn embedded_catalog_every_alias_targets_a_registered_provider() {
1760        let config = default_config();
1761        let known: std::collections::BTreeSet<&str> =
1762            config.providers.keys().map(String::as_str).collect();
1763        let orphans: Vec<(&str, &str)> = config
1764            .aliases
1765            .iter()
1766            .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
1767            .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
1768            .collect();
1769        assert!(
1770            orphans.is_empty(),
1771            "aliases reference unknown providers: {orphans:?}"
1772        );
1773    }
1774
1775    #[test]
1776    fn embedded_catalog_every_qc_default_targets_a_known_model() {
1777        let config = default_config();
1778        let orphans: Vec<(&str, &str)> = config
1779            .qc_defaults
1780            .iter()
1781            .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
1782            .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
1783            .collect();
1784        assert!(
1785            orphans.is_empty(),
1786            "qc_defaults reference unknown models: {orphans:?}"
1787        );
1788    }
1789
1790    #[test]
1791    fn embedded_catalog_pricing_rates_are_non_negative() {
1792        let config = default_config();
1793        for (id, model) in &config.models {
1794            let Some(pricing) = &model.pricing else {
1795                continue;
1796            };
1797            assert!(
1798                pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
1799                "{id}: negative pricing — in={} out={}",
1800                pricing.input_per_mtok,
1801                pricing.output_per_mtok
1802            );
1803            if let Some(rate) = pricing.cache_read_per_mtok {
1804                assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
1805            }
1806            if let Some(rate) = pricing.cache_write_per_mtok {
1807                assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
1808            }
1809        }
1810    }
1811
1812    #[test]
1813    fn model_availability_parses_known_strings() {
1814        assert_eq!(
1815            ModelAvailability::parse("serverless"),
1816            Some(ModelAvailability::Serverless)
1817        );
1818        assert_eq!(
1819            ModelAvailability::parse("dedicated"),
1820            Some(ModelAvailability::Dedicated)
1821        );
1822        assert_eq!(
1823            ModelAvailability::parse("unknown"),
1824            Some(ModelAvailability::Unknown)
1825        );
1826        assert_eq!(ModelAvailability::parse("provisioned"), None);
1827        for value in [
1828            ModelAvailability::Serverless,
1829            ModelAvailability::Dedicated,
1830            ModelAvailability::Unknown,
1831        ] {
1832            assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
1833        }
1834    }
1835
1836    #[test]
1837    fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
1838        let config = default_config();
1839        let model = config
1840            .models
1841            .get("Qwen/Qwen3-Coder-Next-FP8")
1842            .expect("Together Qwen3 Coder Next FP8 is cataloged");
1843        assert_eq!(model.provider, "together");
1844        assert_eq!(model.availability, ModelAvailability::Dedicated);
1845    }
1846
1847    #[test]
1848    fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
1849        // A dedicated-only model behind a tier alias would silently fail
1850        // every serverless caller; the catalog must keep those routes
1851        // separated.
1852        let config = default_config();
1853        let dedicated: std::collections::BTreeSet<(&str, &str)> = config
1854            .models
1855            .iter()
1856            .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
1857            .map(|(id, model)| (model.provider.as_str(), id.as_str()))
1858            .collect();
1859        for (name, alias) in &config.aliases {
1860            if matches!(
1861                name.as_str(),
1862                "frontier"
1863                    | "mid"
1864                    | "small"
1865                    | "tier/frontier"
1866                    | "tier/mid"
1867                    | "tier/small"
1868                    | "sonnet"
1869                    | "opus"
1870                    | "haiku"
1871            ) {
1872                assert!(
1873                    !dedicated.contains(&(alias.provider.as_str(), alias.id.as_str())),
1874                    "tier alias `{name}` targets dedicated-only route `{}/{}`",
1875                    alias.provider,
1876                    alias.id,
1877                );
1878            }
1879        }
1880    }
1881
1882    #[test]
1883    fn embedded_catalog_tier_aliases_resolve_to_active_models() {
1884        // The three canonical tier aliases (frontier / mid / small) MUST
1885        // resolve to non-deprecated catalog entries; a default that
1886        // routes the loop into a sunsetted model is a release blocker.
1887        for alias in ["frontier", "mid", "small"] {
1888            let (model, _provider) = resolve_tier_model(alias, None)
1889                .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
1890            let entry = model_catalog_entry(&model).unwrap_or_else(|| {
1891                panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
1892            });
1893            assert!(
1894                !entry.deprecated,
1895                "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
1896                entry.deprecation_note
1897            );
1898        }
1899    }
1900
1901    #[test]
1902    fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
1903        // The `opus` alias must follow the newest Opus release, and that
1904        // release advertises its (off-by-default) fast-mode tier.
1905        let (model, provider) = resolve_model("opus");
1906        assert_eq!(model, "claude-opus-4-8");
1907        assert_eq!(provider.as_deref(), Some("anthropic"));
1908
1909        let opus48 = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
1910        assert!(!opus48.deprecated, "newest Opus must not be deprecated");
1911        let fast = opus48.fast_mode.expect("opus 4.8 advertises fast mode");
1912        assert_eq!(fast.param, "speed");
1913        assert_eq!(fast.value, "fast");
1914        assert_eq!(fast.status.as_deref(), Some("research_preview"));
1915        let fast_pricing = fast.pricing.expect("fast mode carries premium pricing");
1916        let standard = opus48.pricing.expect("opus 4.8 standard pricing");
1917        assert!(
1918            fast_pricing.input_per_mtok > standard.input_per_mtok,
1919            "fast mode must be premium-priced relative to standard"
1920        );
1921    }
1922
1923    #[test]
1924    fn superseded_opus_models_point_at_claude_opus_4_8() {
1925        // Earlier Opus rows are deprecated and carry a structured
1926        // `superseded_by` pointer to the current flagship.
1927        for model in ["claude-opus-4-7", "claude-opus-4-6"] {
1928            let entry =
1929                model_catalog_entry(model).unwrap_or_else(|| panic!("{model} catalog entry"));
1930            assert!(entry.deprecated, "{model} should be deprecated");
1931            assert_eq!(
1932                entry.superseded_by.as_deref(),
1933                Some("claude-opus-4-8"),
1934                "{model} should be superseded by claude-opus-4-8"
1935            );
1936        }
1937    }
1938
1939    #[test]
1940    fn gpt_5_5_fast_mode_rides_service_tier() {
1941        // Fast mode is provider-agnostic: OpenAI exposes it through the
1942        // `service_tier` knob rather than Anthropic's `speed`.
1943        let entry = model_catalog_entry("gpt-5.5").expect("gpt-5.5 catalog entry");
1944        let fast = entry.fast_mode.expect("gpt-5.5 advertises a fast tier");
1945        assert_eq!(fast.param, "service_tier");
1946        assert_eq!(fast.status.as_deref(), Some("ga"));
1947    }
1948}