harn_vm/
llm_config.rs

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::{OnceLock, RwLock};
5
/// Process-wide base config, initialized at most once by `load_config`
/// (embedded defaults, optionally merged with one external providers file).
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external providers file that was merged into `CONFIG`.
/// Stays unset when `load_config` did not merge any external file.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
/// Process-wide runtime catalog overlay. The lock is created lazily by
/// `runtime_catalog_overlay` and its contents are replaced by
/// `set_runtime_catalog_overlay`; `None` means no overlay is installed.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    ///
    /// Written by `set_user_overrides`; `None` means no per-run overlay is
    /// installed on this thread.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
17
/// Provider/model catalog configuration in the shape of `providers.toml`.
///
/// Every section is optional when deserializing: a missing section falls
/// back to its empty/default value. Layers are combined with
/// `ProvidersConfig::merge_from`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Name of the default provider. When merging, an overlay that sets this
    /// replaces the existing value.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions keyed by provider name.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Model aliases keyed by alias name (the key looked up by
    /// `resolve_model`).
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Tool-calling records. NOTE(review): presumably keyed by alias name
    /// like `aliases` — confirm against the code that reads it.
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model catalog rows. NOTE(review): key is presumably the model id —
    /// confirm against the code that reads it.
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// String-to-string defaults; their meaning is not visible in this
    /// section of the file.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Provider inference rules. When merging, overlay rules are placed
    /// ahead of the existing ones.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Tier rules. When merging, overlay rules are placed ahead of the
    /// existing ones.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Fallback tier settings; `default` is "mid" unless configured.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Free-form default values: outer key is a model pattern, inner key is
    /// the setting name. Inner maps are merged key-by-key.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Free-form values: outer key is a role, inner key is the setting name.
    /// Inner maps are merged key-by-key.
    #[serde(default)]
    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
43
44impl ProvidersConfig {
45    pub fn is_empty(&self) -> bool {
46        self.default_provider.is_none()
47            && self.providers.is_empty()
48            && self.aliases.is_empty()
49            && self.alias_tool_calling.is_empty()
50            && self.models.is_empty()
51            && self.qc_defaults.is_empty()
52            && self.inference_rules.is_empty()
53            && self.tier_rules.is_empty()
54            && self.model_defaults.is_empty()
55            && self.model_roles.is_empty()
56            && self.tier_defaults.default == default_mid()
57    }
58
59    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
60        for (name, provider) in &overlay.providers {
61            match self.providers.get_mut(name) {
62                Some(existing) => existing.merge_from(provider),
63                None => {
64                    self.providers.insert(name.clone(), provider.clone());
65                }
66            }
67        }
68        self.aliases.extend(overlay.aliases.clone());
69        self.alias_tool_calling
70            .extend(overlay.alias_tool_calling.clone());
71        self.models.extend(overlay.models.clone());
72        self.qc_defaults.extend(overlay.qc_defaults.clone());
73
74        if overlay.default_provider.is_some() {
75            self.default_provider = overlay.default_provider.clone();
76        }
77
78        if !overlay.inference_rules.is_empty() {
79            let mut merged = overlay.inference_rules.clone();
80            merged.extend(self.inference_rules.clone());
81            self.inference_rules = merged;
82        }
83
84        if !overlay.tier_rules.is_empty() {
85            let mut merged = overlay.tier_rules.clone();
86            merged.extend(self.tier_rules.clone());
87            self.tier_rules = merged;
88        }
89
90        if overlay.tier_defaults.default != default_mid() {
91            self.tier_defaults = overlay.tier_defaults.clone();
92        }
93
94        for (pattern, defaults) in &overlay.model_defaults {
95            self.model_defaults
96                .entry(pattern.clone())
97                .or_default()
98                .extend(defaults.clone());
99        }
100
101        for (role, defaults) in &overlay.model_roles {
102            self.model_roles
103                .entry(role.clone())
104                .or_default()
105                .extend(defaults.clone());
106        }
107    }
108}
109
/// Definition of one LLM provider.
///
/// Deserialized through `ProviderDefWire` so that an omitted `auth_style`
/// can be told apart from one written explicitly (see
/// `auth_style_explicit`). Layers are combined with
/// `ProviderDef::merge_from`.
#[derive(Debug, Clone)]
pub struct ProviderDef {
    /// Optional display label. NOTE(review): presumably for host UIs — confirm.
    pub display_name: Option<String>,
    /// Optional icon reference. NOTE(review): format not visible here.
    pub icon: Option<String>,
    /// Provider protocol. Omitted providers use Harn's normal HTTP provider
    /// path; `acp` launches an Agent Client Protocol server and drives it as
    /// an agent-backed provider.
    pub protocol: Option<String>,
    /// Base URL. An empty string counts as "unset" when merging.
    pub base_url: String,
    /// NOTE(review): presumably names an environment variable that supplies
    /// the base URL — confirm against the code that reads it.
    pub base_url_env: Option<String>,
    /// Authentication style; "bearer" when not specified.
    pub auth_style: String,
    /// NOTE(review): presumably a custom header name for credentials —
    /// confirm against the code that reads it.
    pub auth_header: Option<String>,
    /// Environment variable name(s) for credentials; see `AuthEnv`.
    pub auth_env: AuthEnv,
    /// Extra headers. Overlay entries are merged key-by-key.
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint. An empty string counts as "unset" when merging.
    pub chat_endpoint: String,
    /// Optional completion endpoint.
    pub completion_endpoint: Option<String>,
    /// NOTE(review): `command`, `args`, `env` and `cwd` look like process
    /// launch settings, presumably for `acp` providers — confirm.
    pub command: Option<String>,
    /// Arguments; a non-empty overlay list replaces the whole list.
    pub args: Vec<String>,
    /// Environment entries. Overlay entries are merged key-by-key.
    pub env: BTreeMap<String, String>,
    /// Optional working directory.
    pub cwd: Option<String>,
    /// Raw JSON values describing MCP servers; a non-empty overlay list
    /// replaces the whole list.
    pub mcp_servers: Vec<serde_json::Value>,
    /// Optional healthcheck description; see `HealthcheckDef`.
    pub healthcheck: Option<HealthcheckDef>,
    /// Feature tags; a non-empty overlay list replaces the whole list.
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    pub rpm: Option<u32>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    pub latency_p50_ms: Option<u64>,
    /// `true` when `auth_style` was written explicitly in the source config
    /// rather than defaulted. Set by the `Deserialize` impl and consulted by
    /// `merge_from` to decide whether an overlay may replace the base style.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
150
/// Deserialization mirror of `ProviderDef`.
///
/// Field-for-field the same as `ProviderDef` except that `auth_style` is an
/// `Option`, which lets the `Deserialize` impl for `ProviderDef` record
/// whether the style was written explicitly. Every field is optional on the
/// wire and falls back to its type's default.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    // `None` means the key was absent; the conversion substitutes "bearer"
    // and marks the style as not explicit.
    #[serde(default)]
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
}
204
205impl<'de> Deserialize<'de> for ProviderDef {
206    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
207    where
208        D: serde::Deserializer<'de>,
209    {
210        let wire = ProviderDefWire::deserialize(deserializer)?;
211        let auth_style_explicit = wire.auth_style.is_some();
212        Ok(Self {
213            display_name: wire.display_name,
214            icon: wire.icon,
215            protocol: wire.protocol,
216            base_url: wire.base_url,
217            base_url_env: wire.base_url_env,
218            auth_style: wire.auth_style.unwrap_or_else(default_bearer),
219            auth_header: wire.auth_header,
220            auth_env: wire.auth_env,
221            extra_headers: wire.extra_headers,
222            chat_endpoint: wire.chat_endpoint,
223            completion_endpoint: wire.completion_endpoint,
224            command: wire.command,
225            args: wire.args,
226            env: wire.env,
227            cwd: wire.cwd,
228            mcp_servers: wire.mcp_servers,
229            healthcheck: wire.healthcheck,
230            features: wire.features,
231            fallback: wire.fallback,
232            retry_count: wire.retry_count,
233            retry_delay_ms: wire.retry_delay_ms,
234            rpm: wire.rpm,
235            cost_per_1k_in: wire.cost_per_1k_in,
236            cost_per_1k_out: wire.cost_per_1k_out,
237            latency_p50_ms: wire.latency_p50_ms,
238            auth_style_explicit,
239        })
240    }
241}
242
243impl Default for ProviderDef {
244    fn default() -> Self {
245        Self {
246            display_name: None,
247            icon: None,
248            protocol: None,
249            base_url: String::new(),
250            base_url_env: None,
251            auth_style: default_bearer(),
252            auth_header: None,
253            auth_env: AuthEnv::None,
254            extra_headers: BTreeMap::new(),
255            chat_endpoint: String::new(),
256            completion_endpoint: None,
257            command: None,
258            args: Vec::new(),
259            env: BTreeMap::new(),
260            cwd: None,
261            mcp_servers: Vec::new(),
262            healthcheck: None,
263            features: Vec::new(),
264            fallback: None,
265            retry_count: None,
266            retry_delay_ms: None,
267            rpm: None,
268            cost_per_1k_in: None,
269            cost_per_1k_out: None,
270            latency_p50_ms: None,
271            auth_style_explicit: false,
272        }
273    }
274}
275
276impl ProviderDef {
277    fn merge_from(&mut self, overlay: &ProviderDef) {
278        merge_option(&mut self.display_name, &overlay.display_name);
279        merge_option(&mut self.icon, &overlay.icon);
280        merge_option(&mut self.protocol, &overlay.protocol);
281        merge_string(&mut self.base_url, &overlay.base_url);
282        merge_option(&mut self.base_url_env, &overlay.base_url_env);
283        let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
284        if overlay.auth_style_explicit
285            || !overlay_uses_default_auth_style
286            || self.auth_style == default_bearer()
287        {
288            self.auth_style = overlay.auth_style.clone();
289            self.auth_style_explicit |=
290                overlay.auth_style_explicit || !overlay_uses_default_auth_style;
291        }
292        merge_option(&mut self.auth_header, &overlay.auth_header);
293        if !overlay.auth_env.is_none() {
294            self.auth_env = overlay.auth_env.clone();
295        }
296        self.extra_headers.extend(overlay.extra_headers.clone());
297        merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
298        merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
299        merge_option(&mut self.command, &overlay.command);
300        merge_vec(&mut self.args, &overlay.args);
301        self.env.extend(overlay.env.clone());
302        merge_option(&mut self.cwd, &overlay.cwd);
303        merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
304        merge_option(&mut self.healthcheck, &overlay.healthcheck);
305        merge_vec(&mut self.features, &overlay.features);
306        merge_option(&mut self.fallback, &overlay.fallback);
307        merge_option(&mut self.retry_count, &overlay.retry_count);
308        merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
309        merge_option(&mut self.rpm, &overlay.rpm);
310        merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
311        merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
312        merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
313    }
314}
315
/// Replaces `base` with a clone of the overlay's value when the overlay has
/// one; a `None` overlay leaves `base` untouched.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
321
/// Replaces `base` with `overlay` unless the overlay is empty; an empty
/// overlay string is treated as "not set" and leaves `base` untouched.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    *base = String::from(overlay);
}
327
/// Replaces `base` wholesale with a copy of `overlay` unless the overlay is
/// empty; an empty overlay leaves `base` untouched. Elements are never
/// appended or merged individually.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    if overlay.is_empty() {
        return;
    }
    *base = Vec::from(overlay);
}
333
/// Auth style assumed when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
337
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
///
/// Deserialized untagged, so the input shape selects the variant. The
/// default is `AuthEnv::None`.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No environment variable configured.
    #[default]
    None,
    /// A single environment variable name.
    Single(String),
    /// Several environment variable names, tried in order.
    Multiple(Vec<String>),
}
348
349impl AuthEnv {
350    fn is_none(&self) -> bool {
351        matches!(self, AuthEnv::None)
352    }
353}
354
/// Healthcheck description attached to a provider.
///
/// `method` is required when deserializing; the other fields are optional.
/// NOTE(review): how `path` and `url` are combined with the provider's base
/// URL is decided by the code that performs the check, not shown here.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Request method. NOTE(review): presumably an HTTP verb — confirm.
    pub method: String,
    /// Optional path for the check.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional URL for the check.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<String>,
}
365
/// Target of a model alias: the model id and provider the alias resolves to.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id the alias resolves to.
    pub id: String,
    /// Provider name the alias resolves to.
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
377
/// Tool-calling record stored in `ProvidersConfig::alias_tool_calling`.
///
/// Every field is optional on input, and unset fields are omitted when
/// serializing. NOTE(review): the field names suggest these are results of
/// probing tool-calling support; the code that writes them is outside this
/// section, so confirm the allowed values there.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasToolCallingDef {
    /// Status recorded for native tool calling.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub native: Option<String>,
    /// Status recorded for text-based tool calling.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    /// Status recorded for native tool calling while streaming.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub streaming_native: Option<String>,
    /// Recorded fallback mode, if any.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_mode: Option<String>,
    /// Recorded failure reason, if any.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    /// When the last probe happened. NOTE(review): timestamp format is not
    /// visible here.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub last_probe_at: Option<String>,
}
399
/// Per-MTok pricing for a model.
///
/// Input and output rates are required when deserializing; cache rates are
/// optional. NOTE(review): the currency is not stated in this section
/// (provider-level costs elsewhere in this file are documented as USD) —
/// confirm.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// Rate for input tokens, per MTok.
    pub input_per_mtok: f64,
    /// Rate for output tokens, per MTok.
    pub output_per_mtok: f64,
    /// Rate for cache reads, per MTok, when known.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Rate for cache writes, per MTok, when known.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
409
/// Optional accelerated-serving ("fast mode") tier for a model. Off by
/// default: its presence only *describes* that the provider offers a
/// faster, premium-priced serving path running the same weights — callers
/// must explicitly opt in via the provider's request knob, so nothing here
/// changes default behavior. Deliberately provider-agnostic: Anthropic
/// exposes the tier as `speed = "fast"` (beta-gated), while OpenAI uses
/// `service_tier = "fast"` / `"priority"`. Premium pricing is stored as
/// absolute per-MTok rates rather than a single multiplier because
/// providers price the tier asymmetrically (Anthropic Opus 4.8 is 2x
/// standard; Opus 4.6/4.7 fast mode is 6x).
///
/// `param` and `value` are required when deserializing; every other field
/// is optional.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    /// Request field that opts into the fast tier (e.g. "speed" for
    /// Anthropic, "service_tier" for OpenAI). Required.
    pub param: String,
    /// Value to send on `param` (e.g. "fast", "priority"). Required.
    pub value: String,
    /// Provider beta/feature header required to use the tier, if any
    /// (e.g. Anthropic "fast-mode-2026-02-01").
    #[serde(default)]
    pub beta_header: Option<String>,
    /// Output-tokens-per-second speedup vs standard serving (e.g. 2.5).
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    /// Lifecycle of the fast tier: "ga" | "research_preview" |
    /// "deprecated". None when unspecified.
    #[serde(default)]
    pub status: Option<String>,
    /// Premium pricing charged while the fast tier is active (absolute
    /// per-MTok rates, not a multiplier on standard pricing).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Free-text note: constraints, deprecation timeline, etc.
    #[serde(default)]
    pub note: Option<String>,
}
446
/// One row of the model catalog.
///
/// `name`, `provider` and `context_window` are required when deserializing;
/// every other field is optional and falls back to its default.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Model name. NOTE(review): presumably a human-readable label, with the
    /// catalog id being the map key — confirm.
    pub name: String,
    /// Name of the provider serving this model.
    pub provider: String,
    /// Context window size. NOTE(review): presumably in tokens — confirm.
    pub context_window: u64,
    /// Optional context window applied at runtime. NOTE(review): how it
    /// relates to `context_window` is decided outside this section.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    /// Optional stream timeout. NOTE(review): unit not visible here
    /// (presumably seconds) — confirm.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags for the model.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Standard pricing, when known.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Whether the model is marked deprecated. Defaults to `false`.
    #[serde(default)]
    pub deprecated: bool,
    /// Free-text deprecation note.
    #[serde(default)]
    pub deprecation_note: Option<String>,
    /// Structured replacement pointer: the catalog id of the model that
    /// supersedes this one (e.g. an older Opus row points at the newest
    /// Opus). Lets release tooling express "migrate to X" in a
    /// machine-readable way instead of burying it in `deprecation_note`
    /// free text. A model may be superseded without being `deprecated`
    /// (a newer option exists but this one is still fully supported);
    /// pair it with `deprecated = true` once a sunset is announced.
    #[serde(default)]
    pub superseded_by: Option<String>,
    /// Accelerated-serving ("fast mode") tier metadata, when the model's
    /// provider offers one. Off by default — see [`FastModeDef`]. None for
    /// models with no faster serving path.
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    /// Quality tags for the model. NOTE(review): allowed values are not
    /// visible in this section.
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Whether the model can be reached over a normal API-key serverless call,
    /// or only via a dedicated/provisioned endpoint that the caller must spin
    /// up out-of-band. Providers like Together list dedicated-only routes
    /// alongside serverless ones in `/v1/models`, so this metadata lets clients
    /// avoid presenting them as one-click options.
    #[serde(default)]
    pub availability: ModelAvailability,
    /// Popular-consensus tier label. Enum-typed string: "small" | "mid" |
    /// "frontier" | "reasoning". Self-declared per model (no pattern-matched
    /// rule table) so the catalog is the single source of truth. When None
    /// the resolver returns the catalog default ("mid"). Use the richer
    /// `strengths` + `benchmarks` fields to pick models for specific
    /// workloads — `tier` exists only as a coarse popular-consensus shortcut.
    ///
    /// NOTE(review): `ProvidersConfig` still carries a `tier_rules` list, so
    /// the "no pattern-matched rule table" statement may be out of date —
    /// confirm against the tier resolver.
    #[serde(default)]
    pub tier: Option<String>,
    /// True when the model weights are downloadable / self-hostable
    /// (open-weight / open-source license, regardless of commercial-use
    /// restrictions). False when weights are closed (Anthropic, OpenAI,
    /// Google, etc.). None when the catalog row predates the migration.
    #[serde(default)]
    pub open_weight: Option<bool>,
    /// Workload-shaped strength tags. Conventional values include
    /// `coding`, `summarization`, `long_context`, `tool_use`, `reasoning`,
    /// `vision`, `speed`, `cheap`, `agentic`. Selectors should treat
    /// missing entries as "no claim" rather than "no strength."
    #[serde(default)]
    pub strengths: Vec<String>,
    /// Public benchmark numbers, keyed by a snake_case identifier
    /// (`swe_bench_verified`, `humaneval`, `aa_intelligence_index`, etc.).
    /// Values are the raw published scores. The selector layer is free
    /// to normalize per benchmark; the catalog records the canonical
    /// score so future readers can audit the source.
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    /// Normalized model-family token used as a diversity signal for
    /// reviewer selection. Distinct from provider: hosted wrappers should
    /// keep the underlying family (for example OpenRouter-hosted Claude
    /// still uses `anthropic-claude`).
    #[serde(default)]
    pub family: Option<String>,
    /// Narrower family lineage used by option-pack calibration.
    #[serde(default)]
    pub lineage: Option<String>,
    /// Preferred reviewer families for critique/review workloads.
    #[serde(default)]
    pub complementary_with: Vec<String>,
    /// Author families, lineages, model ids, or provider/model selectors
    /// this row should not review.
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
531
/// How a cataloged model can be reached.
///
/// Serialized in snake_case ("serverless", "dedicated", "unknown"), matching
/// the strings used by `as_str` and `parse`. Defaults to `Serverless`.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// Reachable through the provider's normal API-key path with no extra
    /// setup. The default for cataloged hosted/local models: by cataloging a
    /// row we are claiming the route works out of the box.
    #[default]
    Serverless,
    /// Requires the caller to provision a dedicated endpoint before requests
    /// will succeed. The catalog row exists for selection/pricing UI, but
    /// hosts must not auto-route to it.
    Dedicated,
    /// Availability is not known ahead of time. Used for routes that were
    /// surfaced dynamically (e.g. through `/v1/models`) without a static
    /// claim from Harn or the user.
    Unknown,
}
549
550impl ModelAvailability {
551    pub fn as_str(self) -> &'static str {
552        match self {
553            Self::Serverless => "serverless",
554            Self::Dedicated => "dedicated",
555            Self::Unknown => "unknown",
556        }
557    }
558
559    pub fn parse(value: &str) -> Option<Self> {
560        match value {
561            "serverless" => Some(Self::Serverless),
562            "dedicated" => Some(Self::Dedicated),
563            "unknown" => Some(Self::Unknown),
564            _ => None,
565        }
566    }
567}
568
/// Complete catalog identity for a model selector, as produced by
/// `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Model id: the alias target, or the selector with any provider
    /// selector prefix stripped.
    pub id: String,
    /// Provider name: taken from the alias, or inferred from the selector.
    pub provider: String,
    /// The selector itself when it matched a configured alias; `None`
    /// otherwise.
    pub alias: Option<String>,
    /// Tool format: the alias override when present, otherwise the default
    /// for this model and provider.
    pub tool_format: String,
    /// Tier label resolved for the model id.
    pub tier: String,
    /// Model family resolved for the provider and model id.
    pub family: String,
    /// Model lineage resolved for the provider and model id.
    pub lineage: String,
}
579
/// Inputs for choosing a reviewer model that complements an author model.
///
/// NOTE(review): the selection logic that consumes these options is outside
/// this section; field notes below are limited to what the types show.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    /// Model that authored the work to be reviewed.
    pub author_model: String,
    /// Provider of the author model, when known.
    pub author_provider: Option<String>,
    /// Kind of review being requested.
    pub intent: ComplementaryReviewerIntent,
    /// Optional price-multiplier cap. NOTE(review): presumably relative to
    /// the author model's price — confirm against the selector.
    pub max_price_multiplier: Option<f64>,
}
587
/// Kind of review a complementary reviewer is asked to perform.
///
/// String forms are "review", "critique" and "plan_review".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    /// String form: "review".
    Review,
    /// String form: "critique".
    Critique,
    /// String form: "plan_review".
    PlanReview,
}

impl ComplementaryReviewerIntent {
    /// Parses the exact (case-sensitive) string form produced by `as_str`.
    /// Returns `None` for anything else.
    pub fn parse(value: &str) -> Option<Self> {
        // Search the variants by their `as_str` form so the two directions
        // can never drift apart.
        [Self::Review, Self::Critique, Self::PlanReview]
            .iter()
            .copied()
            .find(|intent| intent.as_str() == value)
    }

    /// Returns the snake_case string form of this intent.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Review => "review",
            Self::Critique => "critique",
            Self::PlanReview => "plan_review",
        }
    }
}
613
/// Serializable outcome of a complementary-reviewer selection.
///
/// NOTE(review): the code that fills this in is outside this section; field
/// notes below are limited to what the names and types show.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    /// Review intent as a string. NOTE(review): presumably the
    /// `ComplementaryReviewerIntent::as_str` form — confirm.
    pub intent: String,
    /// Identity of the author model.
    pub author: ComplementaryModelIdentity,
    /// Identity of the chosen reviewer model.
    pub reviewer: ComplementaryModelIdentity,
    /// Whether the selection is a fallback choice.
    pub fallback: bool,
    /// Why a fallback was used, when one was.
    pub fallback_reason: Option<String>,
    /// Explanation of the selection.
    pub reason: String,
    /// Estimated incremental cost of the reviewer, when available.
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
624
/// Identity of a model as reported in a `ComplementaryReviewerSelection`.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    /// Model id.
    pub id: String,
    /// Provider name.
    pub provider: String,
    /// Model family.
    pub family: String,
    /// Model lineage.
    pub lineage: String,
    /// Tier label.
    pub tier: String,
    /// Pricing, when known; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,
}
635
/// Per-MTok cost estimate attached to a reviewer selection.
///
/// NOTE(review): the code that computes these numbers is outside this
/// section.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    /// Input rate, per MTok.
    pub input_per_mtok: f64,
    /// Output rate, per MTok.
    pub output_per_mtok: f64,
    /// Total rate, per MTok. NOTE(review): presumably input + output —
    /// confirm against the code that computes it.
    pub total_per_mtok: f64,
    /// Cost relative to the author model, when it can be computed; omitted
    /// from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multiplier_vs_author: Option<f64>,
}
644
/// Rule that maps a model id to a provider.
///
/// `provider` is required; the three matcher fields are optional.
/// NOTE(review): how `pattern`, `contains` and `exact` are evaluated (and
/// which wins when several are set) is decided by the inference code, which
/// is outside this section.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Pattern matcher. NOTE(review): pattern syntax not visible here.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring matcher.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact-match matcher.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider name assigned when the rule matches.
    pub provider: String,
}
655
/// Rule that maps a model id to a tier label.
///
/// `tier` is required; the three matcher fields are optional.
/// NOTE(review): how `pattern`, `contains` and `exact` are evaluated (and
/// which wins when several are set) is decided by the tier resolver, which
/// is outside this section.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Pattern matcher. NOTE(review): pattern syntax not visible here.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring matcher.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact-match matcher.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier label assigned when the rule matches.
    pub tier: String,
}
666
/// Fallback tier settings.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Default tier label; "mid" when the key is absent.
    #[serde(default = "default_mid")]
    pub default: String,
}
672
673impl Default for TierDefaults {
674    fn default() -> Self {
675        Self {
676            default: default_mid(),
677        }
678    }
679}
680
/// Built-in default tier label.
fn default_mid() -> String {
    String::from("mid")
}
684
685/// Load and cache the providers config. Called once at VM startup.
686pub fn load_config() -> &'static ProvidersConfig {
687    CONFIG.get_or_init(|| {
688        let mut config = default_config();
689        let verbose_config_logging = matches!(
690            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
691            Some("1" | "true" | "TRUE" | "yes" | "YES")
692        ) || matches!(
693            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
694            Some("1" | "true" | "TRUE" | "yes" | "YES")
695        );
696        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
697            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
698                config.merge_from(&overlay);
699                let _ = CONFIG_PATH.set(path);
700                return config;
701            }
702        }
703        if should_load_home_config() {
704            if let Some(home) = dirs_or_home() {
705                let path = format!("{home}/.config/harn/providers.toml");
706                if let Some(overlay) = read_external_config(&path, false) {
707                    config.merge_from(&overlay);
708                    let _ = CONFIG_PATH.set(path);
709                    return config;
710                }
711            }
712        }
713        config
714    })
715}
716
717fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
718    match std::fs::read_to_string(path) {
719        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
720            Ok(config) => {
721                if verbose {
722                    eprintln!(
723                        "[llm_config] Loaded {} providers, {} aliases from {}",
724                        config.providers.len(),
725                        config.aliases.len(),
726                        path
727                    );
728                }
729                Some(config)
730            }
731            Err(error) => {
732                eprintln!("[llm_config] TOML parse error in {path}: {error}");
733                None
734            }
735        },
736        Err(error) => {
737            if verbose {
738                eprintln!("[llm_config] Cannot read {path}: {error}");
739            }
740            None
741        }
742    }
743}
744
/// Whether `load_config` may read the providers file from the home
/// directory. Disabled in test builds.
fn should_load_home_config() -> bool {
    // Unit tests should cover embedded defaults plus explicit overlays, not
    // whichever provider file happens to exist on the developer machine.
    let building_tests = cfg!(test);
    !building_tests
}
750
751/// Parse a provider/model catalog overlay in the same shape as
752/// `providers.toml` or `[llm]` package-manifest sections.
753pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
754    toml::from_str::<ProvidersConfig>(src)
755}
756
757/// Returns the filesystem path of the currently-loaded providers config, if
758/// any. Returns `None` when built-in defaults are active.
759pub fn loaded_config_path() -> Option<std::path::PathBuf> {
760    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
761    let _ = load_config();
762    CONFIG_PATH.get().map(std::path::PathBuf::from)
763}
764
765/// Install per-run provider config overlays. The overlay uses the same shape as
766/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
767/// manifests. Passing `None` clears the overlay.
768pub fn set_user_overrides(config: Option<ProvidersConfig>) {
769    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
770}
771
772/// Clear per-run provider config overlays.
773pub fn clear_user_overrides() {
774    set_user_overrides(None);
775}
776
777/// Install the process-wide runtime catalog overlay used by
778/// `provider_catalog::refresh_runtime_catalog`. Per-run user overlays still
779/// merge last so project-local provider config can override hosted catalog
780/// updates.
781pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
782    *runtime_catalog_overlay()
783        .write()
784        .expect("runtime catalog overlay poisoned") = config;
785}
786
787pub fn clear_runtime_catalog_overlay() {
788    set_runtime_catalog_overlay(None);
789}
790
791pub(crate) fn effective_config() -> ProvidersConfig {
792    let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
793    effective_config_with_user_overrides(user_overrides.as_ref())
794}
795
796pub(crate) fn effective_config_with_user_overrides(
797    user_overrides: Option<&ProvidersConfig>,
798) -> ProvidersConfig {
799    let mut merged = load_config().clone();
800    if let Some(overlay) = runtime_catalog_overlay()
801        .read()
802        .expect("runtime catalog overlay poisoned")
803        .as_ref()
804    {
805        merged.merge_from(overlay);
806    }
807    if let Some(overlay) = user_overrides {
808        merged.merge_from(overlay);
809    }
810    merged
811}
812
813fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
814    RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
815}
816
817/// Resolve a model alias to (model_id, provider_name).
818pub fn resolve_model(alias: &str) -> (String, Option<String>) {
819    let config = effective_config();
820    if let Some(a) = config.aliases.get(alias) {
821        return (a.id.clone(), Some(a.provider.clone()));
822    }
823    (normalize_model_id(alias), None)
824}
825
/// Strip host/provider selector prefixes that identify transport, not the
/// provider-native model id. This mirrors Burin's existing normalization so
/// `ollama:qwen3:30b` reaches Ollama as `qwen3:30b` instead of an invalid
/// model named `ollama`. Cerebras follows the same convention but uses a
/// slash separator (`cerebras/gpt-oss-120b`) because its own /v1/models
/// endpoint returns bare names that overlap OpenAI's families.
///
/// Only the first matching prefix is removed, and only once; input without
/// a known prefix is returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes removed by `normalize_model_id`, checked in this order.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
843
844/// Resolve an alias or selector into the complete catalog identity hosts need:
845/// provider inference, prefix-normalized model id, default tool format, and tier.
846pub fn resolve_model_info(selector: &str) -> ResolvedModel {
847    let config = effective_config();
848    if let Some(alias) = config.aliases.get(selector) {
849        let id = alias.id.clone();
850        let provider = alias.provider.clone();
851        let tool_format = alias
852            .tool_format
853            .clone()
854            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
855        return ResolvedModel {
856            tier: model_tier_with_config(&config, &id),
857            family: model_family_with_config(&config, &provider, &id),
858            lineage: model_lineage_with_config(&config, &provider, &id),
859            id,
860            provider,
861            alias: Some(selector.to_string()),
862            tool_format,
863        };
864    }
865
866    let id = normalize_model_id(selector);
867    let inference = infer_provider_with_config(&config, selector);
868    let source = inference.source;
869    let provider = inference.provider;
870    let tool_format = default_tool_format_with_config(&config, &id, &provider);
871    let tier = model_tier_with_config(&config, &id);
872    let family = model_family_with_inference_source(&config, &provider, &id, source);
873    let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
874    ResolvedModel {
875        id,
876        provider,
877        alias: None,
878        tool_format,
879        tier,
880        family,
881        lineage,
882    }
883}
884
885/// Infer provider from a model ID using inference rules.
886pub fn infer_provider(model_id: &str) -> String {
887    infer_provider_detail(model_id).provider
888}
889
890/// Infer provider from a model ID and retain whether the configured default was used.
891pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
892    let config = effective_config();
893    infer_provider_with_config(&config, model_id)
894}
895
896fn infer_provider_with_config(
897    config: &ProvidersConfig,
898    model_id: &str,
899) -> crate::llm::provider::ProviderInference {
900    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
901        return crate::llm::provider::ProviderInference::builtin("ollama");
902    }
903    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
904        return crate::llm::provider::ProviderInference::builtin("huggingface");
905    }
906    // Exact catalog rows are the most authoritative declaration of where
907    // a model is hosted: any pattern-based inference rule is necessarily
908    // less specific than `[models."<id>"].provider = "<name>"`. Catalogs
909    // include user overlays, so users can still re-home a model by
910    // setting a catalog entry in their own providers.toml.
911    let normalized_id = normalize_model_id(model_id);
912    if let Some(model) = config
913        .models
914        .get(model_id)
915        .or_else(|| config.models.get(&normalized_id))
916    {
917        return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
918    }
919    for rule in &config.inference_rules {
920        if let Some(exact) = &rule.exact {
921            if model_id == exact {
922                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
923            }
924        }
925        if let Some(pattern) = &rule.pattern {
926            if glob_match(pattern, model_id) {
927                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
928            }
929        }
930        if let Some(substr) = &rule.contains {
931            if model_id.contains(substr.as_str()) {
932                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
933            }
934        }
935    }
936    crate::llm::provider::infer_provider_from_model_id(
937        model_id,
938        &default_provider_with_config(config),
939    )
940}
941
942pub fn default_provider() -> String {
943    let config = effective_config();
944    default_provider_with_config(&config)
945}
946
947fn default_provider_with_config(config: &ProvidersConfig) -> String {
948    std::env::var("HARN_DEFAULT_PROVIDER")
949        .ok()
950        .map(|value| value.trim().to_string())
951        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
952        .or_else(|| {
953            config
954                .default_provider
955                .as_deref()
956                .map(str::trim)
957                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
958                .map(str::to_string)
959        })
960        .unwrap_or_else(|| "anthropic".to_string())
961}
962
963/// Get model tier ("small", "mid", "frontier").
964pub fn model_tier(model_id: &str) -> String {
965    let config = effective_config();
966    model_tier_with_config(&config, model_id)
967}
968
969pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
970    // Per-model self-declared tier wins. This is the only path.
971    if let Some(model) = config.models.get(model_id) {
972        if let Some(tier) = model.tier.as_deref() {
973            let trimmed = tier.trim();
974            if !trimmed.is_empty() {
975                return trimmed.to_string();
976            }
977        }
978    }
979    // Legacy pattern-rules: still consulted while we finish migrating the
980    // long tail of models to per-row `tier = "..."`. Newly added rows
981    // should set `tier` directly; the rule table is a fallback only.
982    for rule in &config.tier_rules {
983        if let Some(exact) = &rule.exact {
984            if model_id == exact {
985                return rule.tier.clone();
986            }
987        }
988        if let Some(pattern) = &rule.pattern {
989            if glob_match(pattern, model_id) {
990                return rule.tier.clone();
991            }
992        }
993        if let Some(substr) = &rule.contains {
994            if model_id.contains(substr.as_str()) {
995                return rule.tier.clone();
996            }
997        }
998    }
999    config.tier_defaults.default.clone()
1000}
1001
1002/// Return the normalized model-family token used for cross-family review.
1003pub fn model_family(provider: &str, model_id: &str) -> String {
1004    let config = effective_config();
1005    model_family_with_config(&config, provider, model_id)
1006}
1007
1008pub(crate) fn model_family_with_config(
1009    config: &ProvidersConfig,
1010    provider: &str,
1011    model_id: &str,
1012) -> String {
1013    catalog_family_token(config, model_id)
1014        .unwrap_or_else(|| derive_model_family(provider, model_id))
1015}
1016
1017fn model_family_with_inference_source(
1018    config: &ProvidersConfig,
1019    provider: &str,
1020    model_id: &str,
1021    source: crate::llm::provider::ProviderInferenceSource,
1022) -> String {
1023    if let Some(family) = catalog_family_token(config, model_id) {
1024        return family;
1025    }
1026    let id_family = derive_model_family("", model_id);
1027    if id_family != "unknown" {
1028        return id_family;
1029    }
1030    if matches!(
1031        source,
1032        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1033    ) {
1034        return "unknown".to_string();
1035    }
1036    derive_model_family(provider, model_id)
1037}
1038
1039/// Return the narrower lineage token used for model-aware option packs.
1040pub fn model_lineage(provider: &str, model_id: &str) -> String {
1041    let config = effective_config();
1042    model_lineage_with_config(&config, provider, model_id)
1043}
1044
1045pub(crate) fn model_lineage_with_config(
1046    config: &ProvidersConfig,
1047    provider: &str,
1048    model_id: &str,
1049) -> String {
1050    catalog_lineage_token(config, model_id)
1051        .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1052}
1053
1054fn model_lineage_with_inference_source(
1055    config: &ProvidersConfig,
1056    provider: &str,
1057    model_id: &str,
1058    source: crate::llm::provider::ProviderInferenceSource,
1059) -> String {
1060    if let Some(lineage) = catalog_lineage_token(config, model_id) {
1061        return lineage;
1062    }
1063    let id_lineage = derive_model_lineage("", model_id);
1064    if id_lineage != "unknown" {
1065        return id_lineage;
1066    }
1067    if matches!(
1068        source,
1069        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1070    ) {
1071        return "unknown".to_string();
1072    }
1073    derive_model_lineage(provider, model_id)
1074}
1075
1076fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1077    config
1078        .models
1079        .get(model_id)
1080        .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1081}
1082
1083fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1084    config
1085        .models
1086        .get(model_id)
1087        .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1088}
1089
/// Canonicalize a catalog token: trim it, lowercase ASCII letters, and turn
/// underscores into hyphens. Missing or blank input yields `None`.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let token = value?.trim();
    if token.is_empty() {
        return None;
    }
    Some(token.to_ascii_lowercase().replace('_', "-"))
}
1096
1097fn derive_model_family(provider: &str, model_id: &str) -> String {
1098    let id = model_id.to_ascii_lowercase();
1099    if contains_any(&id, &["claude", "anthropic.claude"]) {
1100        return "anthropic-claude".to_string();
1101    }
1102    if contains_any(&id, &["gemini", "google/gemini"]) {
1103        return "google-gemini".to_string();
1104    }
1105    if contains_any(&id, &["deepseek"]) {
1106        return "deepseek".to_string();
1107    }
1108    if contains_any(&id, &["qwen"]) {
1109        return "qwen".to_string();
1110    }
1111    if contains_any(&id, &["kimi", "moonshot"]) {
1112        return "kimi".to_string();
1113    }
1114    if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1115        return "glm".to_string();
1116    }
1117    if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1118        return "mistral".to_string();
1119    }
1120    if contains_any(&id, &["minimax"]) {
1121        return "minimax".to_string();
1122    }
1123    if contains_any(&id, &["llama"]) {
1124        return "llama".to_string();
1125    }
1126    if contains_any(&id, &["gemma"]) {
1127        return "gemma".to_string();
1128    }
1129    if is_openai_reasoning_model(&id) {
1130        return "openai-reasoning".to_string();
1131    }
1132    if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1133        return "openai-gpt".to_string();
1134    }
1135    match provider {
1136        "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1137        "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1138        "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1139        "deepseek" => "deepseek".to_string(),
1140        "zai" => "glm".to_string(),
1141        "minimax" => "minimax".to_string(),
1142        other if !other.is_empty() => normalize_identifier_token(other),
1143        _ => "unknown".to_string(),
1144    }
1145}
1146
1147fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1148    let id = model_id.to_ascii_lowercase();
1149    if contains_any(&id, &["haiku"]) {
1150        return "claude-haiku".to_string();
1151    }
1152    if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1153        return "claude-opus-adaptive".to_string();
1154    }
1155    if contains_any(&id, &["claude"]) {
1156        return "claude-sonnet-opus".to_string();
1157    }
1158    if contains_any(&id, &["gpt-5"]) {
1159        return "openai-gpt5".to_string();
1160    }
1161    if is_openai_reasoning_model(&id) {
1162        return "openai-reasoning".to_string();
1163    }
1164    if contains_any(&id, &["gpt-", "gpt_"]) {
1165        return "openai-legacy".to_string();
1166    }
1167    if contains_any(&id, &["gemini"]) {
1168        if contains_any(&id, &["flash"]) {
1169            return "gemini-flash".to_string();
1170        }
1171        return "gemini-pro".to_string();
1172    }
1173    if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1174        return "qwen3".to_string();
1175    }
1176    if contains_any(&id, &["gemma4", "gemma-4"]) {
1177        return "gemma4".to_string();
1178    }
1179    let family = derive_model_family(provider, model_id);
1180    if family == "unknown" {
1181        "unknown".to_string()
1182    } else {
1183        family
1184    }
1185}
1186
/// True when `haystack` contains at least one of `needles` as a substring.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1190
/// True when `haystack` begins with at least one of `prefixes`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1194
1195fn is_openai_reasoning_model(id: &str) -> bool {
1196    starts_with_any(id, &["o1", "o3", "o4"])
1197        || contains_any(
1198            id,
1199            &[
1200                "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1201            ],
1202        )
1203}
1204
/// Reduce an arbitrary identifier to lowercase ASCII alphanumeric runs joined
/// by single hyphens. Every other character acts as a separator, and
/// leading/trailing separators are dropped.
fn normalize_identifier_token(value: &str) -> String {
    let mut token = String::new();
    let mut pending_separator = false;
    for ch in value.trim().chars() {
        let lowered = ch.to_ascii_lowercase();
        if lowered.is_ascii_alphanumeric() {
            // Emit at most one hyphen between runs, never at the start.
            if pending_separator && !token.is_empty() {
                token.push('-');
            }
            pending_separator = false;
            token.push(lowered);
        } else {
            pending_separator = true;
        }
    }
    token
}
1223
1224/// Get provider config for resolving base_url, auth, etc.
1225pub fn provider_config(name: &str) -> Option<ProviderDef> {
1226    effective_config().providers.get(name).cloned()
1227}
1228
1229pub fn provider_protocol(name: &str) -> Option<String> {
1230    provider_config(name).and_then(|def| def.protocol)
1231}
1232
1233pub fn provider_uses_acp(name: &str) -> bool {
1234    provider_protocol(name)
1235        .as_deref()
1236        .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1237}
1238
1239/// Get model-specific default parameters (temperature, etc.).
1240/// Matches glob patterns in model_defaults keys.
1241pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1242    let config = effective_config();
1243    let mut params = BTreeMap::new();
1244    for (pattern, defaults) in &config.model_defaults {
1245        if glob_match(pattern, model_id) {
1246            for (k, v) in defaults {
1247                params.insert(k.clone(), v.clone());
1248            }
1249        }
1250    }
1251    params
1252}
1253
1254/// Get per-role LLM defaults, e.g. `[model_roles.merge]`.
1255///
1256/// Role defaults are intentionally shaped like ordinary `llm_call` options:
1257/// callers can pin `provider`/`model`, install `route_policy` or `prefer`,
1258/// and tune budget/latency knobs without creating a parallel routing stack.
1259/// Environment variables provide a lightweight operational override for
1260/// merge/fast-apply workers:
1261///
1262/// - `HARN_LLM_MERGE_PROVIDER`, `HARN_LLM_MERGE_MODEL`,
1263///   `HARN_LLM_MERGE_ROUTE_POLICY`
1264/// - `HARN_LLM_FAST_APPLY_PROVIDER`, `HARN_LLM_FAST_APPLY_MODEL`,
1265///   `HARN_LLM_FAST_APPLY_ROUTE_POLICY`
1266/// - `HARN_LLM_ROLE_<ROLE>_PROVIDER`, `_MODEL`, `_ROUTE_POLICY`
1267pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1268    let normalized = normalize_model_role_name(role);
1269    if normalized.is_empty() {
1270        return BTreeMap::new();
1271    }
1272    let config = effective_config();
1273    let mut params = BTreeMap::new();
1274    for key in role_lookup_keys(&normalized) {
1275        extend_model_role_defaults(&config, &key, &mut params);
1276    }
1277    apply_model_role_env_overrides(&normalized, &mut params);
1278    params
1279}
1280
1281fn extend_model_role_defaults(
1282    config: &ProvidersConfig,
1283    role: &str,
1284    params: &mut BTreeMap<String, toml::Value>,
1285) {
1286    for (configured_role, defaults) in &config.model_roles {
1287        if normalize_model_role_name(configured_role) == role {
1288            params.extend(defaults.clone());
1289        }
1290    }
1291    if let Some(defaults) = config.model_roles.get(role) {
1292        params.extend(defaults.clone());
1293    }
1294}
1295
/// Canonical role name: trimmed, ASCII-lowercased, hyphens turned into
/// underscores.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch.to_ascii_lowercase() })
        .collect()
}
1299
/// Config keys to consult for a (normalized) role, in application order.
///
/// `merge` and `fast_apply` each also consult the other, with the requested
/// role listed last; any other role maps to itself only.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let keys: &[&str] = match role {
        "merge" => &["fast_apply", "merge"],
        "fast_apply" => &["merge", "fast_apply"],
        other => return vec![other.to_string()],
    };
    keys.iter().map(|key| (*key).to_owned()).collect()
}
1309
/// Turn a role name into an environment-variable token: ASCII alphanumeric
/// runs, uppercased and joined by single underscores. All other characters
/// act as separators.
fn role_env_token(role: &str) -> String {
    role.split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_uppercase)
        .collect::<Vec<_>>()
        .join("_")
}
1325
1326fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
1327    for alias in role_env_aliases(role) {
1328        apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
1329        apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
1330        apply_model_role_env_var(
1331            &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
1332            "route_policy",
1333            params,
1334        );
1335        apply_model_role_env_var(
1336            &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
1337            "provider",
1338            params,
1339        );
1340        apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
1341        apply_model_role_env_var(
1342            &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
1343            "route_policy",
1344            params,
1345        );
1346    }
1347}
1348
1349fn role_env_aliases(role: &str) -> Vec<String> {
1350    let token = role_env_token(role);
1351    if token.is_empty() {
1352        return Vec::new();
1353    }
1354    if token == "MERGE" {
1355        vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
1356    } else if token == "FAST_APPLY" {
1357        vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
1358    } else {
1359        vec![token]
1360    }
1361}
1362
1363fn apply_model_role_env_var(
1364    env_name: &str,
1365    option_name: &str,
1366    params: &mut BTreeMap<String, toml::Value>,
1367) {
1368    let Ok(value) = std::env::var(env_name) else {
1369        return;
1370    };
1371    let trimmed = value.trim();
1372    if trimmed.is_empty() {
1373        return;
1374    }
1375    params.insert(
1376        option_name.to_string(),
1377        toml::Value::String(trimmed.to_string()),
1378    );
1379}
1380
1381/// Get list of configured provider names.
1382pub fn provider_names() -> Vec<String> {
1383    effective_config().providers.keys().cloned().collect()
1384}
1385
1386/// Return every configured alias name, sorted deterministically.
1387pub fn known_model_names() -> Vec<String> {
1388    effective_config().aliases.keys().cloned().collect()
1389}
1390
1391pub fn alias_entries() -> Vec<(String, AliasDef)> {
1392    effective_config().aliases.into_iter().collect()
1393}
1394
1395pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
1396    effective_config().alias_tool_calling.get(alias).cloned()
1397}
1398
1399/// Return every configured model-catalog entry, sorted by provider then id.
1400pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
1401    let config = effective_config();
1402    model_catalog_entries_with_config(&config)
1403}
1404
1405pub(crate) fn model_catalog_entries_with_config(
1406    config: &ProvidersConfig,
1407) -> Vec<(String, ModelDef)> {
1408    sorted_model_entries_with_config(config)
1409        .into_iter()
1410        .map(|(id, model)| {
1411            let provider = model.provider.clone();
1412            (
1413                id.clone(),
1414                with_effective_capability_tags(id, provider, model),
1415            )
1416        })
1417        .collect()
1418}
1419
1420pub(crate) fn sorted_model_entries_with_config(
1421    config: &ProvidersConfig,
1422) -> Vec<(String, ModelDef)> {
1423    let mut entries: Vec<_> = config
1424        .models
1425        .iter()
1426        .map(|(id, model)| (id.clone(), model.clone()))
1427        .collect();
1428    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
1429        model_a
1430            .provider
1431            .cmp(&model_b.provider)
1432            .then_with(|| id_a.cmp(id_b))
1433    });
1434    entries
1435}
1436
1437pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
1438    effective_config()
1439        .models
1440        .get(model_id)
1441        .cloned()
1442        .map(|model| {
1443            let provider = model.provider.clone();
1444            with_effective_capability_tags(model_id.to_string(), provider, model)
1445        })
1446}
1447
1448pub fn qc_default_model(provider: &str) -> Option<String> {
1449    std::env::var("BURIN_QC_MODEL")
1450        .ok()
1451        .filter(|value| !value.trim().is_empty())
1452        .or_else(|| {
1453            effective_config()
1454                .qc_defaults
1455                .get(&provider.to_lowercase())
1456                .cloned()
1457        })
1458}
1459
1460pub fn default_model_for_provider(provider: &str) -> String {
1461    if provider_uses_acp(provider) {
1462        return "default".to_string();
1463    }
1464    match provider {
1465        "local" => std::env::var("LOCAL_LLM_MODEL")
1466            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
1467            .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
1468        "mlx" => std::env::var("MLX_MODEL_ID")
1469            .unwrap_or_else(|_| "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string()),
1470        "openai" => "gpt-4o-mini".to_string(),
1471        "ollama" => "llama3.2".to_string(),
1472        "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
1473        _ => "claude-sonnet-4-6".to_string(),
1474    }
1475}
1476
1477pub fn qc_defaults() -> BTreeMap<String, String> {
1478    effective_config().qc_defaults
1479}
1480
1481pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
1482    effective_config()
1483        .models
1484        .get(model_id)
1485        .and_then(|model| model.pricing.clone())
1486}
1487
1488/// Premium per-MTok pricing for a model's accelerated-serving ("fast mode")
1489/// tier, when the catalog declares one. Returns `None` for models with no
1490/// fast tier or a tier that omits explicit pricing — callers fall back to
1491/// standard pricing in that case.
1492pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
1493    effective_config()
1494        .models
1495        .get(model_id)
1496        .and_then(|model| model.fast_mode.as_ref())
1497        .and_then(|fast_mode| fast_mode.pricing.clone())
1498}
1499
1500pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
1501    model_pricing_per_mtok(model_id)
1502        .map(|pricing| {
1503            (
1504                pricing.input_per_mtok / 1000.0,
1505                pricing.output_per_mtok / 1000.0,
1506            )
1507        })
1508        .or_else(|| {
1509            let (input, output, _) = provider_economics(provider);
1510            match (input, output) {
1511                (Some(input), Some(output)) => Some((input, output)),
1512                _ => None,
1513            }
1514        })
1515}
1516
1517pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
1518    match auth_env {
1519        AuthEnv::None => Vec::new(),
1520        AuthEnv::Single(name) => vec![name.clone()],
1521        AuthEnv::Multiple(names) => names.clone(),
1522    }
1523}
1524
1525pub fn provider_key_available(provider: &str) -> bool {
1526    let Some(pdef) = provider_config(provider) else {
1527        return provider == "ollama";
1528    };
1529    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
1530        return true;
1531    }
1532    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
1533        std::env::var(env_name)
1534            .ok()
1535            .is_some_and(|value| !value.trim().is_empty())
1536    })
1537}
1538
1539pub fn available_provider_names() -> Vec<String> {
1540    provider_names()
1541        .into_iter()
1542        .filter(|provider| provider_key_available(provider))
1543        .collect()
1544}
1545
1546/// Check if a provider advertises a legacy provider-level feature.
1547pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
1548    provider_config(provider)
1549        .map(|p| p.features.iter().any(|f| f == feature))
1550        .unwrap_or(false)
1551}
1552
1553/// Provider-level catalog pricing/latency. Model-specific catalog pricing
1554/// wins when available; this is the adapter-level fallback used by routing
1555/// and portal summaries when a model has no explicit catalog entry.
1556pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
1557    provider_config(provider)
1558        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
1559        .unwrap_or((None, None, None))
1560}
1561
1562/// Resolve the default tool format for a model+provider combination.
1563/// Priority: alias `tool_format` (matched by model ID) > provider/model
1564/// capability matrix > legacy provider feature > "text".
1565pub fn default_tool_format(model: &str, provider: &str) -> String {
1566    let config = effective_config();
1567    default_tool_format_with_config(&config, model, provider)
1568}
1569
1570fn default_tool_format_with_config(
1571    config: &ProvidersConfig,
1572    model: &str,
1573    provider: &str,
1574) -> String {
1575    // Aliases match by model ID + provider, or by alias name.
1576    for (name, alias) in &config.aliases {
1577        let matches = (alias.id == model && alias.provider == provider) || name == model;
1578        if matches {
1579            if let Some(ref fmt) = alias.tool_format {
1580                return fmt.clone();
1581            }
1582        }
1583    }
1584    let capabilities = crate::llm::capabilities::lookup(provider, model);
1585    if let Some(format) = capabilities.preferred_tool_format.as_deref() {
1586        if matches!(format, "native" | "text") {
1587            return format.to_string();
1588        }
1589    }
1590    let capability_matrix_native = capabilities.native_tools;
1591    let legacy_provider_native = config
1592        .providers
1593        .get(provider)
1594        .map(|p| p.features.iter().any(|f| f == "native_tools"))
1595        .unwrap_or(false);
1596    if capability_matrix_native || legacy_provider_native {
1597        "native".to_string()
1598    } else {
1599        "text".to_string()
1600    }
1601}
1602
1603fn with_effective_capability_tags(
1604    model_id: String,
1605    provider: String,
1606    mut model: ModelDef,
1607) -> ModelDef {
1608    model.capabilities = effective_model_capability_tags(&provider, &model_id);
1609    model
1610}
1611
1612/// Legacy display tags derived from the canonical provider/model capability
1613/// matrix. The matrix is the source of truth; `models.*.capabilities` in
1614/// providers.toml is accepted only for backwards-compatible parsing.
1615pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
1616    let caps = crate::llm::capabilities::lookup(provider, model_id);
1617    capability_tags_from_capabilities(&caps)
1618}
1619
1620pub(crate) fn capability_tags_from_capabilities(
1621    caps: &crate::llm::capabilities::Capabilities,
1622) -> Vec<String> {
1623    let mut tags = Vec::new();
1624    // Today all Harn chat providers expose streaming. Keep this as a
1625    // transport baseline rather than a duplicated per-model declaration.
1626    tags.push("streaming".to_string());
1627    if caps.native_tools || caps.text_tool_wire_format_supported {
1628        tags.push("tools".to_string());
1629    }
1630    if !caps.tool_search.is_empty() {
1631        tags.push("tool_search".to_string());
1632    }
1633    if caps.vision || caps.vision_supported {
1634        tags.push("vision".to_string());
1635    }
1636    if caps.audio {
1637        tags.push("audio".to_string());
1638    }
1639    if caps.pdf {
1640        tags.push("pdf".to_string());
1641    }
1642    if caps.video {
1643        tags.push("video".to_string());
1644    }
1645    if caps.files_api_supported {
1646        tags.push("files".to_string());
1647    }
1648    if caps.prompt_caching {
1649        tags.push("prompt_caching".to_string());
1650    }
1651    if !caps.thinking_modes.is_empty() {
1652        tags.push("thinking".to_string());
1653    }
1654    if caps.interleaved_thinking_supported
1655        || caps
1656            .thinking_modes
1657            .iter()
1658            .any(|mode| mode == "adaptive" || mode == "effort")
1659    {
1660        tags.push("extended_thinking".to_string());
1661    }
1662    if caps.json_schema.is_some() {
1663        tags.push("structured_output".to_string());
1664    }
1665    tags
1666}
1667
1668/// Resolve a tier or alias into a concrete model/provider pair.
1669pub fn resolve_tier_model(
1670    target: &str,
1671    preferred_provider: Option<&str>,
1672) -> Option<(String, String)> {
1673    let config = effective_config();
1674
1675    if let Some(alias) = config.aliases.get(target) {
1676        return Some((alias.id.clone(), alias.provider.clone()));
1677    }
1678
1679    let candidate_aliases = if let Some(provider) = preferred_provider {
1680        vec![
1681            format!("{provider}/{target}"),
1682            format!("{provider}:{target}"),
1683            format!("tier/{target}"),
1684            target.to_string(),
1685        ]
1686    } else {
1687        vec![format!("tier/{target}"), target.to_string()]
1688    };
1689
1690    for alias_name in candidate_aliases {
1691        if let Some(alias) = config.aliases.get(&alias_name) {
1692            return Some((alias.id.clone(), alias.provider.clone()));
1693        }
1694    }
1695
1696    None
1697}
1698
1699/// Return all configured alias-backed model/provider pairs whose resolved
1700/// model falls into the requested capability tier. The result is de-duplicated
1701/// and sorted deterministically by provider then model id.
1702pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
1703    let config = effective_config();
1704    let mut seen = std::collections::BTreeSet::new();
1705    let mut candidates = Vec::new();
1706
1707    for alias in config.aliases.values() {
1708        let pair = (alias.id.clone(), alias.provider.clone());
1709        if seen.contains(&pair) {
1710            continue;
1711        }
1712        if model_tier(&alias.id) == target {
1713            seen.insert(pair.clone());
1714            candidates.push(pair);
1715        }
1716    }
1717
1718    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
1719        provider_a
1720            .cmp(provider_b)
1721            .then_with(|| model_a.cmp(model_b))
1722    });
1723    candidates
1724}
1725
1726/// Return all configured alias-backed model/provider pairs. Used by routing
1727/// policies that need to compare alternatives across tiers.
1728pub fn all_model_candidates() -> Vec<(String, String)> {
1729    let config = effective_config();
1730    let mut seen = std::collections::BTreeSet::new();
1731    let mut candidates = Vec::new();
1732
1733    for alias in config.aliases.values() {
1734        let pair = (alias.id.clone(), alias.provider.clone());
1735        if seen.insert(pair.clone()) {
1736            candidates.push(pair);
1737        }
1738    }
1739
1740    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
1741        provider_a
1742            .cmp(provider_b)
1743            .then_with(|| model_a.cmp(model_b))
1744    });
1745    candidates
1746}
1747
1748pub fn pick_complementary_reviewer(
1749    options: ComplementaryReviewerOptions,
1750) -> ComplementaryReviewerSelection {
1751    let config = effective_config();
1752    let mut author = resolve_model_info(&options.author_model);
1753    if let Some(provider) = options
1754        .author_provider
1755        .as_deref()
1756        .map(str::trim)
1757        .filter(|provider| !provider.is_empty())
1758    {
1759        author.provider = provider.to_string();
1760        author.family = model_family_with_config(&config, &author.provider, &author.id);
1761        author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
1762        author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
1763    }
1764    let author_entry = config.models.get(&author.id);
1765    let author_identity = complementary_identity(
1766        author.id.clone(),
1767        author.provider.clone(),
1768        author.family.clone(),
1769        author.lineage.clone(),
1770        author.tier.clone(),
1771        author_entry.and_then(|model| model.pricing.clone()),
1772    );
1773
1774    let fallback = |fallback_reason: String| ComplementaryReviewerSelection {
1775        intent: options.intent.as_str().to_string(),
1776        reviewer: author_identity.clone(),
1777        estimated_incremental_cost: cost_estimate(
1778            author_identity.pricing.as_ref(),
1779            author_identity.pricing.as_ref(),
1780        ),
1781        author: author_identity.clone(),
1782        fallback: true,
1783        reason: format!(
1784            "using author model {} because {fallback_reason}",
1785            author_identity.id
1786        ),
1787        fallback_reason: Some(fallback_reason),
1788    };
1789
1790    if author_identity.family == "unknown" {
1791        return fallback("author model family is unknown".to_string());
1792    }
1793
1794    let preferred_families = author_entry
1795        .map(|model| model.complementary_with.clone())
1796        .unwrap_or_default();
1797    let author_refs = reviewer_match_refs(&author_identity);
1798    let mut rejected_by_price = 0usize;
1799    let mut diff_family_seen = 0usize;
1800    let mut candidates = Vec::new();
1801
1802    for (id, model) in config.models.iter() {
1803        if id == &author_identity.id && model.provider == author_identity.provider {
1804            continue;
1805        }
1806        if model.deprecated || model.availability != ModelAvailability::Serverless {
1807            continue;
1808        }
1809        let family = model_family_with_config(&config, &model.provider, id);
1810        if family == "unknown" || family == author_identity.family {
1811            continue;
1812        }
1813        diff_family_seen += 1;
1814        let lineage = model_lineage_with_config(&config, &model.provider, id);
1815        let candidate_identity = complementary_identity(
1816            id.clone(),
1817            model.provider.clone(),
1818            family,
1819            lineage,
1820            model_tier_with_config(&config, id),
1821            model.pricing.clone(),
1822        );
1823        if model
1824            .avoid_as_reviewer_for
1825            .iter()
1826            .any(|selector| refs_contain_selector(&author_refs, selector))
1827        {
1828            continue;
1829        }
1830        if exceeds_price_cap(
1831            author_identity.pricing.as_ref(),
1832            candidate_identity.pricing.as_ref(),
1833            options.max_price_multiplier,
1834        ) {
1835            rejected_by_price += 1;
1836            continue;
1837        }
1838        let score = reviewer_score(
1839            &options,
1840            &author_identity,
1841            &candidate_identity,
1842            model,
1843            &preferred_families,
1844        );
1845        candidates.push(ReviewerCandidate {
1846            identity: candidate_identity,
1847            score,
1848        });
1849    }
1850
1851    candidates.sort_by(|left, right| {
1852        right
1853            .score
1854            .partial_cmp(&left.score)
1855            .unwrap_or(std::cmp::Ordering::Equal)
1856            .then_with(|| left.identity.provider.cmp(&right.identity.provider))
1857            .then_with(|| left.identity.id.cmp(&right.identity.id))
1858    });
1859
1860    let Some(best) = candidates.into_iter().next() else {
1861        if rejected_by_price > 0 {
1862            let cap = options.max_price_multiplier.unwrap_or_default();
1863            return fallback(format!(
1864                "no different-family reviewer satisfied max_price_multiplier {cap}"
1865            ));
1866        }
1867        if diff_family_seen == 0 {
1868            return fallback(
1869                "no active serverless different-family reviewer is cataloged".to_string(),
1870            );
1871        }
1872        return fallback("all different-family reviewer candidates were excluded".to_string());
1873    };
1874
1875    let estimate = cost_estimate(
1876        best.identity.pricing.as_ref(),
1877        author_identity.pricing.as_ref(),
1878    );
1879    ComplementaryReviewerSelection {
1880        intent: options.intent.as_str().to_string(),
1881        reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
1882        estimated_incremental_cost: estimate,
1883        author: author_identity,
1884        reviewer: best.identity,
1885        fallback: false,
1886        fallback_reason: None,
1887    }
1888}
1889
1890#[derive(Debug, Clone)]
1891struct ReviewerCandidate {
1892    identity: ComplementaryModelIdentity,
1893    score: f64,
1894}
1895
1896fn complementary_identity(
1897    id: String,
1898    provider: String,
1899    family: String,
1900    lineage: String,
1901    tier: String,
1902    pricing: Option<ModelPricing>,
1903) -> ComplementaryModelIdentity {
1904    ComplementaryModelIdentity {
1905        id,
1906        provider,
1907        family,
1908        lineage,
1909        tier,
1910        pricing,
1911    }
1912}
1913
1914fn reviewer_score(
1915    options: &ComplementaryReviewerOptions,
1916    author: &ComplementaryModelIdentity,
1917    candidate: &ComplementaryModelIdentity,
1918    model: &ModelDef,
1919    preferred_families: &[String],
1920) -> f64 {
1921    let candidate_refs = reviewer_match_refs(candidate);
1922    let mut score = 0.0;
1923    if let Some(rank) = preferred_families
1924        .iter()
1925        .position(|selector| refs_contain_selector(&candidate_refs, selector))
1926    {
1927        score += 1_000.0 - rank as f64;
1928    }
1929    if candidate.provider != author.provider {
1930        score += 100.0;
1931    }
1932    score += match tier_distance(&author.tier, &candidate.tier) {
1933        0 => 80.0,
1934        1 => 45.0,
1935        2 => 15.0,
1936        _ => 0.0,
1937    };
1938    for strength in intent_strengths(options.intent) {
1939        if model.strengths.iter().any(|tag| tag == strength) {
1940            score += 8.0;
1941        }
1942    }
1943    if model.capabilities.iter().any(|tag| tag == "tools") {
1944        score += 4.0;
1945    }
1946    if let (Some(author_total), Some(candidate_total)) = (
1947        pricing_total(author.pricing.as_ref()),
1948        pricing_total(candidate.pricing.as_ref()),
1949    ) {
1950        if author_total > 0.0 {
1951            let ratio = candidate_total / author_total;
1952            if ratio <= 1.0 {
1953                score += 20.0;
1954            }
1955            score -= (ratio - 1.0).abs().min(10.0) * 8.0;
1956        }
1957    }
1958    score
1959}
1960
1961fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
1962    match intent {
1963        ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
1964        ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
1965        ComplementaryReviewerIntent::PlanReview => {
1966            &["reasoning", "coding", "agentic", "long_context", "tool_use"]
1967        }
1968    }
1969}
1970
1971fn tier_distance(left: &str, right: &str) -> u8 {
1972    let left = tier_rank(left);
1973    let right = tier_rank(right);
1974    left.abs_diff(right)
1975}
1976
/// Map a tier name to an ordinal rank: `small` is 0, `frontier` and
/// `reasoning` are 2, and everything else (including `mid` and unrecognized
/// names) is 1.
fn tier_rank(tier: &str) -> u8 {
    match tier {
        "small" => 0,
        "frontier" | "reasoning" => 2,
        // "mid" and any unknown tier share the middle rank.
        _ => 1,
    }
}
1985
1986fn exceeds_price_cap(
1987    author_pricing: Option<&ModelPricing>,
1988    candidate_pricing: Option<&ModelPricing>,
1989    max_price_multiplier: Option<f64>,
1990) -> bool {
1991    let Some(max_price_multiplier) = max_price_multiplier else {
1992        return false;
1993    };
1994    let Some(author_total) = pricing_total(author_pricing) else {
1995        return false;
1996    };
1997    let Some(candidate_total) = pricing_total(candidate_pricing) else {
1998        return true;
1999    };
2000    author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2001}
2002
2003fn cost_estimate(
2004    reviewer_pricing: Option<&ModelPricing>,
2005    author_pricing: Option<&ModelPricing>,
2006) -> Option<ComplementaryCostEstimate> {
2007    let reviewer_pricing = reviewer_pricing?;
2008    let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2009    let multiplier_vs_author = pricing_total(author_pricing)
2010        .filter(|author_total| *author_total > 0.0)
2011        .map(|author_total| total_per_mtok / author_total);
2012    Some(ComplementaryCostEstimate {
2013        input_per_mtok: reviewer_pricing.input_per_mtok,
2014        output_per_mtok: reviewer_pricing.output_per_mtok,
2015        total_per_mtok,
2016        multiplier_vs_author,
2017    })
2018}
2019
2020fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2021    pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2022}
2023
2024fn reviewer_reason(
2025    author: &ComplementaryModelIdentity,
2026    reviewer: &ComplementaryModelIdentity,
2027    estimate: Option<&ComplementaryCostEstimate>,
2028) -> String {
2029    let cost = estimate
2030        .and_then(|estimate| estimate.multiplier_vs_author)
2031        .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2032        .unwrap_or_else(|| "price ratio unavailable".to_string());
2033    format!(
2034        "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2035        reviewer.id,
2036        reviewer.provider,
2037        reviewer.family,
2038        author.family,
2039        reviewer.tier,
2040        author.tier,
2041        cost
2042    )
2043}
2044
2045fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2046    BTreeSet::from([
2047        identity.id.to_ascii_lowercase(),
2048        identity.provider.to_ascii_lowercase(),
2049        format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2050        format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2051        identity.family.to_ascii_lowercase(),
2052        identity.lineage.to_ascii_lowercase(),
2053    ])
2054}
2055
2056fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2057    normalized_catalog_token(Some(selector))
2058        .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2059        .is_some_and(|selector| refs.contains(&selector))
2060}
2061
/// Simple glob matching for patterns like "claude-*", "qwen/*", "ollama:*".
///
/// `*` matches any run of characters (including none); every other
/// character matches itself. A pattern without `*` must equal `input`.
///
/// Any number of `*` wildcards is supported. The literal text before the
/// first `*` must be a prefix of `input`, the text after the last `*` must
/// be a suffix of what remains, and the pieces in between must occur in
/// order without overlapping. In particular a pattern such as `ab*ba` does
/// not match `aba`, where prefix and suffix would have to share a character.
fn glob_match(pattern: &str, input: &str) -> bool {
    let mut literals = pattern.split('*');

    // `split` always yields at least one (possibly empty) piece.
    let head = literals.next().unwrap_or("");
    let Some(mut remaining) = input.strip_prefix(head) else {
        return false;
    };

    // No second piece means the pattern has no wildcard: exact match only.
    let Some(tail) = literals.next_back() else {
        return remaining.is_empty();
    };

    // Consume interior literals left to right at their earliest position;
    // for `*`-only globs the leftmost choice never rules out a later match.
    for literal in literals {
        match remaining.find(literal) {
            Some(at) => remaining = &remaining[at + literal.len()..],
            None => return false,
        }
    }

    remaining.ends_with(tail)
}
2079
2080fn dirs_or_home() -> Option<String> {
2081    crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2082}
2083
2084/// Resolve the effective base URL for a provider, checking the `base_url_env`
2085/// override first, then falling back to the configured `base_url`.
2086pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2087    if let Some(env_name) = &pdef.base_url_env {
2088        if let Ok(val) = std::env::var(env_name) {
2089            // Strip surrounding quotes that some .env parsers leave intact.
2090            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2091            if !trimmed.is_empty() {
2092                return trimmed.to_string();
2093            }
2094        }
2095    }
2096    pdef.base_url.clone()
2097}
2098
2099/// Embedded copy of `llm/providers.toml`, the single source of truth for
2100/// Harn's bundled provider/model catalog. Edit the TOML, not this string.
2101const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2102
2103/// Parse the embedded `providers.toml` into the runtime `ProvidersConfig`.
2104///
2105/// Hosts overlay this base via `HARN_PROVIDERS_CONFIG`,
2106/// `~/.config/harn/providers.toml`, `harn.toml`, package-manifest
2107/// `[llm]` sections, and per-run `set_user_overrides(...)`. The same
2108/// Serde shape applies at every layer, so there is exactly one schema to
2109/// keep coherent — no parallel Rust-literal catalog.
2110///
2111/// We `expect` on parse failure because the file is bundled into the
2112/// binary at compile time; a malformed embedded catalog is a build-time
2113/// invariant violation that should fail every test, not silently
2114/// degrade in production.
2115fn default_config() -> ProvidersConfig {
2116    parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2117        .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2118}
2119
/// Test helper: layer `overlay` on top of the embedded default config and
/// return the merged result.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2126
2127#[cfg(test)]
2128mod tests {
2129    use super::*;
2130
2131    fn reset_overrides() {
2132        clear_user_overrides();
2133    }
2134
2135    #[test]
2136    fn test_glob_match_prefix() {
2137        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
2138        assert!(glob_match("gpt-*", "gpt-4o"));
2139        assert!(!glob_match("claude-*", "gpt-4o"));
2140    }
2141
2142    #[test]
2143    fn test_glob_match_suffix() {
2144        assert!(glob_match("*-latest", "llama3.2-latest"));
2145        assert!(!glob_match("*-latest", "llama3.2"));
2146    }
2147
2148    #[test]
2149    fn test_glob_match_middle() {
2150        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
2151        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
2152    }
2153
2154    #[test]
2155    fn test_glob_match_exact() {
2156        assert!(glob_match("gpt-4o", "gpt-4o"));
2157        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
2158    }
2159
2160    #[test]
2161    fn test_infer_provider_from_defaults() {
2162        let _guard = crate::llm::env_lock().lock().expect("env lock");
2163        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2164        unsafe {
2165            std::env::remove_var("HARN_DEFAULT_PROVIDER");
2166        }
2167
2168        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
2169        assert_eq!(infer_provider("gpt-4o"), "openai");
2170        assert_eq!(infer_provider("o1-preview"), "openai");
2171        assert_eq!(infer_provider("o3-mini"), "openai");
2172        assert_eq!(infer_provider("o4-mini"), "openai");
2173        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
2174        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
2175        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
2176        assert_eq!(infer_provider("unknown-model"), "anthropic");
2177
2178        unsafe {
2179            match prev_default_provider {
2180                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2181                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2182            }
2183        }
2184    }
2185
2186    #[test]
2187    fn test_infer_provider_prefix_rules() {
2188        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
2189        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
2190        // Even when the id also contains `/`, the local transport prefix wins.
2191        assert_eq!(infer_provider("local:owner/model"), "ollama");
2192        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
2193    }
2194
2195    #[test]
2196    fn test_openrouter_inference_requires_one_slash() {
2197        let _guard = crate::llm::env_lock().lock().expect("env lock");
2198        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2199        unsafe {
2200            std::env::remove_var("HARN_DEFAULT_PROVIDER");
2201        }
2202
2203        assert_eq!(infer_provider("org/model"), "openrouter");
2204        assert_eq!(infer_provider("org/team/model"), "anthropic");
2205
2206        unsafe {
2207            match prev_default_provider {
2208                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2209                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2210            }
2211        }
2212    }
2213
2214    #[test]
2215    fn test_cerebras_inference_beats_openrouter_slash_fallback() {
2216        let _guard = crate::llm::env_lock().lock().expect("env lock");
2217        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2218        unsafe {
2219            std::env::remove_var("HARN_DEFAULT_PROVIDER");
2220        }
2221
2222        assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
2223        assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
2224        assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
2225
2226        unsafe {
2227            match prev_default_provider {
2228                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2229                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2230            }
2231        }
2232    }
2233
2234    #[test]
2235    fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
2236        // Bare model IDs that the embedded catalog hosts on Cerebras must
2237        // not be misrouted by the generic `gpt-*` / single-slash inference
2238        // fallbacks. Regression for harn#2142 (model-info routed
2239        // `gpt-oss-120b` to openai, breaking Burin TUI credential checks).
2240        let _guard = crate::llm::env_lock().lock().expect("env lock");
2241        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2242        unsafe {
2243            std::env::remove_var("HARN_DEFAULT_PROVIDER");
2244        }
2245
2246        for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
2247            assert_eq!(
2248                infer_provider(model),
2249                "cerebras",
2250                "{model} should route to its catalog provider"
2251            );
2252            let resolved = resolve_model_info(model);
2253            assert_eq!(resolved.id, model);
2254            assert_eq!(resolved.provider, "cerebras");
2255        }
2256
2257        unsafe {
2258            match prev_default_provider {
2259                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2260                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2261            }
2262        }
2263    }
2264
2265    #[test]
2266    fn test_user_catalog_overlay_re_homes_model_provider() {
2267        // Users can re-home a built-in model by overlaying a catalog row;
2268        // the exact-match catalog lookup must honor overlays as well as the
2269        // embedded TOML.
2270        reset_overrides();
2271        let mut overlay = ProvidersConfig::default();
2272        overlay.models.insert(
2273            "gpt-4o".to_string(),
2274            ModelDef {
2275                name: "GPT-4o via OpenRouter".to_string(),
2276                provider: "openrouter".to_string(),
2277                context_window: 128_000,
2278                runtime_context_window: None,
2279                stream_timeout: None,
2280                capabilities: Vec::new(),
2281                pricing: None,
2282                deprecated: false,
2283                deprecation_note: None,
2284                superseded_by: None,
2285                fast_mode: None,
2286                quality_tags: Vec::new(),
2287                availability: ModelAvailability::default(),
2288                tier: None,
2289                open_weight: None,
2290                strengths: Vec::new(),
2291                benchmarks: std::collections::BTreeMap::new(),
2292                family: None,
2293                lineage: None,
2294                complementary_with: Vec::new(),
2295                avoid_as_reviewer_for: Vec::new(),
2296            },
2297        );
2298        set_user_overrides(Some(overlay));
2299
2300        assert_eq!(infer_provider("gpt-4o"), "openrouter");
2301
2302        reset_overrides();
2303    }
2304
2305    #[test]
2306    fn test_resolve_model_info_normalizes_provider_prefixes() {
2307        let local = resolve_model_info("local:gemma-4-e4b-it");
2308        assert_eq!(local.id, "gemma-4-e4b-it");
2309        assert_eq!(local.provider, "ollama");
2310
2311        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
2312        assert_eq!(ollama.id, "qwen3:30b-a3b");
2313        assert_eq!(ollama.provider, "ollama");
2314
2315        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
2316        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
2317        assert_eq!(hf.provider, "huggingface");
2318
2319        let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
2320        assert_eq!(cerebras.id, "gpt-oss-120b");
2321        assert_eq!(cerebras.provider, "cerebras");
2322
2323        let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
2324        assert_eq!(cerebras_glm.id, "zai-glm-4.7");
2325        assert_eq!(cerebras_glm.provider, "cerebras");
2326    }
2327
2328    #[test]
2329    fn test_model_tier_from_defaults() {
2330        // Tier is now self-declared per model row in providers.toml.
2331        // Models that match an entry use the declared value; unknown
2332        // model ids fall through to `tier_defaults.default` ("mid").
2333        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
2334        assert_eq!(model_tier("gpt-4o"), "frontier");
2335        assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
2336        assert_eq!(model_tier("deepseek-v4-flash"), "mid");
2337        assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
2338        assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
2339        assert_eq!(model_tier("glm-5.1"), "frontier");
2340        // Unknown ids resolve to the default.
2341        assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
2342    }
2343
2344    #[test]
2345    fn test_model_family_preserves_underlying_hosted_lineage() {
2346        assert_eq!(
2347            model_family("openrouter", "anthropic/claude-sonnet-4-6"),
2348            "anthropic-claude"
2349        );
2350        assert_eq!(
2351            model_family("openrouter", "google/gemini-2.5-flash"),
2352            "google-gemini"
2353        );
2354        assert_eq!(
2355            model_family("openrouter", "openai/o3-mini"),
2356            "openai-reasoning"
2357        );
2358        assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
2359        assert_eq!(
2360            model_lineage("openrouter", "openai/o3-mini"),
2361            "openai-reasoning"
2362        );
2363        assert_eq!(
2364            model_lineage("anthropic", "claude-opus-4-8"),
2365            "claude-opus-adaptive"
2366        );
2367        assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
2368    }
2369
2370    #[test]
2371    fn test_complementary_reviewer_uses_different_family() {
2372        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
2373            author_model: "claude-sonnet-4-6".to_string(),
2374            author_provider: None,
2375            intent: ComplementaryReviewerIntent::PlanReview,
2376            max_price_multiplier: Some(3.0),
2377        });
2378
2379        assert!(!selection.fallback, "{selection:?}");
2380        assert_eq!(selection.author.family, "anthropic-claude");
2381        assert_ne!(selection.reviewer.family, selection.author.family);
2382        assert_eq!(selection.reviewer.tier, "frontier");
2383        assert!(selection.estimated_incremental_cost.is_some());
2384    }
2385
2386    #[test]
2387    fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
2388        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
2389            author_model: "gpt-4o-mini".to_string(),
2390            author_provider: Some("openai".to_string()),
2391            intent: ComplementaryReviewerIntent::Review,
2392            max_price_multiplier: Some(0.01),
2393        });
2394
2395        assert!(selection.fallback, "{selection:?}");
2396        assert_eq!(selection.reviewer.id, "gpt-4o-mini");
2397        assert_eq!(selection.reviewer.family, selection.author.family);
2398        assert!(selection
2399            .fallback_reason
2400            .as_deref()
2401            .is_some_and(|reason| reason.contains("max_price_multiplier")));
2402    }
2403
2404    #[test]
2405    fn test_resolve_model_unknown_alias() {
2406        let (id, provider) = resolve_model("gpt-4o");
2407        assert_eq!(id, "gpt-4o");
2408        assert!(provider.is_none());
2409    }
2410
2411    #[test]
2412    fn test_provider_names() {
2413        let names = provider_names();
2414        assert!(names.len() >= 7);
2415        assert!(names.contains(&"anthropic".to_string()));
2416        assert!(names.contains(&"together".to_string()));
2417        assert!(names.contains(&"local".to_string()));
2418        assert!(names.contains(&"mlx".to_string()));
2419        assert!(names.contains(&"openai".to_string()));
2420        assert!(names.contains(&"ollama".to_string()));
2421        assert!(names.contains(&"bedrock".to_string()));
2422        assert!(names.contains(&"azure_openai".to_string()));
2423        assert!(names.contains(&"vertex".to_string()));
2424    }
2425
2426    #[test]
2427    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
2428        let mut overlay = ProvidersConfig {
2429            default_provider: Some("ollama".to_string()),
2430            ..Default::default()
2431        };
2432        overlay.aliases.insert(
2433            "quickstart".to_string(),
2434            AliasDef {
2435                id: "llama3.2".to_string(),
2436                provider: "ollama".to_string(),
2437                tool_format: None,
2438            },
2439        );
2440
2441        let merged = merge_global_config(overlay);
2442
2443        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
2444        assert!(merged.providers.contains_key("anthropic"));
2445        assert!(merged.providers.contains_key("ollama"));
2446        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
2447    }
2448
2449    #[test]
2450    fn partial_provider_overlay_preserves_builtin_provider_metadata() {
2451        let overlay = parse_config_toml(
2452            r#"
2453            [providers.ollama]
2454            base_url = "http://localhost:11435"
2455            extra_headers = { "x-local" = "1" }
2456            "#,
2457        )
2458        .expect("provider overlay parses");
2459
2460        let merged = merge_global_config(overlay);
2461        let ollama = merged
2462            .providers
2463            .get("ollama")
2464            .expect("ollama remains configured");
2465
2466        assert_eq!(ollama.base_url, "http://localhost:11435");
2467        assert_eq!(ollama.auth_style, "none");
2468        assert_eq!(ollama.chat_endpoint, "/api/chat");
2469        assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
2470        assert_eq!(ollama.cost_per_1k_in, Some(0.0));
2471        assert_eq!(ollama.cost_per_1k_out, Some(0.0));
2472        assert_eq!(
2473            ollama
2474                .healthcheck
2475                .as_ref()
2476                .and_then(|healthcheck| healthcheck.path.as_deref()),
2477            Some("/api/tags")
2478        );
2479        assert_eq!(
2480            ollama.extra_headers.get("x-local").map(String::as_str),
2481            Some("1")
2482        );
2483    }
2484
2485    #[test]
2486    fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
2487        let overlay = parse_config_toml(
2488            r#"
2489            [providers.ollama]
2490            auth_style = "bearer"
2491            auth_env = "OLLAMA_API_KEY"
2492            "#,
2493        )
2494        .expect("provider overlay parses");
2495
2496        let merged = merge_global_config(overlay);
2497        let ollama = merged
2498            .providers
2499            .get("ollama")
2500            .expect("ollama remains configured");
2501
2502        assert_eq!(ollama.auth_style, "bearer");
2503        assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
2504        assert_eq!(ollama.chat_endpoint, "/api/chat");
2505    }
2506
2507    #[test]
2508    fn test_resolve_tier_model_default_aliases() {
2509        // Exercise the alias-resolution machinery, not the specific catalog
2510        // value: the model under each tier alias evolves as the embedded
2511        // providers.toml is updated. The invariants worth pinning are the
2512        // provider routing + catalog-registration of the resolved model.
2513        let (model, provider) = resolve_tier_model("frontier", None)
2514            .expect("frontier alias must resolve from the embedded catalog");
2515        assert_eq!(provider, "anthropic");
2516        assert!(
2517            model_catalog_entry(&model)
2518                .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
2519            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
2520        );
2521
2522        let (model, provider) = resolve_tier_model("small", None)
2523            .expect("small alias must resolve from the embedded catalog");
2524        assert!(
2525            [
2526                "openrouter",
2527                "huggingface",
2528                "local",
2529                "llamacpp",
2530                "mlx",
2531                "ollama"
2532            ]
2533            .contains(&provider.as_str()),
2534            "small tier should resolve to an open-weight provider (got {provider} / {model})"
2535        );
2536    }
2537
2538    #[test]
2539    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
2540        // tier/<provider> takes precedence over generic tier when the
2541        // caller scopes by provider. Don't pin the specific model — the
2542        // catalog evolves.
2543        let (model, provider) = resolve_tier_model("mid", Some("openai"))
2544            .expect("mid tier scoped to openai must resolve");
2545        assert_eq!(provider, "openai");
2546        assert!(
2547            model_catalog_entry(&model).is_some(),
2548            "mid/openai alias must point at a registered model (got {model})"
2549        );
2550    }
2551
2552    #[test]
2553    fn test_provider_config_anthropic() {
2554        let pdef = provider_config("anthropic").unwrap();
2555        assert_eq!(pdef.auth_style, "header");
2556        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
2557    }
2558
2559    #[test]
2560    fn test_provider_config_mlx() {
2561        let pdef = provider_config("mlx").unwrap();
2562        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
2563        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
2564        assert_eq!(
2565            pdef.healthcheck.unwrap().path.as_deref(),
2566            Some("/v1/models")
2567        );
2568
2569        let (model, provider) = resolve_model("mlx-qwen36-27b");
2570        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
2571        assert_eq!(provider.as_deref(), Some("mlx"));
2572    }
2573
2574    #[test]
2575    fn test_enterprise_provider_defaults_and_inference() {
2576        let bedrock = provider_config("bedrock").unwrap();
2577        assert_eq!(bedrock.auth_style, "aws_sigv4");
2578        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
2579        assert_eq!(
2580            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
2581            "bedrock"
2582        );
2583        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
2584
2585        let azure = provider_config("azure_openai").unwrap();
2586        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
2587        assert_eq!(
2588            auth_env_names(&azure.auth_env),
2589            vec![
2590                "AZURE_OPENAI_API_KEY".to_string(),
2591                "AZURE_OPENAI_AD_TOKEN".to_string(),
2592                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
2593            ]
2594        );
2595
2596        let vertex = provider_config("vertex").unwrap();
2597        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
2598        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
2599    }
2600
2601    #[test]
2602    fn test_default_provider_env_override_for_unknown_model() {
2603        let _guard = crate::llm::env_lock().lock().expect("env lock");
2604        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2605        unsafe {
2606            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
2607        }
2608
2609        let inference = infer_provider_detail("unknown-model");
2610
2611        unsafe {
2612            match prev_default_provider {
2613                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2614                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2615            }
2616        }
2617
2618        assert_eq!(inference.provider, "openai");
2619        assert_eq!(
2620            inference.source,
2621            crate::llm::provider::ProviderInferenceSource::DefaultFallback
2622        );
2623    }
2624
2625    #[test]
2626    fn test_unknown_model_family_ignores_default_provider_fallback() {
2627        let _guard = crate::llm::env_lock().lock().expect("env lock");
2628        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2629        unsafe {
2630            std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
2631        }
2632
2633        let unknown = resolve_model_info("mystery-model-xyz");
2634        let known_family = resolve_model_info("deepseek-mystery-model");
2635
2636        unsafe {
2637            match prev_default_provider {
2638                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2639                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2640            }
2641        }
2642
2643        assert_eq!(unknown.provider, "ollama");
2644        assert_eq!(unknown.family, "unknown");
2645        assert_eq!(unknown.lineage, "unknown");
2646        assert_eq!(known_family.family, "deepseek");
2647        assert_eq!(known_family.lineage, "deepseek");
2648    }
2649
2650    #[test]
2651    fn test_resolve_base_url_no_env() {
2652        let pdef = ProviderDef {
2653            base_url: "https://example.com".to_string(),
2654            ..Default::default()
2655        };
2656        assert_eq!(resolve_base_url(&pdef), "https://example.com");
2657    }
2658
2659    #[test]
2660    fn test_default_config_roundtrip() {
2661        let config = default_config();
2662        assert!(!config.providers.is_empty());
2663        assert!(!config.inference_rules.is_empty());
2664        // Tier is now declared on each model row; tier_rules is allowed
2665        // to be empty (the rule table is a legacy fallback only).
2666        assert_eq!(config.tier_defaults.default, "mid");
2667        // At least the new open-weight frontiers should have explicit tiers.
2668        let frontiers = config
2669            .models
2670            .iter()
2671            .filter(|(_, m)| m.tier.as_deref() == Some("frontier"))
2672            .count();
2673        assert!(
2674            frontiers >= 4,
2675            "expected at least 4 frontier-tagged models, got {frontiers}"
2676        );
2677    }
2678
2679    #[test]
2680    fn test_local_ollama_catalog_metadata() {
2681        reset_overrides();
2682
2683        let devstral =
2684            model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
2685        assert_eq!(devstral.context_window, 262_144);
2686        assert!(!devstral.capabilities.iter().any(|cap| cap == "vision"));
2687
2688        let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
2689        assert_eq!(gemma4.context_window, 262_144);
2690        assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
2691    }
2692
2693    #[test]
2694    fn test_external_config_overlays_default_catalog() {
2695        let mut config = default_config();
2696        let mut overlay = ProvidersConfig {
2697            default_provider: Some("ollama".to_string()),
2698            ..Default::default()
2699        };
2700        overlay.providers.insert(
2701            "custom".to_string(),
2702            ProviderDef {
2703                base_url: "https://llm.example.test/v1".to_string(),
2704                chat_endpoint: "/chat/completions".to_string(),
2705                ..Default::default()
2706            },
2707        );
2708
2709        config.merge_from(&overlay);
2710
2711        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
2712        assert!(config.providers.contains_key("custom"));
2713        assert!(config.providers.contains_key("anthropic"));
2714        assert!(config.providers.contains_key("ollama"));
2715    }
2716
2717    #[test]
2718    fn test_model_params_empty() {
2719        let params = model_params("claude-sonnet-4-20250514");
2720        assert!(params.is_empty());
2721    }
2722
2723    #[test]
2724    fn test_user_overrides_add_provider_and_alias() {
2725        reset_overrides();
2726        let mut overlay = ProvidersConfig::default();
2727        overlay.providers.insert(
2728            "acme".to_string(),
2729            ProviderDef {
2730                base_url: "https://llm.acme.test/v1".to_string(),
2731                chat_endpoint: "/chat/completions".to_string(),
2732                ..Default::default()
2733            },
2734        );
2735        overlay.aliases.insert(
2736            "acme-fast".to_string(),
2737            AliasDef {
2738                id: "acme/model-fast".to_string(),
2739                provider: "acme".to_string(),
2740                tool_format: Some("native".to_string()),
2741            },
2742        );
2743        set_user_overrides(Some(overlay));
2744
2745        let (model, provider) = resolve_model("acme-fast");
2746        assert_eq!(model, "acme/model-fast");
2747        assert_eq!(provider.as_deref(), Some("acme"));
2748        assert!(provider_names().contains(&"acme".to_string()));
2749        assert_eq!(
2750            provider_config("acme").map(|provider| provider.base_url),
2751            Some("https://llm.acme.test/v1".to_string())
2752        );
2753
2754        reset_overrides();
2755    }
2756
2757    #[test]
2758    fn test_default_tool_format_uses_capability_matrix() {
2759        reset_overrides();
2760
2761        assert_eq!(
2762            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
2763            "text"
2764        );
2765        assert_eq!(
2766            default_tool_format("devstral-small-2:24b", "ollama"),
2767            "text"
2768        );
2769        assert_eq!(
2770            default_tool_format("ollama-devstral-small-2-native", "ollama"),
2771            "native"
2772        );
2773        // vLLM/SGLang-served Gemma 4 exposes OpenAI-compatible function calling,
2774        // so the local route declares native tools (matching every hosted gemma-4
2775        // sibling) rather than degrading to the text tool format.
2776        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "native");
2777        assert_eq!(
2778            default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
2779            "text"
2780        );
2781        assert_eq!(
2782            default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
2783            "text"
2784        );
2785    }
2786
2787    #[test]
2788    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
2789        reset_overrides();
2790        let mut overlay = ProvidersConfig::default();
2791        overlay.models.insert(
2792            "acme/model-fast".to_string(),
2793            ModelDef {
2794                name: "Acme Fast".to_string(),
2795                provider: "acme".to_string(),
2796                context_window: 65_536,
2797                runtime_context_window: None,
2798                stream_timeout: Some(42.0),
2799                capabilities: vec!["tools".to_string(), "streaming".to_string()],
2800                pricing: Some(ModelPricing {
2801                    input_per_mtok: 1.25,
2802                    output_per_mtok: 2.5,
2803                    cache_read_per_mtok: Some(0.25),
2804                    cache_write_per_mtok: None,
2805                }),
2806                deprecated: false,
2807                deprecation_note: None,
2808                superseded_by: None,
2809                fast_mode: None,
2810                quality_tags: Vec::new(),
2811                availability: ModelAvailability::default(),
2812                tier: None,
2813                open_weight: None,
2814                strengths: Vec::new(),
2815                benchmarks: std::collections::BTreeMap::new(),
2816                family: None,
2817                lineage: None,
2818                complementary_with: Vec::new(),
2819                avoid_as_reviewer_for: Vec::new(),
2820            },
2821        );
2822        overlay
2823            .qc_defaults
2824            .insert("acme".to_string(), "acme/model-cheap".to_string());
2825        set_user_overrides(Some(overlay));
2826
2827        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
2828        assert_eq!(entry.context_window, 65_536);
2829        assert_eq!(
2830            entry.capabilities,
2831            vec!["streaming".to_string(), "tools".to_string()]
2832        );
2833        assert_eq!(
2834            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
2835            Some(1.25)
2836        );
2837        assert_eq!(
2838            pricing_per_1k_for("acme", "acme/model-fast"),
2839            Some((0.00125, 0.0025))
2840        );
2841        assert_eq!(
2842            qc_default_model("acme").as_deref(),
2843            Some("acme/model-cheap")
2844        );
2845
2846        reset_overrides();
2847    }
2848
2849    #[test]
2850    fn test_user_overrides_prepend_inference_rules() {
2851        reset_overrides();
2852        let mut overlay = ProvidersConfig::default();
2853        overlay.inference_rules.push(InferenceRule {
2854            pattern: Some("internal-*".to_string()),
2855            contains: None,
2856            exact: None,
2857            provider: "openai".to_string(),
2858        });
2859        set_user_overrides(Some(overlay));
2860
2861        assert_eq!(infer_provider("internal-foo"), "openai");
2862
2863        reset_overrides();
2864    }
2865
2866    // ── Embedded providers.toml invariants ───────────────────────────────────
2867    // These tests pin properties of the *system* — TOML parses, every
2868    // alias resolves, every deprecated model has a note — without
2869    // pinning specific catalog values. They survive future catalog
2870    // churn and surface real schema breakage.
2871
2872    #[test]
2873    fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
2874        let config = default_config();
2875        assert!(
2876            config.providers.len() >= 10,
2877            "expected >=10 providers in embedded catalog, got {}",
2878            config.providers.len()
2879        );
2880        assert!(
2881            config.models.len() >= 20,
2882            "expected >=20 models in embedded catalog, got {}",
2883            config.models.len()
2884        );
2885        assert!(
2886            config.aliases.len() >= 15,
2887            "expected >=15 aliases in embedded catalog, got {}",
2888            config.aliases.len()
2889        );
2890        assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
2891    }
2892
2893    #[test]
2894    fn embedded_catalog_every_deprecated_model_has_a_note() {
2895        let config = default_config();
2896        let offenders: Vec<&str> = config
2897            .models
2898            .iter()
2899            .filter(|(_, model)| {
2900                model.deprecated
2901                    && model
2902                        .deprecation_note
2903                        .as_deref()
2904                        .unwrap_or("")
2905                        .trim()
2906                        .is_empty()
2907            })
2908            .map(|(id, _)| id.as_str())
2909            .collect();
2910        assert!(
2911            offenders.is_empty(),
2912            "deprecated models missing a deprecation_note: {offenders:?}"
2913        );
2914    }
2915
2916    #[test]
2917    fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
2918        let config = default_config();
2919        for id in ["gpt-oss-120b", "zai-glm-4.7"] {
2920            let model = config.models.get(id).expect("current public Cerebras row");
2921            assert_eq!(model.provider, "cerebras");
2922            assert_eq!(model.availability, ModelAvailability::Serverless);
2923            assert!(!model.deprecated);
2924        }
2925
2926        let llama = config
2927            .models
2928            .get("llama-3.3-70b")
2929            .expect("legacy Cerebras row");
2930        assert_eq!(llama.provider, "cerebras");
2931        assert_eq!(llama.availability, ModelAvailability::Dedicated);
2932        assert!(llama.deprecated);
2933    }
2934
2935    #[test]
2936    fn embedded_catalog_every_model_targets_a_registered_provider() {
2937        let config = default_config();
2938        let known: std::collections::BTreeSet<&str> =
2939            config.providers.keys().map(String::as_str).collect();
2940        let orphans: Vec<(&str, &str)> = config
2941            .models
2942            .iter()
2943            .filter(|(_, model)| !known.contains(model.provider.as_str()))
2944            .map(|(id, model)| (id.as_str(), model.provider.as_str()))
2945            .collect();
2946        assert!(
2947            orphans.is_empty(),
2948            "models reference unknown providers: {orphans:?}"
2949        );
2950    }
2951
2952    #[test]
2953    fn embedded_catalog_every_alias_targets_a_registered_provider() {
2954        let config = default_config();
2955        let known: std::collections::BTreeSet<&str> =
2956            config.providers.keys().map(String::as_str).collect();
2957        let orphans: Vec<(&str, &str)> = config
2958            .aliases
2959            .iter()
2960            .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
2961            .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
2962            .collect();
2963        assert!(
2964            orphans.is_empty(),
2965            "aliases reference unknown providers: {orphans:?}"
2966        );
2967    }
2968
2969    #[test]
2970    fn embedded_catalog_every_qc_default_targets_a_known_model() {
2971        let config = default_config();
2972        let orphans: Vec<(&str, &str)> = config
2973            .qc_defaults
2974            .iter()
2975            .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
2976            .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
2977            .collect();
2978        assert!(
2979            orphans.is_empty(),
2980            "qc_defaults reference unknown models: {orphans:?}"
2981        );
2982    }
2983
2984    #[test]
2985    fn embedded_catalog_pricing_rates_are_non_negative() {
2986        let config = default_config();
2987        for (id, model) in &config.models {
2988            let Some(pricing) = &model.pricing else {
2989                continue;
2990            };
2991            assert!(
2992                pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
2993                "{id}: negative pricing — in={} out={}",
2994                pricing.input_per_mtok,
2995                pricing.output_per_mtok
2996            );
2997            if let Some(rate) = pricing.cache_read_per_mtok {
2998                assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
2999            }
3000            if let Some(rate) = pricing.cache_write_per_mtok {
3001                assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
3002            }
3003        }
3004    }
3005
3006    #[test]
3007    fn model_availability_parses_known_strings() {
3008        assert_eq!(
3009            ModelAvailability::parse("serverless"),
3010            Some(ModelAvailability::Serverless)
3011        );
3012        assert_eq!(
3013            ModelAvailability::parse("dedicated"),
3014            Some(ModelAvailability::Dedicated)
3015        );
3016        assert_eq!(
3017            ModelAvailability::parse("unknown"),
3018            Some(ModelAvailability::Unknown)
3019        );
3020        assert_eq!(ModelAvailability::parse("provisioned"), None);
3021        for value in [
3022            ModelAvailability::Serverless,
3023            ModelAvailability::Dedicated,
3024            ModelAvailability::Unknown,
3025        ] {
3026            assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
3027        }
3028    }
3029
3030    #[test]
3031    fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
3032        let config = default_config();
3033        let model = config
3034            .models
3035            .get("Qwen/Qwen3-Coder-Next-FP8")
3036            .expect("Together Qwen3 Coder Next FP8 is cataloged");
3037        assert_eq!(model.provider, "together");
3038        assert_eq!(model.availability, ModelAvailability::Dedicated);
3039    }
3040
3041    #[test]
3042    fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
3043        // A dedicated-only model behind a tier alias would silently fail
3044        // every serverless caller; the catalog must keep those routes
3045        // separated.
3046        let config = default_config();
3047        let dedicated: std::collections::BTreeSet<(&str, &str)> = config
3048            .models
3049            .iter()
3050            .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
3051            .map(|(id, model)| (model.provider.as_str(), id.as_str()))
3052            .collect();
3053        for (name, alias) in &config.aliases {
3054            if matches!(
3055                name.as_str(),
3056                "frontier"
3057                    | "mid"
3058                    | "small"
3059                    | "tier/frontier"
3060                    | "tier/mid"
3061                    | "tier/small"
3062                    | "sonnet"
3063                    | "opus"
3064                    | "haiku"
3065            ) {
3066                assert!(
3067                    !dedicated.contains(&(alias.provider.as_str(), alias.id.as_str())),
3068                    "tier alias `{name}` targets dedicated-only route `{}/{}`",
3069                    alias.provider,
3070                    alias.id,
3071                );
3072            }
3073        }
3074    }
3075
3076    #[test]
3077    fn embedded_catalog_tier_aliases_resolve_to_active_models() {
3078        // The three canonical tier aliases (frontier / mid / small) MUST
3079        // resolve to non-deprecated catalog entries; a default that
3080        // routes the loop into a sunsetted model is a release blocker.
3081        for alias in ["frontier", "mid", "small"] {
3082            let (model, _provider) = resolve_tier_model(alias, None)
3083                .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
3084            let entry = model_catalog_entry(&model).unwrap_or_else(|| {
3085                panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
3086            });
3087            assert!(
3088                !entry.deprecated,
3089                "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
3090                entry.deprecation_note
3091            );
3092        }
3093    }
3094
3095    #[test]
3096    fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
3097        // The `opus` alias must follow the newest Opus release, and that
3098        // release advertises its (off-by-default) fast-mode tier.
3099        let (model, provider) = resolve_model("opus");
3100        assert_eq!(model, "claude-opus-4-8");
3101        assert_eq!(provider.as_deref(), Some("anthropic"));
3102
3103        let opus48 = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
3104        assert!(!opus48.deprecated, "newest Opus must not be deprecated");
3105        let fast = opus48.fast_mode.expect("opus 4.8 advertises fast mode");
3106        assert_eq!(fast.param, "speed");
3107        assert_eq!(fast.value, "fast");
3108        assert_eq!(fast.status.as_deref(), Some("research_preview"));
3109        let fast_pricing = fast.pricing.expect("fast mode carries premium pricing");
3110        let standard = opus48.pricing.expect("opus 4.8 standard pricing");
3111        assert!(
3112            fast_pricing.input_per_mtok > standard.input_per_mtok,
3113            "fast mode must be premium-priced relative to standard"
3114        );
3115    }
3116
3117    #[test]
3118    fn superseded_opus_models_point_at_claude_opus_4_8() {
3119        // Earlier Opus rows are deprecated and carry a structured
3120        // `superseded_by` pointer to the current flagship.
3121        for model in ["claude-opus-4-7", "claude-opus-4-6"] {
3122            let entry =
3123                model_catalog_entry(model).unwrap_or_else(|| panic!("{model} catalog entry"));
3124            assert!(entry.deprecated, "{model} should be deprecated");
3125            assert_eq!(
3126                entry.superseded_by.as_deref(),
3127                Some("claude-opus-4-8"),
3128                "{model} should be superseded by claude-opus-4-8"
3129            );
3130        }
3131    }
3132
3133    #[test]
3134    fn gpt_5_5_fast_mode_rides_service_tier() {
3135        // Fast mode is provider-agnostic: OpenAI exposes it through the
3136        // `service_tier` knob rather than Anthropic's `speed`.
3137        let entry = model_catalog_entry("gpt-5.5").expect("gpt-5.5 catalog entry");
3138        let fast = entry.fast_mode.expect("gpt-5.5 advertises a fast tier");
3139        assert_eq!(fast.param, "service_tier");
3140        assert_eq!(fast.status.as_deref(), Some("ga"));
3141    }
3142}
harn_vm/llm_config.rs

harn_vm/
llm_config.rs