harn_vm/
llm_config.rs

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::{OnceLock, RwLock};
6
7static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
8static CONFIG_PATH: OnceLock<String> = OnceLock::new();
9static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();
10
11thread_local! {
12    /// Thread-local provider config overlays installed by the CLI after it
13    /// reads the nearest `harn.toml` plus any installed package manifests.
14    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
15    /// the current run without mutating the process-wide default config.
16    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
17}
18
19#[derive(Debug, Clone, Deserialize, Default)]
20pub struct ProvidersConfig {
21    #[serde(default)]
22    pub default_provider: Option<String>,
23    #[serde(default)]
24    pub providers: BTreeMap<String, ProviderDef>,
25    #[serde(default)]
26    pub aliases: BTreeMap<String, AliasDef>,
27    #[serde(default)]
28    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
29    #[serde(default)]
30    pub models: BTreeMap<String, ModelDef>,
31    #[serde(default)]
32    pub qc_defaults: BTreeMap<String, String>,
33    #[serde(default)]
34    pub inference_rules: Vec<InferenceRule>,
35    #[serde(default)]
36    pub tier_rules: Vec<TierRule>,
37    #[serde(default)]
38    pub tier_defaults: TierDefaults,
39    #[serde(default)]
40    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
41    #[serde(default)]
42    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
43    #[serde(default)]
44    pub suppress: SuppressDef,
45}
46
47/// Routes hidden from the exported/served provider catalog artifact.
48///
49/// Lets an overlay drop baseline routes that are broken or unusable for the
50/// embedding product (e.g. a dedicated-only serving route, or a local image
51/// with a broken server-side tool parser) without forking the baseline
52/// catalog. Suppression is artifact-level presentation: it removes the model
53/// row, its aliases, and any recommendation variant derived from it, but does
54/// not block runtime resolution of an explicitly requested model id.
55///
56/// Combined with the overlay's whole-row `models` replacement, this also
57/// expresses route renames: define the row under the new id and suppress the
58/// old one.
59#[derive(Debug, Clone, Deserialize, Default, PartialEq, Eq)]
60pub struct SuppressDef {
61    /// `"provider:model_id"` selectors. Split on the FIRST colon only —
62    /// model ids may themselves contain colons (e.g. Ollama image tags such
63    /// as `ollama:qwen3.6:35b-a3b-coding-nvfp4`). Entries without a colon
64    /// match nothing.
65    #[serde(default)]
66    pub routes: Vec<String>,
67}
68
69impl ProvidersConfig {
70    pub fn is_empty(&self) -> bool {
71        self.default_provider.is_none()
72            && self.providers.is_empty()
73            && self.aliases.is_empty()
74            && self.alias_tool_calling.is_empty()
75            && self.models.is_empty()
76            && self.qc_defaults.is_empty()
77            && self.inference_rules.is_empty()
78            && self.tier_rules.is_empty()
79            && self.model_defaults.is_empty()
80            && self.model_roles.is_empty()
81            && self.suppress.routes.is_empty()
82            && self.tier_defaults.default == default_mid()
83    }
84
85    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
86        for (name, provider) in &overlay.providers {
87            match self.providers.get_mut(name) {
88                Some(existing) => existing.merge_from(provider),
89                None => {
90                    self.providers.insert(name.clone(), provider.clone());
91                }
92            }
93        }
94        self.aliases.extend(overlay.aliases.clone());
95        self.alias_tool_calling
96            .extend(overlay.alias_tool_calling.clone());
97        self.models.extend(overlay.models.clone());
98        self.qc_defaults.extend(overlay.qc_defaults.clone());
99
100        if overlay.default_provider.is_some() {
101            self.default_provider = overlay.default_provider.clone();
102        }
103
104        if !overlay.inference_rules.is_empty() {
105            let mut merged = overlay.inference_rules.clone();
106            merged.extend(self.inference_rules.clone());
107            self.inference_rules = merged;
108        }
109
110        if !overlay.tier_rules.is_empty() {
111            let mut merged = overlay.tier_rules.clone();
112            merged.extend(self.tier_rules.clone());
113            self.tier_rules = merged;
114        }
115
116        if overlay.tier_defaults.default != default_mid() {
117            self.tier_defaults = overlay.tier_defaults.clone();
118        }
119
120        for (pattern, defaults) in &overlay.model_defaults {
121            self.model_defaults
122                .entry(pattern.clone())
123                .or_default()
124                .extend(defaults.clone());
125        }
126
127        for (role, defaults) in &overlay.model_roles {
128            self.model_roles
129                .entry(role.clone())
130                .or_default()
131                .extend(defaults.clone());
132        }
133
134        for route in &overlay.suppress.routes {
135            if !self.suppress.routes.contains(route) {
136                self.suppress.routes.push(route.clone());
137            }
138        }
139    }
140}
141
142#[derive(Debug, Clone)]
143pub struct ProviderDef {
144    pub display_name: Option<String>,
145    pub icon: Option<String>,
146    /// Provider protocol. Omitted providers use Harn's normal HTTP provider
147    /// path; `acp` launches an Agent Client Protocol server and drives it as
148    /// an agent-backed provider.
149    pub protocol: Option<String>,
150    pub base_url: String,
151    pub base_url_env: Option<String>,
152    pub auth_style: String,
153    pub auth_header: Option<String>,
154    pub auth_env: AuthEnv,
155    pub extra_headers: BTreeMap<String, String>,
156    pub chat_endpoint: String,
157    pub completion_endpoint: Option<String>,
158    pub command: Option<String>,
159    pub args: Vec<String>,
160    pub env: BTreeMap<String, String>,
161    pub cwd: Option<String>,
162    pub mcp_servers: Vec<serde_json::Value>,
163    pub healthcheck: Option<HealthcheckDef>,
164    /// Local runtime lifecycle metadata used by `harn local launch/stop`.
165    /// This is intentionally separate from provider process fields such as
166    /// `command`/`args`, which are used for ACP or external provider adapters.
167    pub local_runtime: Option<LocalRuntimeDef>,
168    pub features: Vec<String>,
169    /// Fallback provider name to try if this provider fails.
170    pub fallback: Option<String>,
171    /// Number of retries before falling back (default 0).
172    pub retry_count: Option<u32>,
173    /// Delay between retries in milliseconds (default 1000).
174    pub retry_delay_ms: Option<u64>,
175    /// Maximum requests per minute. None = unlimited.
176    pub rpm: Option<u32>,
177    /// Rich provider quota metadata. `rpm` remains as a legacy shorthand;
178    /// when both are present, this nested shape is the authoritative catalog
179    /// record and callers can still read the flattened `rpm`.
180    pub rate_limits: Option<RateLimitsDef>,
181    /// Provider/catalog pricing in USD per 1k input tokens.
182    pub cost_per_1k_in: Option<f64>,
183    /// Provider/catalog pricing in USD per 1k output tokens.
184    pub cost_per_1k_out: Option<f64>,
185    /// Observed or configured p50 latency in milliseconds.
186    pub latency_p50_ms: Option<u64>,
187    /// Optional provider-level serving performance observations.
188    pub performance: Option<ServingPerformanceDef>,
189    #[doc(hidden)]
190    pub auth_style_explicit: bool,
191}
192
193#[derive(Debug, Clone, Deserialize)]
194struct ProviderDefWire {
195    #[serde(default)]
196    display_name: Option<String>,
197    #[serde(default)]
198    icon: Option<String>,
199    #[serde(default)]
200    protocol: Option<String>,
201    #[serde(default)]
202    base_url: String,
203    #[serde(default)]
204    base_url_env: Option<String>,
205    #[serde(default)]
206    auth_style: Option<String>,
207    #[serde(default)]
208    auth_header: Option<String>,
209    #[serde(default)]
210    auth_env: AuthEnv,
211    #[serde(default)]
212    extra_headers: BTreeMap<String, String>,
213    #[serde(default)]
214    chat_endpoint: String,
215    #[serde(default)]
216    completion_endpoint: Option<String>,
217    #[serde(default)]
218    command: Option<String>,
219    #[serde(default)]
220    args: Vec<String>,
221    #[serde(default)]
222    env: BTreeMap<String, String>,
223    #[serde(default)]
224    cwd: Option<String>,
225    #[serde(default)]
226    mcp_servers: Vec<serde_json::Value>,
227    #[serde(default)]
228    healthcheck: Option<HealthcheckDef>,
229    #[serde(default)]
230    local_runtime: Option<LocalRuntimeDef>,
231    #[serde(default)]
232    features: Vec<String>,
233    #[serde(default)]
234    fallback: Option<String>,
235    #[serde(default)]
236    retry_count: Option<u32>,
237    #[serde(default)]
238    retry_delay_ms: Option<u64>,
239    #[serde(default)]
240    rpm: Option<u32>,
241    #[serde(default)]
242    rate_limits: Option<RateLimitsDef>,
243    #[serde(default)]
244    cost_per_1k_in: Option<f64>,
245    #[serde(default)]
246    cost_per_1k_out: Option<f64>,
247    #[serde(default)]
248    latency_p50_ms: Option<u64>,
249    #[serde(default)]
250    performance: Option<ServingPerformanceDef>,
251}
252
253impl<'de> Deserialize<'de> for ProviderDef {
254    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
255    where
256        D: serde::Deserializer<'de>,
257    {
258        let wire = ProviderDefWire::deserialize(deserializer)?;
259        let auth_style_explicit = wire.auth_style.is_some();
260        Ok(Self {
261            display_name: wire.display_name,
262            icon: wire.icon,
263            protocol: wire.protocol,
264            base_url: wire.base_url,
265            base_url_env: wire.base_url_env,
266            auth_style: wire.auth_style.unwrap_or_else(default_bearer),
267            auth_header: wire.auth_header,
268            auth_env: wire.auth_env,
269            extra_headers: wire.extra_headers,
270            chat_endpoint: wire.chat_endpoint,
271            completion_endpoint: wire.completion_endpoint,
272            command: wire.command,
273            args: wire.args,
274            env: wire.env,
275            cwd: wire.cwd,
276            mcp_servers: wire.mcp_servers,
277            healthcheck: wire.healthcheck,
278            local_runtime: wire.local_runtime,
279            features: wire.features,
280            fallback: wire.fallback,
281            retry_count: wire.retry_count,
282            retry_delay_ms: wire.retry_delay_ms,
283            rpm: wire.rpm,
284            rate_limits: wire.rate_limits,
285            cost_per_1k_in: wire.cost_per_1k_in,
286            cost_per_1k_out: wire.cost_per_1k_out,
287            latency_p50_ms: wire.latency_p50_ms,
288            performance: wire.performance,
289            auth_style_explicit,
290        })
291    }
292}
293
294impl Default for ProviderDef {
295    fn default() -> Self {
296        Self {
297            display_name: None,
298            icon: None,
299            protocol: None,
300            base_url: String::new(),
301            base_url_env: None,
302            auth_style: default_bearer(),
303            auth_header: None,
304            auth_env: AuthEnv::None,
305            extra_headers: BTreeMap::new(),
306            chat_endpoint: String::new(),
307            completion_endpoint: None,
308            command: None,
309            args: Vec::new(),
310            env: BTreeMap::new(),
311            cwd: None,
312            mcp_servers: Vec::new(),
313            healthcheck: None,
314            local_runtime: None,
315            features: Vec::new(),
316            fallback: None,
317            retry_count: None,
318            retry_delay_ms: None,
319            rpm: None,
320            rate_limits: None,
321            cost_per_1k_in: None,
322            cost_per_1k_out: None,
323            latency_p50_ms: None,
324            performance: None,
325            auth_style_explicit: false,
326        }
327    }
328}
329
330impl ProviderDef {
331    fn merge_from(&mut self, overlay: &ProviderDef) {
332        merge_option(&mut self.display_name, &overlay.display_name);
333        merge_option(&mut self.icon, &overlay.icon);
334        merge_option(&mut self.protocol, &overlay.protocol);
335        merge_string(&mut self.base_url, &overlay.base_url);
336        merge_option(&mut self.base_url_env, &overlay.base_url_env);
337        let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
338        if overlay.auth_style_explicit
339            || !overlay_uses_default_auth_style
340            || self.auth_style == default_bearer()
341        {
342            self.auth_style = overlay.auth_style.clone();
343            self.auth_style_explicit |=
344                overlay.auth_style_explicit || !overlay_uses_default_auth_style;
345        }
346        merge_option(&mut self.auth_header, &overlay.auth_header);
347        if !overlay.auth_env.is_none() {
348            self.auth_env = overlay.auth_env.clone();
349        }
350        self.extra_headers.extend(overlay.extra_headers.clone());
351        merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
352        merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
353        merge_option(&mut self.command, &overlay.command);
354        merge_vec(&mut self.args, &overlay.args);
355        self.env.extend(overlay.env.clone());
356        merge_option(&mut self.cwd, &overlay.cwd);
357        merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
358        merge_option(&mut self.healthcheck, &overlay.healthcheck);
359        merge_option(&mut self.local_runtime, &overlay.local_runtime);
360        merge_vec(&mut self.features, &overlay.features);
361        merge_option(&mut self.fallback, &overlay.fallback);
362        merge_option(&mut self.retry_count, &overlay.retry_count);
363        merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
364        merge_option(&mut self.rpm, &overlay.rpm);
365        merge_option(&mut self.rate_limits, &overlay.rate_limits);
366        merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
367        merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
368        merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
369        merge_option(&mut self.performance, &overlay.performance);
370    }
371}
372
/// Overwrites `base` with a clone of the overlay's value when the overlay
/// holds one; a `None` overlay leaves `base` untouched.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
378
/// Replaces the contents of `base` with `overlay` unless `overlay` is empty,
/// in which case `base` keeps its current value.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.push_str(overlay);
}
384
/// Replaces the whole of `base` with a copy of `overlay` unless `overlay` is
/// empty; elements are never appended or merged individually.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.extend_from_slice(overlay);
}
390
/// The auth style assumed when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
394
395/// Auth env var name(s) for the provider. Can be a single string or an array
396/// (tried in order until one is set).
397#[derive(Debug, Clone, Deserialize, Default)]
398#[serde(untagged)]
399pub enum AuthEnv {
400    #[default]
401    None,
402    Single(String),
403    Multiple(Vec<String>),
404}
405
406impl AuthEnv {
407    fn is_none(&self) -> bool {
408        matches!(self, AuthEnv::None)
409    }
410}
411
412#[derive(Debug, Clone, Deserialize)]
413pub struct HealthcheckDef {
414    pub method: String,
415    #[serde(default)]
416    pub path: Option<String>,
417    #[serde(default)]
418    pub url: Option<String>,
419    #[serde(default)]
420    pub body: Option<String>,
421}
422
423#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
424pub struct LocalRuntimeDef {
425    /// Lifecycle style: `daemon_api` for runtimes with their own resident
426    /// daemon (Ollama), `managed_process` for Harn-spawned servers.
427    #[serde(default, skip_serializing_if = "Option::is_none")]
428    pub kind: Option<String>,
429    /// Command Harn should execute for managed-process runtimes.
430    #[serde(default, skip_serializing_if = "Option::is_none")]
431    pub command: Option<String>,
432    /// Default model source/path/repo. User overlays may set this; embedded
433    /// catalog rows avoid machine-specific absolute paths except examples.
434    #[serde(default, skip_serializing_if = "Option::is_none")]
435    pub model_source: Option<String>,
436    /// Environment variable that can provide a model source.
437    #[serde(default, skip_serializing_if = "Option::is_none")]
438    pub model_source_env: Option<String>,
439    /// Default port when the provider base URL has none.
440    #[serde(default, skip_serializing_if = "Option::is_none")]
441    pub default_port: Option<u16>,
442    /// Argument names used by the runtime CLI.
443    #[serde(default, skip_serializing_if = "Option::is_none")]
444    pub model_arg: Option<String>,
445    #[serde(default, skip_serializing_if = "Option::is_none")]
446    pub served_model_arg: Option<String>,
447    #[serde(default, skip_serializing_if = "Option::is_none")]
448    pub host_arg: Option<String>,
449    #[serde(default, skip_serializing_if = "Option::is_none")]
450    pub port_arg: Option<String>,
451    #[serde(default, skip_serializing_if = "Option::is_none")]
452    pub ctx_arg: Option<String>,
453    #[serde(default, skip_serializing_if = "Option::is_none")]
454    pub parallel_arg: Option<String>,
455    #[serde(default, skip_serializing_if = "Option::is_none")]
456    pub gpu_layers_arg: Option<String>,
457    #[serde(default, skip_serializing_if = "Option::is_none")]
458    pub cache_type_k_arg: Option<String>,
459    #[serde(default, skip_serializing_if = "Option::is_none")]
460    pub cache_type_v_arg: Option<String>,
461    #[serde(default, skip_serializing_if = "Option::is_none")]
462    pub cache_ram_arg: Option<String>,
463    /// Extra arguments Harn applies by default when launching this runtime.
464    #[serde(default, skip_serializing_if = "Vec::is_empty")]
465    pub default_args: Vec<String>,
466    /// Stop strategy: `keep_alive_zero`, `pid`, or `external`.
467    #[serde(default, skip_serializing_if = "Option::is_none")]
468    pub stop: Option<String>,
469    /// Official docs/source URL for the lifecycle contract.
470    #[serde(default, skip_serializing_if = "Option::is_none")]
471    pub source_url: Option<String>,
472    /// YYYY-MM-DD date when the local runtime row was last verified.
473    #[serde(default, skip_serializing_if = "Option::is_none")]
474    pub last_verified: Option<String>,
475    /// Short operational note surfaced by CLI docs/help.
476    #[serde(default, skip_serializing_if = "Option::is_none")]
477    pub notes: Option<String>,
478}
479
480#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
481pub struct LocalMemoryDef {
482    /// Empirical resident memory observed for this route/runtime.
483    #[serde(default, skip_serializing_if = "Option::is_none")]
484    pub measured_resident_gib: Option<f64>,
485    /// Context size used for the empirical measurement.
486    #[serde(default, skip_serializing_if = "Option::is_none")]
487    pub measured_context_window: Option<u64>,
488    /// KV-cache type used for the empirical measurement.
489    #[serde(default, skip_serializing_if = "Option::is_none")]
490    pub measured_cache_type: Option<String>,
491    /// Approximate non-context resident footprint for this model/runtime.
492    #[serde(default, skip_serializing_if = "Option::is_none")]
493    pub base_resident_gib: Option<f64>,
494    /// Approximate GiB consumed by KV cache per 1,000 context tokens at the
495    /// default cache type.
496    #[serde(default, skip_serializing_if = "Option::is_none")]
497    pub kv_cache_gib_per_1k_ctx: Option<f64>,
498    /// Cache-type multiplier relative to `kv_cache_gib_per_1k_ctx`.
499    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
500    pub cache_type_multipliers: BTreeMap<String, f64>,
501    /// Cache type assumed when the launch command does not set K/V cache.
502    #[serde(default, skip_serializing_if = "Option::is_none")]
503    pub default_cache_type: Option<String>,
504    /// Minimum headroom Harn should leave for the OS and other apps.
505    #[serde(default, skip_serializing_if = "Option::is_none")]
506    pub safety_margin_gib: Option<f64>,
507    /// Highest context Harn should recommend automatically from this row.
508    #[serde(default, skip_serializing_if = "Option::is_none")]
509    pub max_recommended_context: Option<u64>,
510    /// Official or empirical source for the sizing row.
511    #[serde(default, skip_serializing_if = "Option::is_none")]
512    pub source_url: Option<String>,
513    /// YYYY-MM-DD date when the sizing row was last verified.
514    #[serde(default, skip_serializing_if = "Option::is_none")]
515    pub last_verified: Option<String>,
516    /// Short operational note surfaced by CLI diagnostics/docs.
517    #[serde(default, skip_serializing_if = "Option::is_none")]
518    pub notes: Option<String>,
519}
520
521impl LocalMemoryDef {
522    pub fn is_empty(&self) -> bool {
523        self.measured_resident_gib.is_none()
524            && self.measured_context_window.is_none()
525            && self.measured_cache_type.is_none()
526            && self.base_resident_gib.is_none()
527            && self.kv_cache_gib_per_1k_ctx.is_none()
528            && self.cache_type_multipliers.is_empty()
529            && self.default_cache_type.is_none()
530            && self.safety_margin_gib.is_none()
531            && self.max_recommended_context.is_none()
532            && self.source_url.is_none()
533            && self.last_verified.is_none()
534            && self.notes.is_none()
535    }
536}
537
538#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
539pub struct AliasDef {
540    pub id: String,
541    pub provider: String,
542    /// Per-model tool format override: "native" or "text". When set, this
543    /// takes precedence over the provider-level default. Models with strong
544    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
545    /// models better served by text-based tool calling use "text".
546    #[serde(default)]
547    pub tool_format: Option<String>,
548}
549
550#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
551pub struct AliasToolCallingDef {
552    #[serde(default)]
553    #[serde(skip_serializing_if = "Option::is_none")]
554    pub native: Option<String>,
555    #[serde(default)]
556    #[serde(skip_serializing_if = "Option::is_none")]
557    pub text: Option<String>,
558    #[serde(default)]
559    #[serde(skip_serializing_if = "Option::is_none")]
560    pub streaming_native: Option<String>,
561    #[serde(default)]
562    #[serde(skip_serializing_if = "Option::is_none")]
563    pub fallback_mode: Option<String>,
564    #[serde(default)]
565    #[serde(skip_serializing_if = "Option::is_none")]
566    pub failure_reason: Option<String>,
567    #[serde(default)]
568    #[serde(skip_serializing_if = "Option::is_none")]
569    pub last_probe_at: Option<String>,
570}
571
572#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
573pub struct ModelPricing {
574    pub input_per_mtok: f64,
575    pub output_per_mtok: f64,
576    #[serde(default)]
577    pub cache_read_per_mtok: Option<f64>,
578    #[serde(default)]
579    pub cache_write_per_mtok: Option<f64>,
580}
581
582/// Provider or model quota metadata. Providers publish these along several
583/// axes, and any one exhausted bucket can trigger throttling.
584#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
585pub struct RateLimitsDef {
586    /// Requests per minute.
587    #[serde(default, skip_serializing_if = "Option::is_none")]
588    pub rpm: Option<u32>,
589    /// Requests per hour.
590    #[serde(default, skip_serializing_if = "Option::is_none")]
591    pub rph: Option<u32>,
592    /// Requests per day.
593    #[serde(default, skip_serializing_if = "Option::is_none")]
594    pub rpd: Option<u32>,
595    /// Total tokens per minute.
596    #[serde(default, skip_serializing_if = "Option::is_none")]
597    pub tpm: Option<u64>,
598    /// Total tokens per hour.
599    #[serde(default, skip_serializing_if = "Option::is_none")]
600    pub tph: Option<u64>,
601    /// Total tokens per day.
602    #[serde(default, skip_serializing_if = "Option::is_none")]
603    pub tpd: Option<u64>,
604    /// Input tokens per minute, when the provider splits input/output quotas.
605    #[serde(default, skip_serializing_if = "Option::is_none")]
606    pub input_tpm: Option<u64>,
607    /// Output tokens per minute, when the provider splits input/output quotas.
608    #[serde(default, skip_serializing_if = "Option::is_none")]
609    pub output_tpm: Option<u64>,
610    /// Concurrent in-flight requests, if published.
611    #[serde(default, skip_serializing_if = "Option::is_none")]
612    pub concurrency: Option<u32>,
613    /// Account tier or route class these limits describe.
614    #[serde(default, skip_serializing_if = "Option::is_none")]
615    pub tier: Option<String>,
616    /// Official source URL for the row.
617    #[serde(default, skip_serializing_if = "Option::is_none")]
618    pub source_url: Option<String>,
619    /// YYYY-MM-DD date when the row was last verified.
620    #[serde(default, skip_serializing_if = "Option::is_none")]
621    pub last_verified: Option<String>,
622    /// Free-text caveat for account-dependent or burst limits.
623    #[serde(default, skip_serializing_if = "Option::is_none")]
624    pub notes: Option<String>,
625}
626
627impl RateLimitsDef {
628    pub fn is_empty(&self) -> bool {
629        self.rpm.is_none()
630            && self.rph.is_none()
631            && self.rpd.is_none()
632            && self.tpm.is_none()
633            && self.tph.is_none()
634            && self.tpd.is_none()
635            && self.input_tpm.is_none()
636            && self.output_tpm.is_none()
637            && self.concurrency.is_none()
638            && self.tier.is_none()
639            && self.source_url.is_none()
640            && self.last_verified.is_none()
641            && self.notes.is_none()
642    }
643
644    pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
645        if self.rpm.is_none() {
646            self.rpm = rpm;
647        }
648        (!self.is_empty()).then_some(self)
649    }
650}
651
652/// Optional provider/model serving-performance observation. This records
653/// benchmark or live-probe facts, not a hard runtime contract; callers should
654/// treat missing fields as unknown and stale dates as advisory.
655#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
656pub struct ServingPerformanceDef {
657    /// Observed time-to-first-token in milliseconds.
658    #[serde(default, skip_serializing_if = "Option::is_none")]
659    pub observed_ttft_ms: Option<u64>,
660    /// Observed output generation rate in tokens per second.
661    #[serde(default, skip_serializing_if = "Option::is_none")]
662    pub output_tokens_per_sec: Option<f64>,
663    /// End-to-end time-to-answer in seconds for the cited benchmark, when
664    /// reported separately from TTFT/generation rate.
665    #[serde(default, skip_serializing_if = "Option::is_none")]
666    pub time_to_answer_s: Option<f64>,
667    /// Source label, e.g. `artificial_analysis`, `harn_probe`, or
668    /// `provider_blog`.
669    #[serde(default, skip_serializing_if = "Option::is_none")]
670    pub source: Option<String>,
671    /// Source URL for the observation.
672    #[serde(default, skip_serializing_if = "Option::is_none")]
673    pub source_url: Option<String>,
674    /// YYYY-MM-DD date when the observation was last verified.
675    #[serde(default, skip_serializing_if = "Option::is_none")]
676    pub last_verified: Option<String>,
677    /// Number of requests or benchmark samples behind this row, if known.
678    #[serde(default, skip_serializing_if = "Option::is_none")]
679    pub sample_size: Option<u32>,
680    /// Short caveat such as streaming mode, warm/cold route, or prompt shape.
681    #[serde(default, skip_serializing_if = "Option::is_none")]
682    pub notes: Option<String>,
683}
684
685impl ServingPerformanceDef {
686    pub fn is_empty(&self) -> bool {
687        self.observed_ttft_ms.is_none()
688            && self.output_tokens_per_sec.is_none()
689            && self.time_to_answer_s.is_none()
690            && self.source.is_none()
691            && self.source_url.is_none()
692            && self.last_verified.is_none()
693            && self.sample_size.is_none()
694            && self.notes.is_none()
695    }
696}
697
698/// Logical-model facts separated from provider serving routes. These fields
699/// describe the underlying weights or public model family, not Harn's alias or
700/// provider/model selector.
701#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
702pub struct ModelArchitectureDef {
703    /// Total parameter count in billions.
704    #[serde(default, skip_serializing_if = "Option::is_none")]
705    pub parameter_count_b: Option<f64>,
706    /// Active parameter count in billions for MoE models.
707    #[serde(default, skip_serializing_if = "Option::is_none")]
708    pub active_parameter_count_b: Option<f64>,
709    /// True for mixture-of-experts models.
710    #[serde(default, skip_serializing_if = "Option::is_none")]
711    pub moe: Option<bool>,
712    /// Quantization advertised by this route, if route-specific.
713    #[serde(default, skip_serializing_if = "Option::is_none")]
714    pub quantization: Option<String>,
715    /// Numeric precision advertised by this route, if known.
716    #[serde(default, skip_serializing_if = "Option::is_none")]
717    pub precision: Option<String>,
718    /// License identifier or short label.
719    #[serde(default, skip_serializing_if = "Option::is_none")]
720    pub license: Option<String>,
721    /// Tokenizer family or implementation hint.
722    #[serde(default, skip_serializing_if = "Option::is_none")]
723    pub tokenizer: Option<String>,
724    /// Public knowledge cutoff claim, when published.
725    #[serde(default, skip_serializing_if = "Option::is_none")]
726    pub knowledge_cutoff: Option<String>,
727    /// Official source URL for these facts.
728    #[serde(default, skip_serializing_if = "Option::is_none")]
729    pub source_url: Option<String>,
730    /// YYYY-MM-DD date when these facts were last verified.
731    #[serde(default, skip_serializing_if = "Option::is_none")]
732    pub last_verified: Option<String>,
733}
734
735impl ModelArchitectureDef {
736    pub fn is_empty(&self) -> bool {
737        self.parameter_count_b.is_none()
738            && self.active_parameter_count_b.is_none()
739            && self.moe.is_none()
740            && self.quantization.is_none()
741            && self.precision.is_none()
742            && self.license.is_none()
743            && self.tokenizer.is_none()
744            && self.knowledge_cutoff.is_none()
745            && self.source_url.is_none()
746            && self.last_verified.is_none()
747    }
748}
749
/// Optional accelerated-serving ("fast mode") tier for a model.
///
/// Off by default: its presence only *describes* that the provider offers a
/// faster, premium-priced serving path running the same weights — callers
/// must explicitly opt in via the provider's request knob, so nothing here
/// changes default behavior.
///
/// Deliberately provider-agnostic: Anthropic exposes the tier as
/// `speed = "fast"` (beta-gated), while OpenAI uses
/// `service_tier = "fast"` / `"priority"`.
///
/// Premium pricing is stored as absolute per-MTok rates rather than a single
/// multiplier because providers price the tier asymmetrically (Anthropic
/// Opus 4.8 is 2x standard; Opus 4.7 fast mode is 6x).
///
/// `param` and `value` are required when deserializing; every other field is
/// optional and falls back to `None` when omitted.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    /// Request field that opts into the fast tier (e.g. "speed" for
    /// Anthropic, "service_tier" for OpenAI).
    pub param: String,
    /// Value to send on `param` (e.g. "fast", "priority").
    pub value: String,
    /// Provider beta/feature header required to use the tier, if any
    /// (e.g. Anthropic "fast-mode-2026-02-01").
    #[serde(default)]
    pub beta_header: Option<String>,
    /// Output-tokens-per-second speedup vs standard serving (e.g. 2.5).
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    /// Lifecycle of the fast tier: "ga" | "research_preview" |
    /// "deprecated". None when unspecified.
    #[serde(default)]
    pub status: Option<String>,
    /// Premium pricing charged while the fast tier is active (absolute
    /// per-MTok rates, not a multiplier on standard pricing).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Free-text note: constraints, deprecation timeline, etc.
    #[serde(default)]
    pub note: Option<String>,
}
786
/// One catalog row describing a model serving route.
///
/// Only `name`, `provider`, and `context_window` are required when
/// deserializing; every other field carries `#[serde(default)]` and may be
/// omitted from the config.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Model name. NOTE(review): presumably the human-readable label;
    /// serving-route detail lives in `served_variant` — confirm.
    pub name: String,
    /// Provider this row is hosted on. Provider inference treats an exact
    /// catalog row as more authoritative than any pattern-based rule.
    pub provider: String,
    /// Context window size. NOTE(review): presumably measured in tokens —
    /// confirm against consumers.
    pub context_window: u64,
    /// Provider-independent logical model id, when multiple serving routes map
    /// to the same weights or model family.
    #[serde(default)]
    pub logical_model: Option<String>,
    /// Equivalence class for failover/escalation candidates. Entries in the
    /// same group are capability-compatible alternatives, not byte-identical
    /// APIs; callers must still re-render transcripts for the target provider.
    #[serde(default)]
    pub equivalence_group: Option<String>,
    /// Serving-route detail such as "serverless", "priority", "fp8", or a
    /// provider route slug. This is intentionally separate from `name`.
    #[serde(default)]
    pub served_variant: Option<String>,
    /// Provider-native model id to send on the wire. Defaults to the catalog
    /// key. Required when two providers expose the same native id and Harn
    /// needs a unique catalog key for each route.
    #[serde(default)]
    pub wire_model: Option<String>,
    /// Preferred API dialect for the route, e.g. `openai_chat`,
    /// `openai_responses`, `anthropic_messages`, `gemini_generate_content`.
    #[serde(default)]
    pub api_dialect: Option<String>,
    /// Route-specific token/request quota metadata.
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    /// Optional route-level serving performance observations.
    #[serde(default)]
    pub performance: Option<ServingPerformanceDef>,
    /// Underlying model architecture facts separated from the provider id.
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    /// Local launch memory-sizing hints used by `harn local launch`.
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    /// NOTE(review): presumably a runtime-effective context window that can
    /// differ from `context_window` — confirm against consumers.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    /// Streaming timeout for this route. NOTE(review): unit not visible
    /// here (presumably seconds) — confirm.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags declared for this row; empty when omitted.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Pricing for this route, when known. Fast-tier premium pricing is
    /// carried separately on `fast_mode`.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Whether the row is deprecated. Defaults to `false`; see
    /// `superseded_by` for the machine-readable replacement pointer.
    #[serde(default)]
    pub deprecated: bool,
    /// Free-text deprecation detail.
    #[serde(default)]
    pub deprecation_note: Option<String>,
    /// Structured replacement pointer: the catalog id of the model that
    /// supersedes this one (e.g. an older Opus row points at the newest
    /// Opus). Lets release tooling express "migrate to X" in a
    /// machine-readable way instead of burying it in `deprecation_note`
    /// free text. A model may be superseded without being `deprecated`
    /// (a newer option exists but this one is still fully supported);
    /// pair it with `deprecated = true` once a sunset is announced.
    #[serde(default)]
    pub superseded_by: Option<String>,
    /// Accelerated-serving ("fast mode") tier metadata, when the model's
    /// provider offers one. Off by default — see [`FastModeDef`]. None for
    /// models with no faster serving path.
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    /// Quality tags for this row; empty when omitted. NOTE(review): the
    /// vocabulary is not visible in this part of the file.
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Whether the model can be reached over a normal API-key serverless call,
    /// or only via a dedicated/provisioned endpoint that the caller must spin
    /// up out-of-band. Providers like Together list dedicated-only routes
    /// alongside serverless ones in `/v1/models`, so this metadata lets clients
    /// avoid presenting them as one-click options.
    #[serde(default)]
    pub availability: ModelAvailability,
    /// Popular-consensus tier label. Enum-typed string: "small" | "mid" |
    /// "frontier" | "reasoning". Self-declared per model (no pattern-matched
    /// rule table) so the catalog is the single source of truth. When None
    /// the resolver returns the catalog default ("mid"). Use the richer
    /// `strengths` + `benchmarks` fields to pick models for specific
    /// workloads — `tier` exists only as a coarse popular-consensus shortcut.
    #[serde(default)]
    pub tier: Option<String>,
    /// True when the model weights are downloadable / self-hostable
    /// (open-weight / open-source license, regardless of commercial-use
    /// restrictions). False when weights are closed (Anthropic, OpenAI,
    /// Google, etc.). None when the catalog row predates the migration.
    #[serde(default)]
    pub open_weight: Option<bool>,
    /// Workload-shaped strength tags. Conventional values include
    /// `coding`, `summarization`, `long_context`, `tool_use`, `reasoning`,
    /// `vision`, `speed`, `cheap`, `agentic`. Selectors should treat
    /// missing entries as "no claim" rather than "no strength."
    #[serde(default)]
    pub strengths: Vec<String>,
    /// Public benchmark numbers, keyed by a snake_case identifier
    /// (`swe_bench_verified`, `humaneval`, `aa_intelligence_index`, etc.).
    /// Values are the raw published scores. The selector layer is free
    /// to normalize per benchmark; the catalog records the canonical
    /// score so future readers can audit the source.
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    /// Normalized model-family token used as a diversity signal for
    /// reviewer selection. Distinct from provider: hosted wrappers should
    /// keep the underlying family (for example OpenRouter-hosted Claude
    /// still uses `anthropic-claude`).
    #[serde(default)]
    pub family: Option<String>,
    /// Narrower family lineage used by option-pack calibration.
    #[serde(default)]
    pub lineage: Option<String>,
    /// Preferred reviewer families for critique/review workloads.
    #[serde(default)]
    pub complementary_with: Vec<String>,
    /// Author families, lineages, model ids, or provider/model selectors
    /// this row should not review.
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
905
/// How a cataloged model route can be reached.
///
/// Serialized and deserialized in `snake_case` (`serverless`, `dedicated`,
/// `unknown`). A field of this type that is omitted from the config
/// defaults to [`ModelAvailability::Serverless`].
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// Reachable through the provider's normal API-key path with no extra
    /// setup. The default for cataloged hosted/local models: by cataloging a
    /// row we are claiming the route works out of the box.
    #[default]
    Serverless,
    /// Requires the caller to provision a dedicated endpoint before requests
    /// will succeed. The catalog row exists for selection/pricing UI, but
    /// hosts must not auto-route to it.
    Dedicated,
    /// Availability is not known ahead of time. Used for routes that were
    /// surfaced dynamically (e.g. through `/v1/models`) without a static
    /// claim from Harn or the user.
    Unknown,
}
923
924impl ModelAvailability {
925    pub fn as_str(self) -> &'static str {
926        match self {
927            Self::Serverless => "serverless",
928            Self::Dedicated => "dedicated",
929            Self::Unknown => "unknown",
930        }
931    }
932
933    pub fn parse(value: &str) -> Option<Self> {
934        match value {
935            "serverless" => Some(Self::Serverless),
936            "dedicated" => Some(Self::Dedicated),
937            "unknown" => Some(Self::Unknown),
938            _ => None,
939        }
940    }
941}
942
/// Complete catalog identity for an alias or model selector, as produced by
/// `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Model id: the alias target when the selector was an alias, otherwise
    /// the selector with any provider-selector prefix stripped.
    pub id: String,
    /// Provider declared by the alias, or inferred for non-alias selectors.
    pub provider: String,
    /// The original selector when it matched a configured alias; `None`
    /// otherwise.
    pub alias: Option<String>,
    /// Tool format to use, after the requested format has been passed
    /// through the capability registry's validity gate.
    pub tool_format: String,
    /// Tier label resolved for `id`.
    pub tier: String,
    /// Model-family token resolved for this provider/model pair.
    pub family: String,
    /// Family lineage resolved for this provider/model pair.
    pub lineage: String,
}
953
/// Inputs describing the author model and constraints for choosing a
/// complementary reviewer.
/// NOTE(review): the consumer is not visible in this part of the file
/// (presumably `pick_complementary_reviewer`) — confirm field semantics there.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    /// Model (id or selector) that authored the work under review.
    pub author_model: String,
    /// Provider of the author model, when the caller already knows it.
    pub author_provider: Option<String>,
    /// Kind of review being requested.
    pub intent: ComplementaryReviewerIntent,
    /// Optional price ceiling for the reviewer. NOTE(review): presumably
    /// expressed as a multiple of the author model's price — confirm.
    pub max_price_multiplier: Option<f64>,
}
961
/// Kind of review workload a complementary reviewer is being selected for.
///
/// The string forms accepted by `parse` and produced by `as_str` are
/// `review`, `critique`, and `plan_review`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    /// String form: `review`.
    Review,
    /// String form: `critique`.
    Critique,
    /// String form: `plan_review`.
    PlanReview,
}
968
969impl ComplementaryReviewerIntent {
970    pub fn parse(value: &str) -> Option<Self> {
971        match value {
972            "review" => Some(Self::Review),
973            "critique" => Some(Self::Critique),
974            "plan_review" => Some(Self::PlanReview),
975            _ => None,
976        }
977    }
978
979    pub fn as_str(self) -> &'static str {
980        match self {
981            Self::Review => "review",
982            Self::Critique => "critique",
983            Self::PlanReview => "plan_review",
984        }
985    }
986}
987
/// Result of selecting a complementary reviewer for an author model.
/// NOTE(review): the producer is not visible in this part of the file
/// (presumably `pick_complementary_reviewer`) — confirm field semantics there.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    /// Review intent as a string. NOTE(review): presumably the
    /// `ComplementaryReviewerIntent::as_str` label — confirm.
    pub intent: String,
    /// Identity of the model that authored the work.
    pub author: ComplementaryModelIdentity,
    /// Identity of the model chosen to review it.
    pub reviewer: ComplementaryModelIdentity,
    /// True when selection fell back instead of finding an independent
    /// reviewer.
    pub fallback: bool,
    /// Human-readable explanation of the fallback, if any.
    pub fallback_reason: Option<String>,
    /// Machine-readable reason a caller can branch on when `fallback` is
    /// `true`, distinct from the human-readable `fallback_reason`/`reason`
    /// prose. `None` on the success path. Lets a caller hard-fail an
    /// independent-review step rather than silently degrade to self-review.
    /// See [`ReviewerFallbackCode`] for the stable set of values.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_code: Option<String>,
    /// Human-readable prose explaining the selection.
    pub reason: String,
    /// Estimated extra cost of using the reviewer, when it can be computed.
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
1005
/// Stable, machine-readable reasons `pick_complementary_reviewer` falls back
/// to the author model. Serialized as the `fallback_code` string so harn
/// pipelines and Rust callers can branch deterministically instead of parsing
/// prose. New variants are additive; existing codes are an append-only
/// contract and must not be renamed or repurposed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReviewerFallbackCode {
    /// The author model's family could not be resolved, so no independent
    /// family comparison is possible.
    UnknownAuthorFamily,
    /// Different-family candidates exist but none satisfy `max_price_multiplier`.
    NoDiffFamilyWithinPrice,
    /// No active, serverless, different-family reviewer is cataloged at all.
    NoDiffFamilyServerless,
    /// Different-family candidates exist but were all excluded (e.g. every
    /// one declares `avoid_as_reviewer_for` the author).
    AllDiffFamilyExcluded,
}
1023
1024impl ReviewerFallbackCode {
1025    pub fn as_code(self) -> &'static str {
1026        match self {
1027            Self::UnknownAuthorFamily => "unknown_author_family",
1028            Self::NoDiffFamilyWithinPrice => "no_diff_family_within_price",
1029            Self::NoDiffFamilyServerless => "no_diff_family_serverless",
1030            Self::AllDiffFamilyExcluded => "all_diff_family_excluded",
1031        }
1032    }
1033}
1034
/// Identity summary of one model (author or reviewer) inside a
/// complementary-reviewer selection.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    /// Model id.
    pub id: String,
    /// Provider serving the model.
    pub provider: String,
    /// Model-family token.
    pub family: String,
    /// Family lineage.
    pub lineage: String,
    /// Tier label.
    pub tier: String,
    /// Pricing for the model; left out of the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,
}
1045
/// Cost estimate attached to a complementary-reviewer selection.
/// NOTE(review): the `_per_mtok` suffix suggests rates per million tokens;
/// how the values are derived is not visible here — confirm at the producer.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    /// Input-token rate.
    pub input_per_mtok: f64,
    /// Output-token rate.
    pub output_per_mtok: f64,
    /// Combined rate. NOTE(review): presumably input + output — confirm.
    pub total_per_mtok: f64,
    /// Cost relative to the author model; left out of the serialized form
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multiplier_vs_author: Option<f64>,
}
1054
/// Rule mapping model ids to a provider when no exact catalog row exists.
///
/// Rules are evaluated in list order and the first rule with any satisfied
/// matcher wins. Each matcher is optional; a rule with none set never matches.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern tested against the model id via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring that must appear somewhere in the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// String the model id must equal exactly.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider to use when this rule matches.
    pub provider: String,
}
1065
/// Legacy pattern rule mapping model ids to a tier label; consulted only
/// when the model's catalog row does not declare a `tier` itself.
///
/// Rules are evaluated in list order and the first rule with any satisfied
/// matcher wins.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern tested against the model id via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring that must appear somewhere in the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// String the model id must equal exactly.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier label to return when this rule matches.
    pub tier: String,
}
1076
/// Catalog-wide tier fallback settings.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Default tier label; `"mid"` when omitted from the config (supplied
    /// by `default_mid`).
    #[serde(default = "default_mid")]
    pub default: String,
}
1082
// Hand-written instead of derived so that a `TierDefaults` built in code
// gets the same `"mid"` default serde applies to a missing `default` key
// (a derived impl would produce an empty string).
impl Default for TierDefaults {
    fn default() -> Self {
        Self {
            default: default_mid(),
        }
    }
}
1090
/// Default tier label (`"mid"`), shared by serde and `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
1094
1095/// Load and cache the providers config. Called once at VM startup.
1096pub fn load_config() -> &'static ProvidersConfig {
1097    CONFIG.get_or_init(|| {
1098        let mut config = default_config();
1099        let verbose_config_logging = matches!(
1100            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
1101            Some("1" | "true" | "TRUE" | "yes" | "YES")
1102        ) || matches!(
1103            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
1104            Some("1" | "true" | "TRUE" | "yes" | "YES")
1105        );
1106        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
1107            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
1108                config.merge_from(&overlay);
1109                let _ = CONFIG_PATH.set(path);
1110                return config;
1111            }
1112        }
1113        if should_load_home_config() {
1114            if let Some(home) = dirs_or_home() {
1115                let path = format!("{home}/.config/harn/providers.toml");
1116                if let Some(overlay) = read_external_config(&path, false) {
1117                    config.merge_from(&overlay);
1118                    let _ = CONFIG_PATH.set(path);
1119                    return config;
1120                }
1121            }
1122        }
1123        config
1124    })
1125}
1126
1127fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1128    match std::fs::read_to_string(path) {
1129        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
1130            Ok(config) => {
1131                if verbose {
1132                    eprintln!(
1133                        "[llm_config] Loaded {} providers, {} aliases from {}",
1134                        config.providers.len(),
1135                        config.aliases.len(),
1136                        path
1137                    );
1138                }
1139                Some(config)
1140            }
1141            Err(error) => {
1142                eprintln!("[llm_config] TOML parse error in {path}: {error}");
1143                None
1144            }
1145        },
1146        Err(error) => {
1147            if verbose {
1148                eprintln!("[llm_config] Cannot read {path}: {error}");
1149            }
1150            None
1151        }
1152    }
1153}
1154
/// Whether the per-user home providers file may be consulted.
///
/// Disabled in unit-test builds so tests exercise embedded defaults plus
/// explicit overlays rather than whatever provider file happens to exist on
/// the developer machine.
fn should_load_home_config() -> bool {
    const BUILT_FOR_UNIT_TESTS: bool = cfg!(test);
    !BUILT_FOR_UNIT_TESTS
}
1160
1161/// Parse a provider/model catalog overlay in the same shape as
1162/// `providers.toml` or `[llm]` package-manifest sections.
1163pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1164    toml::from_str::<ProvidersConfig>(src)
1165}
1166
1167/// Returns the filesystem path of the currently-loaded providers config, if
1168/// any. Returns `None` when built-in defaults are active.
1169pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1170    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
1171    let _ = load_config();
1172    CONFIG_PATH.get().map(std::path::PathBuf::from)
1173}
1174
1175/// Install per-run provider config overlays. The overlay uses the same shape as
1176/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
1177/// manifests. Passing `None` clears the overlay.
1178pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1179    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1180}
1181
/// Clear per-run provider config overlays for the current thread.
///
/// Shorthand for `set_user_overrides(None)`.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
1186
1187/// Install the process-wide runtime catalog overlay used by
1188/// `provider_catalog::refresh_runtime_catalog`. Per-run user overlays still
1189/// merge last so project-local provider config can override hosted catalog
1190/// updates.
1191pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1192    *runtime_catalog_overlay()
1193        .write()
1194        .expect("runtime catalog overlay poisoned") = config;
1195}
1196
/// Remove the process-wide runtime catalog overlay.
///
/// Shorthand for `set_runtime_catalog_overlay(None)`.
pub fn clear_runtime_catalog_overlay() {
    set_runtime_catalog_overlay(None);
}
1200
/// Effective provider config for the current thread: the merged config from
/// `effective_config_with_user_overrides`, using this thread's installed
/// user overrides (if any) as the final layer.
pub(crate) fn effective_config() -> ProvidersConfig {
    // Clone the thread-local overlay so the `RefCell` borrow is released
    // before the merge runs.
    let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
    effective_config_with_user_overrides(user_overrides.as_ref())
}
1205
1206/// Provider config built purely from the compiled-in `EMBEDDED_PROVIDERS_TOML`
1207/// snapshot, ignoring every ambient layer: the developer's
1208/// `~/.config/harn/providers.toml`, `HARN_PROVIDERS_CONFIG`, the process
1209/// runtime-catalog overlay, and thread-local user overrides.
1210///
1211/// This is the hermetic source of truth for *generating* the checked-in
1212/// `spec/provider-catalog/*` artifacts. Artifact generation must be a pure
1213/// function of the source tree so a developer's personal aliases/providers
1214/// never leak into shipped artifacts (which then makes clean CI flag drift).
1215/// Runtime catalog presentation must keep using [`effective_config`] /
1216/// [`effective_config_with_user_overrides`], which legitimately reflect the
1217/// host's live configuration.
1218///
1219/// An optional explicit overlay (e.g. a `--overlay` file named on the command
1220/// line) is merged on top of the embedded base. Unlike the home file and env
1221/// layers, that overlay is a declared, reproducible input rather than ambient
1222/// machine state, so it is safe to honor while staying hermetic.
1223pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1224    let mut config = default_config();
1225    if let Some(overlay) = explicit_overlay {
1226        config.merge_from(overlay);
1227    }
1228    config
1229}
1230
1231pub(crate) fn effective_config_with_user_overrides(
1232    user_overrides: Option<&ProvidersConfig>,
1233) -> ProvidersConfig {
1234    let mut merged = load_config().clone();
1235    if let Some(overlay) = runtime_catalog_overlay()
1236        .read()
1237        .expect("runtime catalog overlay poisoned")
1238        .as_ref()
1239    {
1240        merged.merge_from(overlay);
1241    }
1242    if let Some(overlay) = user_overrides {
1243        merged.merge_from(overlay);
1244    }
1245    merged
1246}
1247
/// Accessor for the process-wide runtime catalog overlay slot, creating it
/// empty (`None`) on first use.
fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
    RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
}
1251
1252/// Resolve a model alias to (model_id, provider_name).
1253pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1254    let config = effective_config();
1255    if let Some(a) = config.aliases.get(alias) {
1256        return (a.id.clone(), Some(a.provider.clone()));
1257    }
1258    (normalize_model_id(alias), None)
1259}
1260
/// Remove a leading host/provider selector prefix, which names the
/// transport rather than the provider-native model id.
///
/// This mirrors the host's existing normalization so `ollama:qwen3:30b`
/// reaches Ollama as `qwen3:30b` instead of an invalid model named `ollama`.
/// Cerebras follows the same convention but uses a slash separator
/// (`cerebras/gpt-oss-120b`) because its own /v1/models endpoint returns
/// bare names that overlap OpenAI's families.
///
/// Only the first matching prefix is removed, and only once; input without
/// a known prefix is returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes recognized by [`normalize_model_id`], tried in order.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
1278
1279/// Resolve an alias or selector into the complete catalog identity hosts need:
1280/// provider inference, prefix-normalized model id, default tool format, and tier.
1281pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1282    let config = effective_config();
1283    if let Some(alias) = config.aliases.get(selector) {
1284        let id = alias.id.clone();
1285        let provider = alias.provider.clone();
1286        let requested = alias
1287            .tool_format
1288            .clone()
1289            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1290        let tool_format = guard_tool_format(&provider, &id, &requested, Some(selector));
1291        return ResolvedModel {
1292            tier: model_tier_with_config(&config, &id),
1293            family: model_family_with_config(&config, &provider, &id),
1294            lineage: model_lineage_with_config(&config, &provider, &id),
1295            id,
1296            provider,
1297            alias: Some(selector.to_string()),
1298            tool_format,
1299        };
1300    }
1301
1302    let id = normalize_model_id(selector);
1303    let inference = infer_provider_with_config(&config, selector);
1304    let source = inference.source;
1305    let provider = inference.provider;
1306    let requested = default_tool_format_with_config(&config, &id, &provider);
1307    let tool_format = guard_tool_format(&provider, &id, &requested, None);
1308    let tier = model_tier_with_config(&config, &id);
1309    let family = model_family_with_inference_source(&config, &provider, &id, source);
1310    let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1311    ResolvedModel {
1312        id,
1313        provider,
1314        alias: None,
1315        tool_format,
1316        tier,
1317        family,
1318        lineage,
1319    }
1320}
1321
1322/// Run the requested `tool_format` through the capability registry's
1323/// dialect-validity gate, returning the safe format to actually use. When the
1324/// registry auto-corrects a known-broken combo (e.g. a `native` pin on a
1325/// `native_unreliable` route that silently drops to unparsed DSML text), the
1326/// correction is logged once at resolution time so a harness developer sees
1327/// *why* their pinned format was not honored — never a silent vanishing.
1328fn guard_tool_format(provider: &str, model: &str, requested: &str, alias: Option<&str>) -> String {
1329    let decision = crate::llm::capabilities::validate_tool_format(provider, model, requested);
1330    if let Some(reason) = &decision.correction {
1331        tracing::warn!(
1332            target: "harn::llm::tool_format",
1333            alias = alias.unwrap_or(""),
1334            "{reason}"
1335        );
1336    }
1337    decision.effective
1338}
1339
/// Infer provider from a model ID using inference rules.
///
/// Convenience wrapper around `infer_provider_detail` that returns only the
/// provider name, evaluated against the current effective config.
pub fn infer_provider(model_id: &str) -> String {
    infer_provider_detail(model_id).provider
}
1344
1345/// Infer provider from a model ID and retain whether the configured default was used.
1346pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1347    let config = effective_config();
1348    infer_provider_with_config(&config, model_id)
1349}
1350
1351fn infer_provider_with_config(
1352    config: &ProvidersConfig,
1353    model_id: &str,
1354) -> crate::llm::provider::ProviderInference {
1355    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1356        return crate::llm::provider::ProviderInference::builtin("ollama");
1357    }
1358    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1359        return crate::llm::provider::ProviderInference::builtin("huggingface");
1360    }
1361    // Exact catalog rows are the most authoritative declaration of where
1362    // a model is hosted: any pattern-based inference rule is necessarily
1363    // less specific than `[models."<id>"].provider = "<name>"`. Catalogs
1364    // include user overlays, so users can still re-home a model by
1365    // setting a catalog entry in their own providers.toml.
1366    let normalized_id = normalize_model_id(model_id);
1367    if let Some(model) = config
1368        .models
1369        .get(model_id)
1370        .or_else(|| config.models.get(&normalized_id))
1371    {
1372        return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1373    }
1374    for rule in &config.inference_rules {
1375        if let Some(exact) = &rule.exact {
1376            if model_id == exact {
1377                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1378            }
1379        }
1380        if let Some(pattern) = &rule.pattern {
1381            if glob_match(pattern, model_id) {
1382                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1383            }
1384        }
1385        if let Some(substr) = &rule.contains {
1386            if model_id.contains(substr.as_str()) {
1387                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1388            }
1389        }
1390    }
1391    crate::llm::provider::infer_provider_from_model_id(
1392        model_id,
1393        &default_provider_with_config(config),
1394    )
1395}
1396
1397pub fn default_provider() -> String {
1398    let config = effective_config();
1399    default_provider_with_config(&config)
1400}
1401
1402fn default_provider_with_config(config: &ProvidersConfig) -> String {
1403    std::env::var("HARN_DEFAULT_PROVIDER")
1404        .ok()
1405        .map(|value| value.trim().to_string())
1406        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1407        .or_else(|| {
1408            config
1409                .default_provider
1410                .as_deref()
1411                .map(str::trim)
1412                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1413                .map(str::to_string)
1414        })
1415        .unwrap_or_else(|| auto_select_provider(config))
1416}
1417
/// Provider assumed when nothing is configured and no credentials are found.
/// Anthropic is Harn's documented default; [`auto_select_provider`] only falls
/// back to it after probing for a credentialed or local provider, and warns
/// once so adopters without Anthropic credentials get a clear nudge instead of
/// a raw auth failure.
const FALLBACK_PROVIDER: &str = "anthropic";

/// Set once the first provider auto-selection notice has been emitted, so
/// later notices are suppressed for the rest of the process.
static AUTO_PROVIDER_WARNED: AtomicBool = AtomicBool::new(false);
1426
1427/// True when any of the provider's auth env vars holds a non-empty value.
1428fn provider_has_credentials(def: &ProviderDef) -> bool {
1429    auth_env_names(&def.auth_env)
1430        .iter()
1431        .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty()))
1432}
1433
1434/// True when the provider can serve without cloud credentials — a managed
1435/// local runtime (`harn local`) or an auth-free endpoint such as Ollama.
1436fn provider_is_local(def: &ProviderDef) -> bool {
1437    def.local_runtime.is_some() || matches!(def.auth_env, AuthEnv::None)
1438}
1439
1440/// Emit a provider auto-selection notice at most once per process.
1441fn warn_auto_provider_once(message: &str) {
1442    if !AUTO_PROVIDER_WARNED.swap(true, Ordering::Relaxed) {
1443        crate::events::log_warn("llm_config", message);
1444    }
1445}
1446
1447/// Choose a provider when neither `HARN_DEFAULT_PROVIDER` nor
1448/// `config.default_provider` is set. Prefers a credentialed cloud provider,
1449/// then a locally-available one, and only then falls back to the documented
1450/// default. Detection is portable: it reads provider `auth_env` variables and
1451/// `local_runtime` metadata from the catalog — never hardcoded paths or ports.
1452fn auto_select_provider(config: &ProvidersConfig) -> String {
1453    // Well-known providers first for a stable, predictable choice; then any
1454    // other configured provider (BTreeMap iteration is sorted/deterministic).
1455    const PREFERRED: &[&str] = &[
1456        "anthropic",
1457        "openai",
1458        "google",
1459        "azure-openai",
1460        "groq",
1461        "mistral",
1462        "deepseek",
1463        "xai",
1464        "openrouter",
1465    ];
1466    for name in PREFERRED {
1467        if config
1468            .providers
1469            .get(*name)
1470            .is_some_and(provider_has_credentials)
1471        {
1472            if *name != FALLBACK_PROVIDER {
1473                warn_auto_provider_once(&format!(
1474                    "no default provider configured; using '{name}' (its API key is set). \
1475                     Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1476                ));
1477            }
1478            return (*name).to_string();
1479        }
1480    }
1481    for (name, def) in &config.providers {
1482        if provider_has_credentials(def) {
1483            warn_auto_provider_once(&format!(
1484                "no default provider configured; using '{name}' (its API key is set). \
1485                 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1486            ));
1487            return name.clone();
1488        }
1489    }
1490    // No cloud credentials: prefer something that runs locally with no key.
1491    for (name, def) in &config.providers {
1492        if provider_is_local(def) {
1493            warn_auto_provider_once(&format!(
1494                "no provider API keys found; using local provider '{name}'. \
1495                 Set an API key + HARN_DEFAULT_PROVIDER to use a cloud provider."
1496            ));
1497            return name.clone();
1498        }
1499    }
1500    // Nothing detected. Fall back to the documented default and say how to fix.
1501    warn_auto_provider_once(&format!(
1502        "no LLM provider configured and no API keys detected; defaulting to \
1503         '{FALLBACK_PROVIDER}'. Set ANTHROPIC_API_KEY (or another provider's key plus \
1504         HARN_DEFAULT_PROVIDER), or run a local model with `harn local launch`."
1505    ));
1506    FALLBACK_PROVIDER.to_string()
1507}
1508
1509/// Get model tier ("small", "mid", "frontier").
1510pub fn model_tier(model_id: &str) -> String {
1511    let config = effective_config();
1512    model_tier_with_config(&config, model_id)
1513}
1514
1515pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1516    // Per-model self-declared tier wins. This is the only path.
1517    if let Some(model) = config.models.get(model_id) {
1518        if let Some(tier) = model.tier.as_deref() {
1519            let trimmed = tier.trim();
1520            if !trimmed.is_empty() {
1521                return trimmed.to_string();
1522            }
1523        }
1524    }
1525    // Legacy pattern-rules: still consulted while we finish migrating the
1526    // long tail of models to per-row `tier = "..."`. Newly added rows
1527    // should set `tier` directly; the rule table is a fallback only.
1528    for rule in &config.tier_rules {
1529        if let Some(exact) = &rule.exact {
1530            if model_id == exact {
1531                return rule.tier.clone();
1532            }
1533        }
1534        if let Some(pattern) = &rule.pattern {
1535            if glob_match(pattern, model_id) {
1536                return rule.tier.clone();
1537            }
1538        }
1539        if let Some(substr) = &rule.contains {
1540            if model_id.contains(substr.as_str()) {
1541                return rule.tier.clone();
1542            }
1543        }
1544    }
1545    config.tier_defaults.default.clone()
1546}
1547
1548/// Return the normalized model-family token used for cross-family review.
1549pub fn model_family(provider: &str, model_id: &str) -> String {
1550    let config = effective_config();
1551    model_family_with_config(&config, provider, model_id)
1552}
1553
1554pub(crate) fn model_family_with_config(
1555    config: &ProvidersConfig,
1556    provider: &str,
1557    model_id: &str,
1558) -> String {
1559    catalog_family_token(config, model_id)
1560        .unwrap_or_else(|| derive_model_family(provider, model_id))
1561}
1562
1563fn model_family_with_inference_source(
1564    config: &ProvidersConfig,
1565    provider: &str,
1566    model_id: &str,
1567    source: crate::llm::provider::ProviderInferenceSource,
1568) -> String {
1569    if let Some(family) = catalog_family_token(config, model_id) {
1570        return family;
1571    }
1572    let id_family = derive_model_family("", model_id);
1573    if id_family != "unknown" {
1574        return id_family;
1575    }
1576    if matches!(
1577        source,
1578        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1579    ) {
1580        return "unknown".to_string();
1581    }
1582    derive_model_family(provider, model_id)
1583}
1584
1585/// Return the narrower lineage token used for model-aware option packs.
1586pub fn model_lineage(provider: &str, model_id: &str) -> String {
1587    let config = effective_config();
1588    model_lineage_with_config(&config, provider, model_id)
1589}
1590
1591pub(crate) fn model_lineage_with_config(
1592    config: &ProvidersConfig,
1593    provider: &str,
1594    model_id: &str,
1595) -> String {
1596    catalog_lineage_token(config, model_id)
1597        .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1598}
1599
1600fn model_lineage_with_inference_source(
1601    config: &ProvidersConfig,
1602    provider: &str,
1603    model_id: &str,
1604    source: crate::llm::provider::ProviderInferenceSource,
1605) -> String {
1606    if let Some(lineage) = catalog_lineage_token(config, model_id) {
1607        return lineage;
1608    }
1609    let id_lineage = derive_model_lineage("", model_id);
1610    if id_lineage != "unknown" {
1611        return id_lineage;
1612    }
1613    if matches!(
1614        source,
1615        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1616    ) {
1617        return "unknown".to_string();
1618    }
1619    derive_model_lineage(provider, model_id)
1620}
1621
1622fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1623    config
1624        .models
1625        .get(model_id)
1626        .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1627}
1628
1629fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1630    config
1631        .models
1632        .get(model_id)
1633        .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1634}
1635
/// Canonicalize a catalog token: trim it, lowercase ASCII letters and turn
/// `_` into `-`. Absent or blank input yields `None`.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let token = value?.trim();
    if token.is_empty() {
        return None;
    }
    Some(token.to_ascii_lowercase().replace('_', "-"))
}
1642
1643fn derive_model_family(provider: &str, model_id: &str) -> String {
1644    let id = model_id.to_ascii_lowercase();
1645    if contains_any(&id, &["claude", "anthropic.claude"]) {
1646        return "anthropic-claude".to_string();
1647    }
1648    if contains_any(&id, &["gemini", "google/gemini"]) {
1649        return "google-gemini".to_string();
1650    }
1651    if contains_any(&id, &["deepseek"]) {
1652        return "deepseek".to_string();
1653    }
1654    if contains_any(&id, &["qwen"]) {
1655        return "qwen".to_string();
1656    }
1657    if contains_any(&id, &["kimi", "moonshot"]) {
1658        return "kimi".to_string();
1659    }
1660    if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1661        return "glm".to_string();
1662    }
1663    if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1664        return "mistral".to_string();
1665    }
1666    if contains_any(&id, &["minimax"]) {
1667        return "minimax".to_string();
1668    }
1669    if contains_any(&id, &["llama"]) {
1670        return "llama".to_string();
1671    }
1672    if contains_any(&id, &["gemma"]) {
1673        return "gemma".to_string();
1674    }
1675    if is_openai_reasoning_model(&id) {
1676        return "openai-reasoning".to_string();
1677    }
1678    if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1679        return "openai-gpt".to_string();
1680    }
1681    match provider {
1682        "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1683        "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1684        "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1685        "deepseek" => "deepseek".to_string(),
1686        "zai" => "glm".to_string(),
1687        "minimax" => "minimax".to_string(),
1688        other if !other.is_empty() => normalize_identifier_token(other),
1689        _ => "unknown".to_string(),
1690    }
1691}
1692
1693fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1694    let id = model_id.to_ascii_lowercase();
1695    if contains_any(&id, &["haiku"]) {
1696        return "claude-haiku".to_string();
1697    }
1698    if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1699        return "claude-opus-adaptive".to_string();
1700    }
1701    if contains_any(&id, &["claude"]) {
1702        return "claude-sonnet-opus".to_string();
1703    }
1704    if contains_any(&id, &["gpt-5"]) {
1705        return "openai-gpt5".to_string();
1706    }
1707    if is_openai_reasoning_model(&id) {
1708        return "openai-reasoning".to_string();
1709    }
1710    if contains_any(&id, &["gpt-", "gpt_"]) {
1711        return "openai-legacy".to_string();
1712    }
1713    if contains_any(&id, &["gemini"]) {
1714        if contains_any(&id, &["flash"]) {
1715            return "gemini-flash".to_string();
1716        }
1717        return "gemini-pro".to_string();
1718    }
1719    if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1720        return "qwen3".to_string();
1721    }
1722    if contains_any(&id, &["gemma4", "gemma-4"]) {
1723        return "gemma4".to_string();
1724    }
1725    let family = derive_model_family(provider, model_id);
1726    if family == "unknown" {
1727        "unknown".to_string()
1728    } else {
1729        family
1730    }
1731}
1732
/// True when `haystack` contains at least one of `needles` as a substring.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1736
/// True when `haystack` begins with at least one of `prefixes`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1740
1741fn is_openai_reasoning_model(id: &str) -> bool {
1742    starts_with_any(id, &["o1", "o3", "o4"])
1743        || contains_any(
1744            id,
1745            &[
1746                "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1747            ],
1748        )
1749}
1750
/// Turn an arbitrary identifier into a lowercase, dash-separated token.
///
/// ASCII letters are lowercased; every run of characters that are not ASCII
/// alphanumerics acts as a single separator, and leading/trailing separators
/// are dropped. An input without any alphanumerics yields an empty string.
fn normalize_identifier_token(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let mut token = String::with_capacity(lowered.len());
    for part in lowered.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if part.is_empty() {
            continue;
        }
        if !token.is_empty() {
            token.push('-');
        }
        token.push_str(part);
    }
    token
}
1769
1770/// Get provider config for resolving base_url, auth, etc.
1771pub fn provider_config(name: &str) -> Option<ProviderDef> {
1772    effective_config().providers.get(name).cloned()
1773}
1774
1775pub fn provider_protocol(name: &str) -> Option<String> {
1776    provider_config(name).and_then(|def| def.protocol)
1777}
1778
1779pub fn provider_uses_acp(name: &str) -> bool {
1780    provider_protocol(name)
1781        .as_deref()
1782        .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1783}
1784
1785/// Get model-specific default parameters (temperature, etc.).
1786/// Matches glob patterns in model_defaults keys.
1787pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1788    let config = effective_config();
1789    let mut params = BTreeMap::new();
1790    for (pattern, defaults) in &config.model_defaults {
1791        if glob_match(pattern, model_id) {
1792            for (k, v) in defaults {
1793                params.insert(k.clone(), v.clone());
1794            }
1795        }
1796    }
1797    params
1798}
1799
1800/// Get per-role LLM defaults, e.g. `[model_roles.merge]`.
1801///
1802/// Role defaults are intentionally shaped like ordinary `llm_call` options:
1803/// callers can pin `provider`/`model`, install `route_policy` or `prefer`,
1804/// and tune budget/latency knobs without creating a parallel routing stack.
1805/// Environment variables provide a lightweight operational override for
1806/// merge/fast-apply workers:
1807///
1808/// - `HARN_LLM_MERGE_PROVIDER`, `HARN_LLM_MERGE_MODEL`,
1809///   `HARN_LLM_MERGE_ROUTE_POLICY`
1810/// - `HARN_LLM_FAST_APPLY_PROVIDER`, `HARN_LLM_FAST_APPLY_MODEL`,
1811///   `HARN_LLM_FAST_APPLY_ROUTE_POLICY`
1812/// - `HARN_LLM_ROLE_<ROLE>_PROVIDER`, `_MODEL`, `_ROUTE_POLICY`
1813pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1814    let normalized = normalize_model_role_name(role);
1815    if normalized.is_empty() {
1816        return BTreeMap::new();
1817    }
1818    let config = effective_config();
1819    let mut params = BTreeMap::new();
1820    for key in role_lookup_keys(&normalized) {
1821        extend_model_role_defaults(&config, &key, &mut params);
1822    }
1823    apply_model_role_env_overrides(&normalized, &mut params);
1824    params
1825}
1826
1827fn extend_model_role_defaults(
1828    config: &ProvidersConfig,
1829    role: &str,
1830    params: &mut BTreeMap<String, toml::Value>,
1831) {
1832    for (configured_role, defaults) in &config.model_roles {
1833        if normalize_model_role_name(configured_role) == role {
1834            params.extend(defaults.clone());
1835        }
1836    }
1837    if let Some(defaults) = config.model_roles.get(role) {
1838        params.extend(defaults.clone());
1839    }
1840}
1841
/// Canonical role name: trimmed, ASCII-lowercased, with `-` turned into `_`.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| match ch {
            '-' => '_',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
1845
/// Config keys to consult for a normalized role, in application order.
///
/// `merge` and `fast_apply` each include the other, with the requested role
/// listed last; every other role maps only to itself.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let ordered: Vec<&str> = match role {
        "merge" => vec!["fast_apply", "merge"],
        "fast_apply" => vec!["merge", "fast_apply"],
        other => vec![other],
    };
    ordered.into_iter().map(String::from).collect()
}
1855
/// Environment-variable fragment for a role: ASCII alphanumeric runs are
/// uppercased and joined with single underscores; everything else acts as a
/// separator and never appears at the start or end.
fn role_env_token(role: &str) -> String {
    role.split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_uppercase)
        .collect::<Vec<_>>()
        .join("_")
}
1871
1872fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
1873    for alias in role_env_aliases(role) {
1874        apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
1875        apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
1876        apply_model_role_env_var(
1877            &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
1878            "route_policy",
1879            params,
1880        );
1881        apply_model_role_env_var(
1882            &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
1883            "provider",
1884            params,
1885        );
1886        apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
1887        apply_model_role_env_var(
1888            &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
1889            "route_policy",
1890            params,
1891        );
1892    }
1893}
1894
1895fn role_env_aliases(role: &str) -> Vec<String> {
1896    let token = role_env_token(role);
1897    if token.is_empty() {
1898        return Vec::new();
1899    }
1900    if token == "MERGE" {
1901        vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
1902    } else if token == "FAST_APPLY" {
1903        vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
1904    } else {
1905        vec![token]
1906    }
1907}
1908
1909fn apply_model_role_env_var(
1910    env_name: &str,
1911    option_name: &str,
1912    params: &mut BTreeMap<String, toml::Value>,
1913) {
1914    let Ok(value) = std::env::var(env_name) else {
1915        return;
1916    };
1917    let trimmed = value.trim();
1918    if trimmed.is_empty() {
1919        return;
1920    }
1921    params.insert(
1922        option_name.to_string(),
1923        toml::Value::String(trimmed.to_string()),
1924    );
1925}
1926
1927/// Get list of configured provider names.
1928pub fn provider_names() -> Vec<String> {
1929    effective_config().providers.keys().cloned().collect()
1930}
1931
1932/// Return every configured alias name, sorted deterministically.
1933pub fn known_model_names() -> Vec<String> {
1934    effective_config().aliases.keys().cloned().collect()
1935}
1936
1937pub fn alias_entries() -> Vec<(String, AliasDef)> {
1938    effective_config().aliases.into_iter().collect()
1939}
1940
1941pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
1942    effective_config().alias_tool_calling.get(alias).cloned()
1943}
1944
1945/// Return every configured model-catalog entry, sorted by provider then id.
1946pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
1947    let config = effective_config();
1948    model_catalog_entries_with_config(&config)
1949}
1950
1951pub(crate) fn model_catalog_entries_with_config(
1952    config: &ProvidersConfig,
1953) -> Vec<(String, ModelDef)> {
1954    sorted_model_entries_with_config(config)
1955        .into_iter()
1956        .map(|(id, model)| {
1957            let provider = model.provider.clone();
1958            (
1959                id.clone(),
1960                with_effective_capability_tags(id, provider, model),
1961            )
1962        })
1963        .collect()
1964}
1965
1966pub(crate) fn sorted_model_entries_with_config(
1967    config: &ProvidersConfig,
1968) -> Vec<(String, ModelDef)> {
1969    let mut entries: Vec<_> = config
1970        .models
1971        .iter()
1972        .map(|(id, model)| (id.clone(), model.clone()))
1973        .collect();
1974    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
1975        model_a
1976            .provider
1977            .cmp(&model_b.provider)
1978            .then_with(|| id_a.cmp(id_b))
1979    });
1980    entries
1981}
1982
1983pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
1984    effective_config()
1985        .models
1986        .get(model_id)
1987        .cloned()
1988        .map(|model| {
1989            let provider = model.provider.clone();
1990            with_effective_capability_tags(model_id.to_string(), provider, model)
1991        })
1992}
1993
1994pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
1995    model_catalog_entry(model_id).and_then(|model| model.rate_limits)
1996}
1997
1998pub fn wire_model_id(model_id: &str) -> String {
1999    model_catalog_entry(model_id)
2000        .and_then(|model| model.wire_model)
2001        .unwrap_or_else(|| model_id.to_string())
2002}
2003
2004pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
2005    provider_config(provider).and_then(|provider| {
2006        provider
2007            .rate_limits
2008            .unwrap_or_default()
2009            .with_rpm_fallback(provider.rpm)
2010    })
2011}
2012
2013pub fn model_equivalence_group(model_id: &str) -> Option<String> {
2014    model_catalog_entry(model_id).and_then(|model| {
2015        model
2016            .equivalence_group
2017            .or(model.logical_model)
2018            .filter(|group| !group.trim().is_empty())
2019    })
2020}
2021
2022/// Return same-logical-model routes that can be considered for explicit
2023/// failover or cross-provider experiments. Equivalence is a catalog assertion
2024/// about compatible model weights/family, not wire-level identity.
2025pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
2026    let resolved = resolve_model_info(selector);
2027    let Some(group) = model_equivalence_group(&resolved.id) else {
2028        return Vec::new();
2029    };
2030    let config = effective_config();
2031    let Some(source) = config.models.get(&resolved.id) else {
2032        return Vec::new();
2033    };
2034    let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
2035    let source_context = source
2036        .runtime_context_window
2037        .unwrap_or(source.context_window);
2038
2039    sorted_model_entries_with_config(&config)
2040        .into_iter()
2041        .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
2042        .filter(|(_, model)| !model.deprecated)
2043        .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
2044        .filter(|(_, model)| {
2045            model.equivalence_group.as_deref() == Some(group.as_str())
2046                || model.logical_model.as_deref() == Some(group.as_str())
2047        })
2048        .filter(|(id, model)| {
2049            let caps = crate::llm::capabilities::lookup(&model.provider, id);
2050            let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
2051            candidate_context >= source_context
2052                && (!source_caps.native_tools || caps.native_tools)
2053                && (!source_caps.text_tool_wire_format_supported
2054                    || caps.text_tool_wire_format_supported)
2055                && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
2056                && source_caps.structured_output_mode == caps.structured_output_mode
2057        })
2058        .map(|(id, model)| {
2059            let provider = model.provider.clone();
2060            (
2061                id.clone(),
2062                with_effective_capability_tags(id, provider, model),
2063            )
2064        })
2065        .collect()
2066}
2067
2068pub fn qc_default_model(provider: &str) -> Option<String> {
2069    std::env::var("BURIN_QC_MODEL")
2070        .ok()
2071        .filter(|value| !value.trim().is_empty())
2072        .or_else(|| {
2073            effective_config()
2074                .qc_defaults
2075                .get(&provider.to_lowercase())
2076                .cloned()
2077        })
2078}
2079
2080pub fn default_model_for_provider(provider: &str) -> String {
2081    if provider_uses_acp(provider) {
2082        return "default".to_string();
2083    }
2084    match provider {
2085        "local" => std::env::var("LOCAL_LLM_MODEL")
2086            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
2087            .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
2088        "mlx" => std::env::var("MLX_MODEL_ID")
2089            .unwrap_or_else(|_| "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit".to_string()),
2090        "openai" => "gpt-4o-mini".to_string(),
2091        "ollama" => "llama3.2".to_string(),
2092        "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
2093        _ => "claude-sonnet-4-6".to_string(),
2094    }
2095}
2096
2097pub fn qc_defaults() -> BTreeMap<String, String> {
2098    effective_config().qc_defaults
2099}
2100
2101pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2102    effective_config()
2103        .models
2104        .get(model_id)
2105        .and_then(|model| model.pricing.clone())
2106}
2107
2108/// Premium per-MTok pricing for a model's accelerated-serving ("fast mode")
2109/// tier, when the catalog declares one. Returns `None` for models with no
2110/// fast tier or a tier that omits explicit pricing — callers fall back to
2111/// standard pricing in that case.
2112pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2113    effective_config()
2114        .models
2115        .get(model_id)
2116        .and_then(|model| model.fast_mode.as_ref())
2117        .and_then(|fast_mode| fast_mode.pricing.clone())
2118}
2119
2120pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
2121    model_pricing_per_mtok(model_id)
2122        .map(|pricing| {
2123            (
2124                pricing.input_per_mtok / 1000.0,
2125                pricing.output_per_mtok / 1000.0,
2126            )
2127        })
2128        .or_else(|| {
2129            let (input, output, _) = provider_economics(provider);
2130            match (input, output) {
2131                (Some(input), Some(output)) => Some((input, output)),
2132                _ => None,
2133            }
2134        })
2135}
2136
2137pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
2138    match auth_env {
2139        AuthEnv::None => Vec::new(),
2140        AuthEnv::Single(name) => vec![name.clone()],
2141        AuthEnv::Multiple(names) => names.clone(),
2142    }
2143}
2144
2145pub fn provider_key_available(provider: &str) -> bool {
2146    let Some(pdef) = provider_config(provider) else {
2147        return provider == "ollama";
2148    };
2149    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
2150        return true;
2151    }
2152    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
2153        std::env::var(env_name)
2154            .ok()
2155            .is_some_and(|value| !value.trim().is_empty())
2156    })
2157}
2158
2159pub fn available_provider_names() -> Vec<String> {
2160    provider_names()
2161        .into_iter()
2162        .filter(|provider| provider_key_available(provider))
2163        .collect()
2164}
2165
2166/// Check if a provider advertises a legacy provider-level feature.
2167pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
2168    provider_config(provider)
2169        .map(|p| p.features.iter().any(|f| f == feature))
2170        .unwrap_or(false)
2171}
2172
2173/// Provider-level catalog pricing/latency. Model-specific catalog pricing
2174/// wins when available; this is the adapter-level fallback used by routing
2175/// and portal summaries when a model has no explicit catalog entry.
2176pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
2177    provider_config(provider)
2178        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
2179        .unwrap_or((None, None, None))
2180}
2181
/// The tool-call channel a `tool_format` string addresses.
///
/// `native` is the provider JSON tool-calling channel. `text` (the canonical
/// tagged/heredoc grammar) and `json` (fenced-JSON) are both TEXT-channel
/// formats: they ride in the assistant's visible content and parse with a
/// text parser. Classifying through this type keeps the parity gates,
/// native-tools resolution, and tool-result message role in agreement about
/// what counts as a text-channel format.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatChannel {
    /// Provider native JSON tool calling.
    Native,
    /// A text-channel grammar carried in assistant content (`text` or `json`).
    Text,
}

/// Classify a `tool_format` string into its channel, or `None` for an unknown
/// value (a typo, or a not-yet-wired format). Callers use the `None` case to
/// reject unknown formats loudly instead of silently defaulting.
///
/// This `match` is the canonical place tool_format is switched: a new format
/// needs its own arm here, and until it has one it is reported as unknown
/// rather than being read as a text format. Matching is exact and
/// case-sensitive.
pub fn tool_format_channel(format: &str) -> Option<ToolFormatChannel> {
    let channel = match format {
        "native" => ToolFormatChannel::Native,
        "json" | "text" => ToolFormatChannel::Text,
        _ => return None,
    };
    Some(channel)
}
2212
2213/// True when `format` is a tool_format Harn understands (`native`, `text`, or
2214/// `json`). Used to gate the capability-matrix `preferred_tool_format` so a
2215/// pinned format is honored, while an unknown value falls through to the
2216/// native/text heuristic.
2217pub fn is_known_tool_format(format: &str) -> bool {
2218    tool_format_channel(format).is_some()
2219}
2220
2221/// Resolve the default tool format for a model+provider combination.
2222/// Priority: alias `tool_format` (matched by model ID) > provider/model
2223/// capability matrix > legacy provider feature > "json" (the global
2224/// text-channel default; heredoc "text" is opt-in via a pin or explicit
2225/// request).
2226pub fn default_tool_format(model: &str, provider: &str) -> String {
2227    let config = effective_config();
2228    default_tool_format_with_config(&config, model, provider)
2229}
2230
2231fn default_tool_format_with_config(
2232    config: &ProvidersConfig,
2233    model: &str,
2234    provider: &str,
2235) -> String {
2236    // Aliases match by model ID + provider, or by alias name.
2237    for (name, alias) in &config.aliases {
2238        let matches = (alias.id == model && alias.provider == provider) || name == model;
2239        if matches {
2240            if let Some(ref fmt) = alias.tool_format {
2241                return fmt.clone();
2242            }
2243        }
2244    }
2245    let capabilities = crate::llm::capabilities::lookup(provider, model);
2246    if let Some(format) = capabilities.preferred_tool_format.as_deref() {
2247        // A capability row may pin any known tool_format, including `text`
2248        // (heredoc) — the reverse safety valve a regressing model uses to pin
2249        // OFF the global json default. `json` is also honored when a row sets
2250        // it. The exhaustive match below is the EXHAUSTIVE-MATCH GUARD: a new
2251        // tool_format that isn't classified here fails loudly rather than
2252        // silently falling through to the native/json heuristic.
2253        if is_known_tool_format(format) {
2254            return format.to_string();
2255        }
2256    }
2257    let capability_matrix_native = capabilities.native_tools;
2258    let legacy_provider_native = config
2259        .providers
2260        .get(provider)
2261        .map(|p| p.features.iter().any(|f| f == "native_tools"))
2262        .unwrap_or(false);
2263    if capability_matrix_native || legacy_provider_native {
2264        "native".to_string()
2265    } else {
2266        // GLOBAL DEFAULT: a text-channel model with no pinned format resolves
2267        // to fenced-json (`json`), not heredoc (`text`). The win is STRUCTURAL
2268        // — a JSON string can't carry a raw newline, so a `<<EOF` content
2269        // delimiter never collides with the call wrapper (heredoc's known
2270        // production defect: models leak `<<EOF` into file content → the
2271        // `line 0: <<` thrash). Fenced-json swept a clean 1.0/1.0/1.0
2272        // (compliance/parse-determinism/expressiveness) across every model
2273        // measured, and the structural guarantee generalizes to unmeasured
2274        // models. Heredoc (`text`) stays selectable explicitly and via a
2275        // per-model `preferred_tool_format = "text"` pin (the reverse valve).
2276        "json".to_string()
2277    }
2278}
2279
2280fn with_effective_capability_tags(
2281    model_id: String,
2282    provider: String,
2283    mut model: ModelDef,
2284) -> ModelDef {
2285    model.capabilities = effective_model_capability_tags(&provider, &model_id);
2286    model
2287}
2288
2289/// Legacy display tags derived from the canonical provider/model capability
2290/// matrix. The matrix is the source of truth; `models.*.capabilities` in
2291/// providers.toml is accepted only for backwards-compatible parsing.
2292pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2293    let caps = crate::llm::capabilities::lookup(provider, model_id);
2294    capability_tags_from_capabilities(&caps)
2295}
2296
2297pub(crate) fn capability_tags_from_capabilities(
2298    caps: &crate::llm::capabilities::Capabilities,
2299) -> Vec<String> {
2300    let mut tags = Vec::new();
2301    // Today all Harn chat providers expose streaming. Keep this as a
2302    // transport baseline rather than a duplicated per-model declaration.
2303    tags.push("streaming".to_string());
2304    if caps.native_tools || caps.text_tool_wire_format_supported {
2305        tags.push("tools".to_string());
2306    }
2307    if !caps.tool_search.is_empty() {
2308        tags.push("tool_search".to_string());
2309    }
2310    if caps.vision || caps.vision_supported {
2311        tags.push("vision".to_string());
2312    }
2313    if caps.audio {
2314        tags.push("audio".to_string());
2315    }
2316    if caps.pdf {
2317        tags.push("pdf".to_string());
2318    }
2319    if caps.video {
2320        tags.push("video".to_string());
2321    }
2322    if caps.files_api_supported {
2323        tags.push("files".to_string());
2324    }
2325    if caps.prompt_caching {
2326        tags.push("prompt_caching".to_string());
2327    }
2328    if !caps.thinking_modes.is_empty() {
2329        tags.push("thinking".to_string());
2330    }
2331    if caps.interleaved_thinking_supported
2332        || caps
2333            .thinking_modes
2334            .iter()
2335            .any(|mode| mode == "adaptive" || mode == "effort")
2336    {
2337        tags.push("extended_thinking".to_string());
2338    }
2339    if caps.structured_output.is_some() || caps.json_schema.is_some() {
2340        tags.push("structured_output".to_string());
2341    }
2342    tags
2343}
2344
2345/// Resolve a tier or alias into a concrete model/provider pair.
2346pub fn resolve_tier_model(
2347    target: &str,
2348    preferred_provider: Option<&str>,
2349) -> Option<(String, String)> {
2350    let config = effective_config();
2351
2352    if let Some(alias) = config.aliases.get(target) {
2353        return Some((alias.id.clone(), alias.provider.clone()));
2354    }
2355
2356    let candidate_aliases = if let Some(provider) = preferred_provider {
2357        vec![
2358            format!("{provider}/{target}"),
2359            format!("{provider}:{target}"),
2360            format!("tier/{target}"),
2361            target.to_string(),
2362        ]
2363    } else {
2364        vec![format!("tier/{target}"), target.to_string()]
2365    };
2366
2367    for alias_name in candidate_aliases {
2368        if let Some(alias) = config.aliases.get(&alias_name) {
2369            return Some((alias.id.clone(), alias.provider.clone()));
2370        }
2371    }
2372
2373    None
2374}
2375
2376/// Return all configured alias-backed model/provider pairs whose resolved
2377/// model falls into the requested capability tier. The result is de-duplicated
2378/// and sorted deterministically by provider then model id.
2379pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2380    let config = effective_config();
2381    let mut seen = std::collections::BTreeSet::new();
2382    let mut candidates = Vec::new();
2383
2384    for alias in config.aliases.values() {
2385        let pair = (alias.id.clone(), alias.provider.clone());
2386        if seen.contains(&pair) {
2387            continue;
2388        }
2389        if model_tier(&alias.id) == target {
2390            seen.insert(pair.clone());
2391            candidates.push(pair);
2392        }
2393    }
2394
2395    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2396        provider_a
2397            .cmp(provider_b)
2398            .then_with(|| model_a.cmp(model_b))
2399    });
2400    candidates
2401}
2402
2403/// Return all configured alias-backed model/provider pairs. Used by routing
2404/// policies that need to compare alternatives across tiers.
2405pub fn all_model_candidates() -> Vec<(String, String)> {
2406    let config = effective_config();
2407    let mut seen = std::collections::BTreeSet::new();
2408    let mut candidates = Vec::new();
2409
2410    for alias in config.aliases.values() {
2411        let pair = (alias.id.clone(), alias.provider.clone());
2412        if seen.insert(pair.clone()) {
2413            candidates.push(pair);
2414        }
2415    }
2416
2417    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2418        provider_a
2419            .cmp(provider_b)
2420            .then_with(|| model_a.cmp(model_b))
2421    });
2422    candidates
2423}
2424
/// Choose a reviewer model from a different family than the author model.
///
/// The author is resolved through `resolve_model_info`; a non-blank
/// `options.author_provider` overrides the resolved provider and causes the
/// family, lineage and tool format to be recomputed for that provider.
///
/// Every entry of `config.models` is then considered. An entry is skipped
/// when it is the author itself, is deprecated, is not
/// `ModelAvailability::Serverless`, has an `"unknown"` family or the author's
/// family, lists the author in `avoid_as_reviewer_for`, or exceeds
/// `options.max_price_multiplier`. Survivors are scored with
/// `reviewer_score`; the highest score wins, with provider and then model id
/// (ascending) as tie-breakers.
///
/// When no candidate survives, or the author's family is `"unknown"`, the
/// returned selection has `fallback == true`, names the author as its own
/// reviewer, and carries a fallback code and reason.
pub fn pick_complementary_reviewer(
    options: ComplementaryReviewerOptions,
) -> ComplementaryReviewerSelection {
    let config = effective_config();
    let mut author = resolve_model_info(&options.author_model);
    // An explicit, non-blank provider wins over the resolved one; the
    // provider-dependent attributes are recomputed to match it.
    if let Some(provider) = options
        .author_provider
        .as_deref()
        .map(str::trim)
        .filter(|provider| !provider.is_empty())
    {
        author.provider = provider.to_string();
        author.family = model_family_with_config(&config, &author.provider, &author.id);
        author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
        author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
    }
    // The catalog row is looked up by model id only; it supplies the author's
    // pricing and `complementary_with` preferences when present.
    let author_entry = config.models.get(&author.id);
    let author_identity = complementary_identity(
        author.id.clone(),
        author.provider.clone(),
        author.family.clone(),
        author.lineage.clone(),
        author.tier.clone(),
        author_entry.and_then(|model| model.pricing.clone()),
    );

    // Builds the "review with the author model itself" result. The cost
    // estimate compares the author's pricing against itself.
    let fallback =
        |code: ReviewerFallbackCode, fallback_reason: String| ComplementaryReviewerSelection {
            intent: options.intent.as_str().to_string(),
            reviewer: author_identity.clone(),
            estimated_incremental_cost: cost_estimate(
                author_identity.pricing.as_ref(),
                author_identity.pricing.as_ref(),
            ),
            author: author_identity.clone(),
            fallback: true,
            reason: format!(
                "using author model {} because {fallback_reason}",
                author_identity.id
            ),
            fallback_reason: Some(fallback_reason),
            fallback_code: Some(code.as_code().to_string()),
        };

    // Without a known author family there is nothing to be "different" from.
    if author_identity.family == "unknown" {
        return fallback(
            ReviewerFallbackCode::UnknownAuthorFamily,
            "author model family is unknown".to_string(),
        );
    }

    let preferred_families = author_entry
        .map(|model| model.complementary_with.clone())
        .unwrap_or_default();
    let author_refs = reviewer_match_refs(&author_identity);
    // Counters used only to pick the most specific fallback reason below.
    let mut rejected_by_price = 0usize;
    let mut diff_family_seen = 0usize;
    let mut candidates = Vec::new();

    for (id, model) in config.models.iter() {
        // Never propose the author's own catalog row.
        if id == &author_identity.id && model.provider == author_identity.provider {
            continue;
        }
        if model.deprecated || model.availability != ModelAvailability::Serverless {
            continue;
        }
        let family = model_family_with_config(&config, &model.provider, id);
        if family == "unknown" || family == author_identity.family {
            continue;
        }
        // Counted before the avoid-list and price filters, so a non-zero value
        // means a different-family model existed even if it was later excluded.
        diff_family_seen += 1;
        let lineage = model_lineage_with_config(&config, &model.provider, id);
        let candidate_identity = complementary_identity(
            id.clone(),
            model.provider.clone(),
            family,
            lineage,
            model_tier_with_config(&config, id),
            model.pricing.clone(),
        );
        // The candidate opts out of reviewing this author.
        if model
            .avoid_as_reviewer_for
            .iter()
            .any(|selector| refs_contain_selector(&author_refs, selector))
        {
            continue;
        }
        if exceeds_price_cap(
            author_identity.pricing.as_ref(),
            candidate_identity.pricing.as_ref(),
            options.max_price_multiplier,
        ) {
            rejected_by_price += 1;
            continue;
        }
        let score = reviewer_score(
            &options,
            &author_identity,
            &candidate_identity,
            model,
            &preferred_families,
        );
        candidates.push(ReviewerCandidate {
            identity: candidate_identity,
            score,
        });
    }

    // Highest score first; incomparable scores are treated as equal, then
    // provider and model id (ascending) break ties.
    candidates.sort_by(|left, right| {
        right
            .score
            .partial_cmp(&left.score)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| left.identity.provider.cmp(&right.identity.provider))
            .then_with(|| left.identity.id.cmp(&right.identity.id))
    });

    let Some(best) = candidates.into_iter().next() else {
        // Fallback reasons are checked in order: price cap, then no
        // different-family model at all, then everything else excluded.
        if rejected_by_price > 0 {
            let cap = options.max_price_multiplier.unwrap_or_default();
            return fallback(
                ReviewerFallbackCode::NoDiffFamilyWithinPrice,
                format!("no different-family reviewer satisfied max_price_multiplier {cap}"),
            );
        }
        if diff_family_seen == 0 {
            return fallback(
                ReviewerFallbackCode::NoDiffFamilyServerless,
                "no active serverless different-family reviewer is cataloged".to_string(),
            );
        }
        return fallback(
            ReviewerFallbackCode::AllDiffFamilyExcluded,
            "all different-family reviewer candidates were excluded".to_string(),
        );
    };

    let estimate = cost_estimate(
        best.identity.pricing.as_ref(),
        author_identity.pricing.as_ref(),
    );
    ComplementaryReviewerSelection {
        intent: options.intent.as_str().to_string(),
        reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
        estimated_incremental_cost: estimate,
        author: author_identity,
        reviewer: best.identity,
        fallback: false,
        fallback_reason: None,
        fallback_code: None,
    }
}
2577
/// A reviewer candidate that passed every filter in
/// `pick_complementary_reviewer`, paired with its ranking score.
#[derive(Debug, Clone)]
struct ReviewerCandidate {
    /// Identity of the candidate model.
    identity: ComplementaryModelIdentity,
    /// Value returned by `reviewer_score`; higher ranks first.
    score: f64,
}
2583
/// Assemble a `ComplementaryModelIdentity` from its individual fields.
///
/// Each argument is moved unchanged into the field of the same name.
fn complementary_identity(
    id: String,
    provider: String,
    family: String,
    lineage: String,
    tier: String,
    pricing: Option<ModelPricing>,
) -> ComplementaryModelIdentity {
    ComplementaryModelIdentity {
        id,
        provider,
        family,
        lineage,
        tier,
        pricing,
    }
}
2601
2602fn reviewer_score(
2603    options: &ComplementaryReviewerOptions,
2604    author: &ComplementaryModelIdentity,
2605    candidate: &ComplementaryModelIdentity,
2606    model: &ModelDef,
2607    preferred_families: &[String],
2608) -> f64 {
2609    let candidate_refs = reviewer_match_refs(candidate);
2610    let mut score = 0.0;
2611    if let Some(rank) = preferred_families
2612        .iter()
2613        .position(|selector| refs_contain_selector(&candidate_refs, selector))
2614    {
2615        score += 1_000.0 - rank as f64;
2616    }
2617    if candidate.provider != author.provider {
2618        score += 100.0;
2619    }
2620    score += match tier_distance(&author.tier, &candidate.tier) {
2621        0 => 80.0,
2622        1 => 45.0,
2623        2 => 15.0,
2624        _ => 0.0,
2625    };
2626    for strength in intent_strengths(options.intent) {
2627        if model.strengths.iter().any(|tag| tag == strength) {
2628            score += 8.0;
2629        }
2630    }
2631    if model.capabilities.iter().any(|tag| tag == "tools") {
2632        score += 4.0;
2633    }
2634    if let (Some(author_total), Some(candidate_total)) = (
2635        pricing_total(author.pricing.as_ref()),
2636        pricing_total(candidate.pricing.as_ref()),
2637    ) {
2638        if author_total > 0.0 {
2639            let ratio = candidate_total / author_total;
2640            if ratio <= 1.0 {
2641                score += 20.0;
2642            }
2643            score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2644        }
2645    }
2646    score
2647}
2648
2649fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2650    match intent {
2651        ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2652        ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2653        ComplementaryReviewerIntent::PlanReview => {
2654            &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2655        }
2656    }
2657}
2658
2659fn tier_distance(left: &str, right: &str) -> u8 {
2660    let left = tier_rank(left);
2661    let right = tier_rank(right);
2662    left.abs_diff(right)
2663}
2664
/// Map a tier name onto an ordinal: `"small"` is 0, `"frontier"` and
/// `"reasoning"` are 2, and everything else (including `"mid"`) is 1.
fn tier_rank(tier: &str) -> u8 {
    if tier == "small" {
        0
    } else if matches!(tier, "frontier" | "reasoning") {
        2
    } else {
        1
    }
}
2673
2674fn exceeds_price_cap(
2675    author_pricing: Option<&ModelPricing>,
2676    candidate_pricing: Option<&ModelPricing>,
2677    max_price_multiplier: Option<f64>,
2678) -> bool {
2679    let Some(max_price_multiplier) = max_price_multiplier else {
2680        return false;
2681    };
2682    let Some(author_total) = pricing_total(author_pricing) else {
2683        return false;
2684    };
2685    let Some(candidate_total) = pricing_total(candidate_pricing) else {
2686        return true;
2687    };
2688    author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2689}
2690
2691fn cost_estimate(
2692    reviewer_pricing: Option<&ModelPricing>,
2693    author_pricing: Option<&ModelPricing>,
2694) -> Option<ComplementaryCostEstimate> {
2695    let reviewer_pricing = reviewer_pricing?;
2696    let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2697    let multiplier_vs_author = pricing_total(author_pricing)
2698        .filter(|author_total| *author_total > 0.0)
2699        .map(|author_total| total_per_mtok / author_total);
2700    Some(ComplementaryCostEstimate {
2701        input_per_mtok: reviewer_pricing.input_per_mtok,
2702        output_per_mtok: reviewer_pricing.output_per_mtok,
2703        total_per_mtok,
2704        multiplier_vs_author,
2705    })
2706}
2707
2708fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2709    pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2710}
2711
2712fn reviewer_reason(
2713    author: &ComplementaryModelIdentity,
2714    reviewer: &ComplementaryModelIdentity,
2715    estimate: Option<&ComplementaryCostEstimate>,
2716) -> String {
2717    let cost = estimate
2718        .and_then(|estimate| estimate.multiplier_vs_author)
2719        .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2720        .unwrap_or_else(|| "price ratio unavailable".to_string());
2721    format!(
2722        "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2723        reviewer.id,
2724        reviewer.provider,
2725        reviewer.family,
2726        author.family,
2727        reviewer.tier,
2728        author.tier,
2729        cost
2730    )
2731}
2732
2733fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2734    BTreeSet::from([
2735        identity.id.to_ascii_lowercase(),
2736        identity.provider.to_ascii_lowercase(),
2737        format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2738        format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2739        identity.family.to_ascii_lowercase(),
2740        identity.lineage.to_ascii_lowercase(),
2741    ])
2742}
2743
2744fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2745    normalized_catalog_token(Some(selector))
2746        .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2747        .is_some_and(|selector| refs.contains(&selector))
2748}
2749
2750// Model-pattern matching for forms like "claude-*", "qwen/*", "ollama:*".
2751// Shared workspace semantics live in `harn-glob`.
2752use harn_glob::match_name as glob_match;
2753
2754fn dirs_or_home() -> Option<String> {
2755    crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2756}
2757
2758/// Resolve the effective base URL for a provider, checking the `base_url_env`
2759/// override first, then falling back to the configured `base_url`.
2760pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2761    if let Some(env_name) = &pdef.base_url_env {
2762        if let Ok(val) = std::env::var(env_name) {
2763            // Strip surrounding quotes that some .env parsers leave intact.
2764            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2765            if !trimmed.is_empty() {
2766                return trimmed.to_string();
2767            }
2768        }
2769    }
2770    pdef.base_url.clone()
2771}
2772
/// Compile-time copy of the generated `llm/providers.toml`.
///
/// The file is built from `llm/catalog_sources/**/*.toml` by
/// `harn provider catalog build-config`. Edit the fragments, not the
/// generated snapshot or this string.
const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2777
2778/// Parse the embedded generated `providers.toml` into the runtime
2779/// `ProvidersConfig`.
2780///
2781/// Hosts overlay this base via `HARN_PROVIDERS_CONFIG`,
2782/// `~/.config/harn/providers.toml`, `harn.toml`, package-manifest
2783/// `[llm]` sections, and per-run `set_user_overrides(...)`. The same
2784/// Serde shape applies at every layer, so there is exactly one schema to
2785/// keep coherent — no parallel Rust-literal catalog.
2786///
2787/// We `expect` on parse failure because the file is bundled into the
2788/// binary at compile time; a malformed embedded catalog is a build-time
2789/// invariant violation that should fail every test, not silently
2790/// degrade in production.
2791fn default_config() -> ProvidersConfig {
2792    parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2793        .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2794}
2795
/// Test helper: the embedded default config with `overlay` merged on top.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2802
2803#[cfg(test)]
2804mod tests {
2805    use super::*;
2806
    /// Clear any installed user overrides by delegating to
    /// `clear_user_overrides`.
    fn reset_overrides() {
        clear_user_overrides();
    }
2810
    /// A user alias that pins `tool_format = "native"` resolves to `"text"`
    /// on one OpenRouter DeepSeek route and stays `"native"` on another.
    #[test]
    fn resolve_model_info_guards_bad_native_pin_on_unreliable_route() {
        reset_overrides();
        // An alias that pins tool_format = "native" for DeepSeek V3.2 on
        // OpenRouter — a route the capability registry knows is
        // native_unreliable (drops to unparsed DSML text). Before the
        // footgun-removal gate this bad pin survived resolution verbatim and
        // produced vanishing tool calls; now it is steered to the route's safe
        // text-channel format.
        let overlay = parse_config_toml(
            "[aliases.guard-ds]\nid = \"deepseek/deepseek-v3.2\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
        )
        .expect("overlay parses");
        set_user_overrides(Some(overlay));
        let resolved = resolve_model_info("guard-ds");
        assert_eq!(
            resolved.tool_format, "text",
            "a native pin on a native_unreliable route must be auto-corrected to text"
        );
        // Remove the first overlay before installing the second one.
        clear_user_overrides();

        // A safe native pin (a route with no adverse parity) is untouched.
        let overlay_ok = parse_config_toml(
            "[aliases.guard-ds-ok]\nid = \"deepseek/deepseek-v3-base\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
        )
        .expect("overlay parses");
        set_user_overrides(Some(overlay_ok));
        let resolved_ok = resolve_model_info("guard-ds-ok");
        assert_eq!(resolved_ok.tool_format, "native");
        clear_user_overrides();
    }
2842
2843    #[test]
2844    fn auto_select_prefers_local_provider_without_cloud_credentials() {
2845        // A catalog whose only provider is local and auth-free resolves to it
2846        // regardless of ambient cloud API keys: no preferred/credentialed cloud
2847        // provider is present, so the local fallback wins deterministically.
2848        let config = parse_config_toml(
2849            "[providers.ollama]\nbase_url = \"http://localhost:11434\"\nchat_endpoint = \"/v1/chat/completions\"\n",
2850        )
2851        .expect("config parses");
2852        assert!(provider_is_local(config.providers.get("ollama").unwrap()));
2853        assert_eq!(auto_select_provider(&config), "ollama");
2854    }
2855
2856    #[test]
2857    fn auto_select_falls_back_to_documented_default_when_empty() {
2858        let config = parse_config_toml("").expect("config parses");
2859        assert_eq!(auto_select_provider(&config), FALLBACK_PROVIDER);
2860    }
2861
    /// `[suppress] routes` parses, makes a config non-empty, and merging an
    /// overlay appends only the selectors that are not already present.
    #[test]
    fn suppress_routes_parse_and_merge_dedupe() {
        let mut base =
            parse_config_toml("[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\"]\n")
                .expect("base parses");
        assert!(!base.is_empty(), "a suppress-only overlay is not empty");
        // The overlay repeats the base selector and adds one new selector.
        let overlay = parse_config_toml(
            "[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\", \"ollama:img:tag\"]\n",
        )
        .expect("overlay parses");
        base.merge_from(&overlay);
        assert_eq!(
            base.suppress.routes,
            vec![
                "together:Qwen/Qwen3-Coder-Next-FP8".to_string(),
                "ollama:img:tag".to_string(),
            ],
            "merge appends new selectors without duplicating existing ones"
        );
    }
2882
2883    #[test]
2884    fn test_glob_match_prefix() {
2885        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
2886        assert!(glob_match("gpt-*", "gpt-4o"));
2887        assert!(!glob_match("claude-*", "gpt-4o"));
2888    }
2889
2890    #[test]
2891    fn test_glob_match_suffix() {
2892        assert!(glob_match("*-latest", "llama3.2-latest"));
2893        assert!(!glob_match("*-latest", "llama3.2"));
2894    }
2895
2896    #[test]
2897    fn test_glob_match_middle() {
2898        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
2899        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
2900    }
2901
2902    #[test]
2903    fn test_glob_match_exact() {
2904        assert!(glob_match("gpt-4o", "gpt-4o"));
2905        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
2906    }
2907
    /// With `HARN_DEFAULT_PROVIDER` unset, well-known model ids infer their
    /// expected provider and an unknown id infers `"anthropic"`.
    #[test]
    fn test_infer_provider_from_defaults() {
        let _guard = crate::llm::env_guard();
        // Remember the caller's value so it can be put back afterwards.
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        // SAFETY: presumably `env_guard()` serializes environment mutation
        // across tests while `_guard` is alive — TODO confirm against its
        // definition.
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        // NOTE(review): a failing assertion above unwinds before this restore
        // runs, leaving the variable removed for the rest of the process.
        // SAFETY: same assumption as above; `_guard` is still held here.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
2933
2934    #[test]
2935    fn test_infer_provider_prefix_rules() {
2936        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
2937        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
2938        // Even when the id also contains `/`, the local transport prefix wins.
2939        assert_eq!(infer_provider("local:owner/model"), "ollama");
2940        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
2941    }
2942
    /// With `HARN_DEFAULT_PROVIDER` unset, a single-slash id infers
    /// `"openrouter"` while a two-slash id infers `"anthropic"`.
    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        let _guard = crate::llm::env_guard();
        // Remember the caller's value so it can be put back afterwards.
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        // SAFETY: presumably `env_guard()` serializes environment mutation
        // across tests while `_guard` is alive — TODO confirm against its
        // definition.
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        // NOTE(review): a failing assertion above skips this restore.
        // SAFETY: same assumption as above; `_guard` is still held here.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
2961
    /// `cerebras/…` ids infer `"cerebras"` even though they contain exactly
    /// one slash.
    #[test]
    fn test_cerebras_inference_beats_openrouter_slash_fallback() {
        let _guard = crate::llm::env_guard();
        // Remember the caller's value so it can be put back afterwards.
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        // SAFETY: presumably `env_guard()` serializes environment mutation
        // across tests while `_guard` is alive — TODO confirm against its
        // definition.
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
        assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
        assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");

        // NOTE(review): a failing assertion above skips this restore.
        // SAFETY: same assumption as above; `_guard` is still held here.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
2981
    /// Bare catalog ids infer and resolve to `"cerebras"` with the id
    /// unchanged.
    #[test]
    fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
        // Bare model IDs that the embedded catalog hosts on Cerebras must
        // not be misrouted by the generic `gpt-*` / single-slash inference
        // fallbacks. Regression for harn#2142 (model-info routed
        // `gpt-oss-120b` to openai, breaking host TUI credential checks).
        let _guard = crate::llm::env_guard();
        // Remember the caller's value so it can be put back afterwards.
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        // SAFETY: presumably `env_guard()` serializes environment mutation
        // across tests while `_guard` is alive — TODO confirm against its
        // definition.
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
            assert_eq!(
                infer_provider(model),
                "cerebras",
                "{model} should route to its catalog provider"
            );
            // Full resolution must agree with inference and keep the id as given.
            let resolved = resolve_model_info(model);
            assert_eq!(resolved.id, model);
            assert_eq!(resolved.provider, "cerebras");
        }

        // NOTE(review): a failing assertion above skips this restore.
        // SAFETY: same assumption as above; `_guard` is still held here.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }
3012
    /// `wire_model_id` strips the `groq/` route prefix, and the equivalents of
    /// `gpt-oss-120b` include the Groq variant, exclude the source row, and
    /// all share the `openai-gpt-oss-120b` equivalence group.
    #[test]
    fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
        reset_overrides();

        assert_eq!(
            wire_model_id("groq/openai/gpt-oss-120b"),
            "openai/gpt-oss-120b"
        );
        // An id without a route prefix is returned as-is.
        assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");

        let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
        // Only the ids are needed for the membership checks below.
        let ids = equivalents
            .iter()
            .map(|(id, _)| id.as_str())
            .collect::<Vec<_>>();

        assert!(
            ids.contains(&"groq/openai/gpt-oss-120b"),
            "Cerebras GPT-OSS should surface the Groq serving variant"
        );
        assert!(
            !ids.contains(&"gpt-oss-120b"),
            "equivalence results should not include the source row"
        );
        assert!(equivalents.iter().all(|(_, model)| {
            model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
        }));
    }
3041
    /// The Fireworks `gpt-oss-120b` wire id has a catalog row with a 131072
    /// context window, provider `fireworks`, and the shared equivalence group.
    #[test]
    fn fireworks_gpt_oss_route_has_real_context_window() {
        // Regression: the Fireworks-served `accounts/fireworks/models/gpt-oss-120b`
        // wire id had NO catalog row, so its context window resolved to None and
        // the agent's auto-compaction budget had nothing to enforce — the prompt
        // grew until Fireworks rejected the turn with HTTP 400 [context_overflow]
        // (session 019ee303: 197467 tokens > 131071 max). Cataloging the real
        // 131072 window lets compaction trigger before the hard limit.
        reset_overrides();

        let entry = model_catalog_entry("accounts/fireworks/models/gpt-oss-120b")
            .expect("Fireworks gpt-oss-120b must be in the model catalog");
        assert_eq!(entry.context_window, 131_072);
        assert_eq!(entry.provider, "fireworks");
        assert_eq!(
            entry.equivalence_group.as_deref(),
            Some("openai-gpt-oss-120b"),
        );
    }
3061
    /// A user overlay row for `gpt-4o` that names `openrouter` changes the
    /// provider inferred for that id.
    #[test]
    fn test_user_catalog_overlay_re_homes_model_provider() {
        // Users can re-home a built-in model by overlaying a catalog row;
        // the exact-match catalog lookup must honor overlays as well as the
        // embedded TOML.
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        // Only `name`, `provider` and `context_window` carry non-default-looking
        // values; every other field is spelled out as empty/`None`/`false`.
        overlay.models.insert(
            "gpt-4o".to_string(),
            ModelDef {
                name: "GPT-4o via OpenRouter".to_string(),
                provider: "openrouter".to_string(),
                context_window: 128_000,
                logical_model: None,
                equivalence_group: None,
                served_variant: None,
                wire_model: None,
                api_dialect: None,
                rate_limits: None,
                performance: None,
                architecture: None,
                local_memory: None,
                runtime_context_window: None,
                stream_timeout: None,
                capabilities: Vec::new(),
                pricing: None,
                deprecated: false,
                deprecation_note: None,
                superseded_by: None,
                fast_mode: None,
                quality_tags: Vec::new(),
                availability: ModelAvailability::default(),
                tier: None,
                open_weight: None,
                strengths: Vec::new(),
                benchmarks: std::collections::BTreeMap::new(),
                family: None,
                lineage: None,
                complementary_with: Vec::new(),
                avoid_as_reviewer_for: Vec::new(),
            },
        );
        set_user_overrides(Some(overlay));

        assert_eq!(infer_provider("gpt-4o"), "openrouter");

        // NOTE(review): a failing assertion above skips this cleanup.
        reset_overrides();
    }
3110
3111    #[test]
3112    fn test_resolve_model_info_normalizes_provider_prefixes() {
3113        let local = resolve_model_info("local:gemma-4-e4b-it");
3114        assert_eq!(local.id, "gemma-4-e4b-it");
3115        assert_eq!(local.provider, "ollama");
3116
3117        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
3118        assert_eq!(ollama.id, "qwen3:30b-a3b");
3119        assert_eq!(ollama.provider, "ollama");
3120
3121        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
3122        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
3123        assert_eq!(hf.provider, "huggingface");
3124
3125        let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
3126        assert_eq!(cerebras.id, "gpt-oss-120b");
3127        assert_eq!(cerebras.provider, "cerebras");
3128
3129        let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
3130        assert_eq!(cerebras_glm.id, "zai-glm-4.7");
3131        assert_eq!(cerebras_glm.provider, "cerebras");
3132    }
3133
3134    #[test]
3135    fn test_model_tier_from_defaults() {
3136        // Tier is now self-declared per model row in providers.toml.
3137        // Models that match an entry use the declared value; unknown
3138        // model ids fall through to `tier_defaults.default` ("mid").
3139        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
3140        assert_eq!(model_tier("gpt-4o"), "frontier");
3141        assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
3142        assert_eq!(model_tier("deepseek-v4-flash"), "mid");
3143        assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
3144        assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
3145        assert_eq!(model_tier("glm-5.1"), "frontier");
3146        // Unknown ids resolve to the default.
3147        assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
3148    }
3149
3150    #[test]
3151    fn test_model_family_preserves_underlying_hosted_lineage() {
3152        assert_eq!(
3153            model_family("openrouter", "anthropic/claude-sonnet-4-6"),
3154            "anthropic-claude"
3155        );
3156        assert_eq!(
3157            model_family("openrouter", "google/gemini-2.5-flash"),
3158            "google-gemini"
3159        );
3160        assert_eq!(
3161            model_family("openrouter", "openai/o3-mini"),
3162            "openai-reasoning"
3163        );
3164        assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
3165        assert_eq!(
3166            model_lineage("openrouter", "openai/o3-mini"),
3167            "openai-reasoning"
3168        );
3169        assert_eq!(
3170            model_lineage("anthropic", "claude-opus-4-8"),
3171            "claude-opus-adaptive"
3172        );
3173        assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
3174    }
3175
3176    #[test]
3177    fn test_complementary_reviewer_uses_different_family() {
3178        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3179            author_model: "claude-sonnet-4-6".to_string(),
3180            author_provider: None,
3181            intent: ComplementaryReviewerIntent::PlanReview,
3182            max_price_multiplier: Some(3.0),
3183        });
3184
3185        assert!(!selection.fallback, "{selection:?}");
3186        assert_eq!(selection.author.family, "anthropic-claude");
3187        assert_ne!(selection.reviewer.family, selection.author.family);
3188        assert_eq!(selection.reviewer.tier, "frontier");
3189        assert!(selection.estimated_incremental_cost.is_some());
3190        // Success path carries no machine-readable fallback code, so a caller
3191        // can treat `fallback_code.is_some()` as "must not self-review".
3192        assert_eq!(selection.fallback_code, None, "{selection:?}");
3193    }
3194
3195    #[test]
3196    fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
3197        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3198            author_model: "gpt-4o-mini".to_string(),
3199            author_provider: Some("openai".to_string()),
3200            intent: ComplementaryReviewerIntent::Review,
3201            max_price_multiplier: Some(0.01),
3202        });
3203
3204        assert!(selection.fallback, "{selection:?}");
3205        assert_eq!(selection.reviewer.id, "gpt-4o-mini");
3206        assert_eq!(selection.reviewer.family, selection.author.family);
3207        assert!(selection
3208            .fallback_reason
3209            .as_deref()
3210            .is_some_and(|reason| reason.contains("max_price_multiplier")));
3211        // The machine-readable code is stable regardless of the prose; a caller
3212        // hard-fails an independent-review step by branching on this, never by
3213        // parsing `fallback_reason`.
3214        assert_eq!(
3215            selection.fallback_code.as_deref(),
3216            Some(ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code()),
3217            "{selection:?}"
3218        );
3219        assert_eq!(
3220            ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3221            "no_diff_family_within_price"
3222        );
3223    }
3224
3225    #[test]
3226    fn test_reviewer_fallback_codes_are_stable_strings() {
3227        // Append-only contract: harn pipelines and Rust callers branch on these
3228        // exact strings, so changing one is a breaking change.
3229        assert_eq!(
3230            ReviewerFallbackCode::UnknownAuthorFamily.as_code(),
3231            "unknown_author_family"
3232        );
3233        assert_eq!(
3234            ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3235            "no_diff_family_within_price"
3236        );
3237        assert_eq!(
3238            ReviewerFallbackCode::NoDiffFamilyServerless.as_code(),
3239            "no_diff_family_serverless"
3240        );
3241        assert_eq!(
3242            ReviewerFallbackCode::AllDiffFamilyExcluded.as_code(),
3243            "all_diff_family_excluded"
3244        );
3245    }
3246
3247    #[test]
3248    fn test_resolve_model_unknown_alias() {
3249        let (id, provider) = resolve_model("gpt-4o");
3250        assert_eq!(id, "gpt-4o");
3251        assert!(provider.is_none());
3252    }
3253
3254    #[test]
3255    fn test_provider_names() {
3256        let names = provider_names();
3257        assert!(names.len() >= 7);
3258        assert!(names.contains(&"anthropic".to_string()));
3259        assert!(names.contains(&"together".to_string()));
3260        assert!(names.contains(&"local".to_string()));
3261        assert!(names.contains(&"mlx".to_string()));
3262        assert!(names.contains(&"openai".to_string()));
3263        assert!(names.contains(&"ollama".to_string()));
3264        assert!(names.contains(&"bedrock".to_string()));
3265        assert!(names.contains(&"azure_openai".to_string()));
3266        assert!(names.contains(&"vertex".to_string()));
3267    }
3268
3269    #[test]
3270    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
3271        let mut overlay = ProvidersConfig {
3272            default_provider: Some("ollama".to_string()),
3273            ..Default::default()
3274        };
3275        overlay.aliases.insert(
3276            "quickstart".to_string(),
3277            AliasDef {
3278                id: "llama3.2".to_string(),
3279                provider: "ollama".to_string(),
3280                tool_format: None,
3281            },
3282        );
3283
3284        let merged = merge_global_config(overlay);
3285
3286        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
3287        assert!(merged.providers.contains_key("anthropic"));
3288        assert!(merged.providers.contains_key("ollama"));
3289        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
3290    }
3291
3292    #[test]
3293    fn partial_provider_overlay_preserves_builtin_provider_metadata() {
3294        let overlay = parse_config_toml(
3295            r#"
3296            [providers.ollama]
3297            base_url = "http://localhost:11435"
3298            extra_headers = { "x-local" = "1" }
3299            "#,
3300        )
3301        .expect("provider overlay parses");
3302
3303        let merged = merge_global_config(overlay);
3304        let ollama = merged
3305            .providers
3306            .get("ollama")
3307            .expect("ollama remains configured");
3308
3309        assert_eq!(ollama.base_url, "http://localhost:11435");
3310        assert_eq!(ollama.auth_style, "none");
3311        assert_eq!(ollama.chat_endpoint, "/api/chat");
3312        assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
3313        assert_eq!(ollama.cost_per_1k_in, Some(0.0));
3314        assert_eq!(ollama.cost_per_1k_out, Some(0.0));
3315        assert_eq!(
3316            ollama
3317                .healthcheck
3318                .as_ref()
3319                .and_then(|healthcheck| healthcheck.path.as_deref()),
3320            Some("/api/tags")
3321        );
3322        assert_eq!(
3323            ollama.extra_headers.get("x-local").map(String::as_str),
3324            Some("1")
3325        );
3326    }
3327
3328    #[test]
3329    fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
3330        let overlay = parse_config_toml(
3331            r#"
3332            [providers.ollama]
3333            auth_style = "bearer"
3334            auth_env = "OLLAMA_API_KEY"
3335            "#,
3336        )
3337        .expect("provider overlay parses");
3338
3339        let merged = merge_global_config(overlay);
3340        let ollama = merged
3341            .providers
3342            .get("ollama")
3343            .expect("ollama remains configured");
3344
3345        assert_eq!(ollama.auth_style, "bearer");
3346        assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
3347        assert_eq!(ollama.chat_endpoint, "/api/chat");
3348    }
3349
3350    #[test]
3351    fn test_resolve_tier_model_default_aliases() {
3352        // Exercise the alias-resolution machinery, not the specific catalog
3353        // value: the model under each tier alias evolves as the embedded
3354        // providers.toml is updated. The invariants worth pinning are the
3355        // provider routing + catalog-registration of the resolved model.
3356        let (model, provider) = resolve_tier_model("frontier", None)
3357            .expect("frontier alias must resolve from the embedded catalog");
3358        assert_eq!(provider, "anthropic");
3359        assert!(
3360            model_catalog_entry(&model)
3361                .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
3362            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
3363        );
3364
3365        let (model, provider) = resolve_tier_model("small", None)
3366            .expect("small alias must resolve from the embedded catalog");
3367        assert!(
3368            [
3369                "openrouter",
3370                "huggingface",
3371                "local",
3372                "llamacpp",
3373                "mlx",
3374                "ollama"
3375            ]
3376            .contains(&provider.as_str()),
3377            "small tier should resolve to an open-weight provider (got {provider} / {model})"
3378        );
3379    }
3380
3381    #[test]
3382    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
3383        // tier/<provider> takes precedence over generic tier when the
3384        // caller scopes by provider. Don't pin the specific model — the
3385        // catalog evolves.
3386        let (model, provider) = resolve_tier_model("mid", Some("openai"))
3387            .expect("mid tier scoped to openai must resolve");
3388        assert_eq!(provider, "openai");
3389        assert!(
3390            model_catalog_entry(&model).is_some(),
3391            "mid/openai alias must point at a registered model (got {model})"
3392        );
3393    }
3394
3395    #[test]
3396    fn test_provider_config_anthropic() {
3397        let pdef = provider_config("anthropic").unwrap();
3398        assert_eq!(pdef.auth_style, "header");
3399        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
3400    }
3401
3402    #[test]
3403    fn test_provider_config_mlx() {
3404        let pdef = provider_config("mlx").unwrap();
3405        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
3406        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
3407        assert_eq!(
3408            pdef.healthcheck.unwrap().path.as_deref(),
3409            Some("/v1/models")
3410        );
3411
3412        let (model, provider) = resolve_model("mlx-qwen36-27b");
3413        assert_eq!(model, "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit");
3414        assert_eq!(provider.as_deref(), Some("mlx"));
3415    }
3416
3417    #[test]
3418    fn test_enterprise_provider_defaults_and_inference() {
3419        let bedrock = provider_config("bedrock").unwrap();
3420        assert_eq!(bedrock.auth_style, "aws_sigv4");
3421        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
3422        assert_eq!(
3423            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
3424            "bedrock"
3425        );
3426        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
3427
3428        let azure = provider_config("azure_openai").unwrap();
3429        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
3430        assert_eq!(
3431            auth_env_names(&azure.auth_env),
3432            vec![
3433                "AZURE_OPENAI_API_KEY".to_string(),
3434                "AZURE_OPENAI_AD_TOKEN".to_string(),
3435                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
3436            ]
3437        );
3438
3439        let vertex = provider_config("vertex").unwrap();
3440        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
3441        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
3442    }
3443
3444    #[test]
3445    fn test_default_provider_env_override_for_unknown_model() {
3446        let _guard = crate::llm::env_guard();
3447        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3448        unsafe {
3449            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
3450        }
3451
3452        let inference = infer_provider_detail("unknown-model");
3453
3454        unsafe {
3455            match prev_default_provider {
3456                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3457                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3458            }
3459        }
3460
3461        assert_eq!(inference.provider, "openai");
3462        assert_eq!(
3463            inference.source,
3464            crate::llm::provider::ProviderInferenceSource::DefaultFallback
3465        );
3466    }
3467
3468    #[test]
3469    fn test_unknown_model_family_ignores_default_provider_fallback() {
3470        let _guard = crate::llm::env_guard();
3471        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3472        unsafe {
3473            std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
3474        }
3475
3476        let unknown = resolve_model_info("mystery-model-xyz");
3477        let known_family = resolve_model_info("deepseek-mystery-model");
3478
3479        unsafe {
3480            match prev_default_provider {
3481                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3482                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3483            }
3484        }
3485
3486        assert_eq!(unknown.provider, "ollama");
3487        assert_eq!(unknown.family, "unknown");
3488        assert_eq!(unknown.lineage, "unknown");
3489        assert_eq!(known_family.family, "deepseek");
3490        assert_eq!(known_family.lineage, "deepseek");
3491    }
3492
3493    #[test]
3494    fn test_resolve_base_url_no_env() {
3495        let pdef = ProviderDef {
3496            base_url: "https://example.com".to_string(),
3497            ..Default::default()
3498        };
3499        assert_eq!(resolve_base_url(&pdef), "https://example.com");
3500    }
3501
3502    #[test]
3503    fn test_default_config_roundtrip() {
3504        let config = default_config();
3505        assert!(!config.providers.is_empty());
3506        assert!(!config.inference_rules.is_empty());
3507        // Tier is now declared on each model row; tier_rules is allowed
3508        // to be empty (the rule table is a legacy fallback only).
3509        assert_eq!(config.tier_defaults.default, "mid");
3510        // At least the new open-weight frontiers should have explicit tiers.
3511        let frontiers = config
3512            .models
3513            .iter()
3514            .filter(|(_, m)| m.tier.as_deref() == Some("frontier"))
3515            .count();
3516        assert!(
3517            frontiers >= 4,
3518            "expected at least 4 frontier-tagged models, got {frontiers}"
3519        );
3520    }
3521
3522    #[test]
3523    fn test_local_ollama_catalog_metadata() {
3524        reset_overrides();
3525
3526        let devstral =
3527            model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
3528        assert_eq!(devstral.context_window, 262_144);
3529        assert!(!devstral.capabilities.iter().any(|cap| cap == "vision"));
3530
3531        let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
3532        assert_eq!(gemma4.context_window, 262_144);
3533        assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
3534    }
3535
3536    #[test]
3537    fn local_gemma4_source_tags_match_structured_capability_tags() {
3538        reset_overrides();
3539        let config = default_config();
3540        for id in [
3541            "gemma-4-e2b-it",
3542            "gemma-4-e4b-it",
3543            "gemma-4-12b-it",
3544            "gemma-4-26b-a4b-it",
3545            "gemma-4-31b-it",
3546        ] {
3547            let source = config
3548                .models
3549                .get(id)
3550                .unwrap_or_else(|| panic!("{id} should be in the embedded catalog"));
3551            let derived = effective_model_capability_tags(&source.provider, id);
3552            assert_eq!(
3553                source.capabilities, derived,
3554                "{}/{} source capabilities must match derived capability_tags",
3555                source.provider, id
3556            );
3557        }
3558    }
3559
3560    #[test]
3561    fn capability_tags_include_structured_capability_flags() {
3562        let caps = crate::llm::capabilities::Capabilities {
3563            native_tools: true,
3564            tool_search: vec!["web".to_string()],
3565            vision_supported: true,
3566            audio: true,
3567            pdf: true,
3568            video: true,
3569            files_api_supported: true,
3570            prompt_caching: true,
3571            thinking_modes: vec!["enabled".to_string()],
3572            structured_output: Some("native".to_string()),
3573            ..Default::default()
3574        };
3575
3576        assert_eq!(
3577            capability_tags_from_capabilities(&caps),
3578            vec![
3579                "streaming",
3580                "tools",
3581                "tool_search",
3582                "vision",
3583                "audio",
3584                "pdf",
3585                "video",
3586                "files",
3587                "prompt_caching",
3588                "thinking",
3589                "structured_output",
3590            ]
3591        );
3592    }
3593
3594    #[test]
3595    fn test_external_config_overlays_default_catalog() {
3596        let mut config = default_config();
3597        let mut overlay = ProvidersConfig {
3598            default_provider: Some("ollama".to_string()),
3599            ..Default::default()
3600        };
3601        overlay.providers.insert(
3602            "custom".to_string(),
3603            ProviderDef {
3604                base_url: "https://llm.example.test/v1".to_string(),
3605                chat_endpoint: "/chat/completions".to_string(),
3606                ..Default::default()
3607            },
3608        );
3609
3610        config.merge_from(&overlay);
3611
3612        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
3613        assert!(config.providers.contains_key("custom"));
3614        assert!(config.providers.contains_key("anthropic"));
3615        assert!(config.providers.contains_key("ollama"));
3616    }
3617
3618    #[test]
3619    fn test_model_params_empty() {
3620        let params = model_params("claude-sonnet-4-20250514");
3621        assert!(params.is_empty());
3622    }
3623
3624    #[test]
3625    fn test_user_overrides_add_provider_and_alias() {
3626        reset_overrides();
3627        let mut overlay = ProvidersConfig::default();
3628        overlay.providers.insert(
3629            "acme".to_string(),
3630            ProviderDef {
3631                base_url: "https://llm.acme.test/v1".to_string(),
3632                chat_endpoint: "/chat/completions".to_string(),
3633                ..Default::default()
3634            },
3635        );
3636        overlay.aliases.insert(
3637            "acme-fast".to_string(),
3638            AliasDef {
3639                id: "acme/model-fast".to_string(),
3640                provider: "acme".to_string(),
3641                tool_format: Some("native".to_string()),
3642            },
3643        );
3644        set_user_overrides(Some(overlay));
3645
3646        let (model, provider) = resolve_model("acme-fast");
3647        assert_eq!(model, "acme/model-fast");
3648        assert_eq!(provider.as_deref(), Some("acme"));
3649        assert!(provider_names().contains(&"acme".to_string()));
3650        assert_eq!(
3651            provider_config("acme").map(|provider| provider.base_url),
3652            Some("https://llm.acme.test/v1".to_string())
3653        );
3654
3655        reset_overrides();
3656    }
3657
3658    #[test]
3659    fn test_default_tool_format_uses_capability_matrix() {
3660        reset_overrides();
3661
3662        assert_eq!(
3663            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
3664            "native"
3665        );
3666        // devstral dropped its stale heredoc `text` pin (it has no reserved-token
3667        // constraint, so there was no structural reason to stay on heredoc) and
3668        // now inherits the global `json` text-channel default. Heredoc is still
3669        // reachable via an explicit `preferred_tool_format = "text"` pin.
3670        assert_eq!(
3671            default_tool_format("devstral-small-2:24b", "ollama"),
3672            "json"
3673        );
3674        // vLLM/SGLang-served Gemma 4 exposes OpenAI-compatible function calling,
3675        // so the local route declares native tools (matching every hosted gemma-4
3676        // sibling) rather than degrading to a text tool format.
3677        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "native");
3678        // deepseek-v3.2 and qwen3-coder both pin `text` in the capability
3679        // matrix, so they keep heredoc rather than inheriting the json default.
3680        assert_eq!(
3681            default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
3682            "text"
3683        );
3684        assert_eq!(
3685            default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
3686            "text"
3687        );
3688        // GPT-OSS tool defaults are provider-specific: aggregate OpenRouter and
3689        // Fireworks use Harn's heredoc text tools, as does DeepInfra — its
3690        // native Harmony channel drops tool calls into the private reasoning
3691        // channel (footgun), so it is pinned to text. Native-reliable hosts
3692        // (Cerebras, Groq) stay on provider-native tool calls.
3693        assert_eq!(
3694            default_tool_format("openai/gpt-oss-120b", "openrouter"),
3695            "text"
3696        );
3697        assert_eq!(
3698            default_tool_format("accounts/fireworks/models/gpt-oss-120b", "fireworks"),
3699            "text"
3700        );
3701        assert_eq!(default_tool_format("gpt-oss-120b", "cerebras"), "native");
3702        assert_eq!(
3703            default_tool_format("openai/gpt-oss-120b", "deepinfra"),
3704            "text"
3705        );
3706        assert_eq!(default_tool_format("openai/gpt-oss-120b", "groq"), "native");
3707    }
3708
3709    #[test]
3710    fn test_default_tool_format_unpinned_text_channel_is_json() {
3711        reset_overrides();
3712
3713        // GLOBAL DEFAULT FLIP: a model with no capability-matrix pin and no
3714        // native tool support resolves to fenced-json (`json`), not heredoc
3715        // (`text`). This is the behavior change — an unknown text-channel model
3716        // gets the delimiter-safe default. (Native-capable unknowns still get
3717        // `native`; pinned models still honor their pin, covered above.)
3718        assert_eq!(default_tool_format("mystery-model-xyz", "ollama"), "json");
3719    }
3720
3721    #[test]
3722    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
3723        reset_overrides();
3724        let mut overlay = ProvidersConfig::default();
3725        overlay.models.insert(
3726            "acme/model-fast".to_string(),
3727            ModelDef {
3728                name: "Acme Fast".to_string(),
3729                provider: "acme".to_string(),
3730                context_window: 65_536,
3731                logical_model: None,
3732                equivalence_group: None,
3733                served_variant: None,
3734                wire_model: None,
3735                api_dialect: None,
3736                rate_limits: None,
3737                performance: None,
3738                architecture: None,
3739                local_memory: None,
3740                runtime_context_window: None,
3741                stream_timeout: Some(42.0),
3742                capabilities: vec!["tools".to_string(), "streaming".to_string()],
3743                pricing: Some(ModelPricing {
3744                    input_per_mtok: 1.25,
3745                    output_per_mtok: 2.5,
3746                    cache_read_per_mtok: Some(0.25),
3747                    cache_write_per_mtok: None,
3748                }),
3749                deprecated: false,
3750                deprecation_note: None,
3751                superseded_by: None,
3752                fast_mode: None,
3753                quality_tags: Vec::new(),
3754                availability: ModelAvailability::default(),
3755                tier: None,
3756                open_weight: None,
3757                strengths: Vec::new(),
3758                benchmarks: std::collections::BTreeMap::new(),
3759                family: None,
3760                lineage: None,
3761                complementary_with: Vec::new(),
3762                avoid_as_reviewer_for: Vec::new(),
3763            },
3764        );
3765        overlay
3766            .qc_defaults
3767            .insert("acme".to_string(), "acme/model-cheap".to_string());
3768        set_user_overrides(Some(overlay));
3769
3770        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
3771        assert_eq!(entry.context_window, 65_536);
3772        assert_eq!(
3773            entry.capabilities,
3774            vec!["streaming".to_string(), "tools".to_string()]
3775        );
3776        assert_eq!(
3777            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
3778            Some(1.25)
3779        );
3780        assert_eq!(
3781            pricing_per_1k_for("acme", "acme/model-fast"),
3782            Some((0.00125, 0.0025))
3783        );
3784        assert_eq!(
3785            qc_default_model("acme").as_deref(),
3786            Some("acme/model-cheap")
3787        );
3788
3789        reset_overrides();
3790    }
3791
3792    #[test]
3793    fn test_user_overrides_prepend_inference_rules() {
3794        reset_overrides();
3795        let mut overlay = ProvidersConfig::default();
3796        overlay.inference_rules.push(InferenceRule {
3797            pattern: Some("internal-*".to_string()),
3798            contains: None,
3799            exact: None,
3800            provider: "openai".to_string(),
3801        });
3802        set_user_overrides(Some(overlay));
3803
3804        assert_eq!(infer_provider("internal-foo"), "openai");
3805
3806        reset_overrides();
3807    }
3808
3809    // ── Embedded providers.toml invariants ───────────────────────────────────
3810    // These tests pin properties of the *system* — TOML parses, every
3811    // alias resolves, every deprecated model has a note — without
3812    // pinning specific catalog values. They survive future catalog
3813    // churn and surface real schema breakage.
3814
3815    #[test]
3816    fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
3817        let config = default_config();
3818        assert!(
3819            config.providers.len() >= 10,
3820            "expected >=10 providers in embedded catalog, got {}",
3821            config.providers.len()
3822        );
3823        assert!(
3824            config.models.len() >= 20,
3825            "expected >=20 models in embedded catalog, got {}",
3826            config.models.len()
3827        );
3828        assert!(
3829            config.aliases.len() >= 15,
3830            "expected >=15 aliases in embedded catalog, got {}",
3831            config.aliases.len()
3832        );
3833        assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
3834    }
3835
3836    #[test]
3837    fn embedded_catalog_every_deprecated_model_has_a_note() {
3838        let config = default_config();
3839        let offenders: Vec<&str> = config
3840            .models
3841            .iter()
3842            .filter(|(_, model)| {
3843                model.deprecated
3844                    && model
3845                        .deprecation_note
3846                        .as_deref()
3847                        .unwrap_or("")
3848                        .trim()
3849                        .is_empty()
3850            })
3851            .map(|(id, _)| id.as_str())
3852            .collect();
3853        assert!(
3854            offenders.is_empty(),
3855            "deprecated models missing a deprecation_note: {offenders:?}"
3856        );
3857    }
3858
3859    #[test]
3860    fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
3861        let config = default_config();
3862        for id in ["gpt-oss-120b", "zai-glm-4.7"] {
3863            let model = config.models.get(id).expect("current public Cerebras row");
3864            assert_eq!(model.provider, "cerebras");
3865            assert_eq!(model.availability, ModelAvailability::Serverless);
3866            assert!(!model.deprecated);
3867        }
3868
3869        let llama = config
3870            .models
3871            .get("llama-3.3-70b")
3872            .expect("legacy Cerebras row");
3873        assert_eq!(llama.provider, "cerebras");
3874        assert_eq!(llama.availability, ModelAvailability::Dedicated);
3875        assert!(llama.deprecated);
3876    }
3877
3878    #[test]
3879    fn embedded_openrouter_gpt_oss_120b_has_no_fragment_bleed() {
3880        // Regression for the provider-catalog leading-key bleed: the openrouter
3881        // `openai/gpt-oss-120b` row was the last model in its fragment with no
3882        // inline tier/open_weight/strengths, so the next fragment's leading bare
3883        // keys reattached to it after raw-text concatenation — mislabeling it as
3884        // `open_weight = false` with a spurious `vision` strength. It must now be
3885        // self-described: open weight, no vision, and a tier consistent with the
3886        // rest of its equivalence group.
3887        let config = default_config();
3888        let model = config
3889            .models
3890            .get("openai/gpt-oss-120b")
3891            .expect("openrouter gpt-oss-120b row");
3892        assert_eq!(model.provider, "openrouter");
3893        assert_eq!(
3894            model.open_weight,
3895            Some(true),
3896            "gpt-oss-120b is Apache-2.0 open weight, not the bled-in open_weight=false"
3897        );
3898        assert!(
3899            !model.strengths.iter().any(|s| s == "vision"),
3900            "gpt-oss-120b is text-only; the bled-in `vision` strength must be gone: {:?}",
3901            model.strengths
3902        );
3903        assert!(
3904            !model.strengths.is_empty(),
3905            "gpt-oss-120b must carry its own strengths, not None"
3906        );
3907
3908        // tier is a property of the logical model: every active row in the
3909        // openai-gpt-oss-120b equivalence group must agree.
3910        let group_tiers: std::collections::BTreeSet<_> = config
3911            .models
3912            .values()
3913            .filter(|m| {
3914                m.equivalence_group.as_deref() == Some("openai-gpt-oss-120b") && !m.deprecated
3915            })
3916            .map(|m| m.tier.clone())
3917            .collect();
3918        assert_eq!(
3919            group_tiers.len(),
3920            1,
3921            "openai-gpt-oss-120b group must share one tier, got {group_tiers:?}"
3922        );
3923    }
3924
3925    #[test]
3926    fn embedded_catalog_every_model_targets_a_registered_provider() {
3927        let config = default_config();
3928        let known: std::collections::BTreeSet<&str> =
3929            config.providers.keys().map(String::as_str).collect();
3930        let orphans: Vec<(&str, &str)> = config
3931            .models
3932            .iter()
3933            .filter(|(_, model)| !known.contains(model.provider.as_str()))
3934            .map(|(id, model)| (id.as_str(), model.provider.as_str()))
3935            .collect();
3936        assert!(
3937            orphans.is_empty(),
3938            "models reference unknown providers: {orphans:?}"
3939        );
3940    }
3941
3942    #[test]
3943    fn embedded_catalog_every_alias_targets_a_registered_provider() {
3944        let config = default_config();
3945        let known: std::collections::BTreeSet<&str> =
3946            config.providers.keys().map(String::as_str).collect();
3947        let orphans: Vec<(&str, &str)> = config
3948            .aliases
3949            .iter()
3950            .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
3951            .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
3952            .collect();
3953        assert!(
3954            orphans.is_empty(),
3955            "aliases reference unknown providers: {orphans:?}"
3956        );
3957    }
3958
3959    #[test]
3960    fn embedded_catalog_every_qc_default_targets_a_known_model() {
3961        let config = default_config();
3962        let orphans: Vec<(&str, &str)> = config
3963            .qc_defaults
3964            .iter()
3965            .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
3966            .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
3967            .collect();
3968        assert!(
3969            orphans.is_empty(),
3970            "qc_defaults reference unknown models: {orphans:?}"
3971        );
3972    }
3973
3974    #[test]
3975    fn embedded_catalog_pricing_rates_are_non_negative() {
3976        let config = default_config();
3977        for (id, model) in &config.models {
3978            let Some(pricing) = &model.pricing else {
3979                continue;
3980            };
3981            assert!(
3982                pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
3983                "{id}: negative pricing — in={} out={}",
3984                pricing.input_per_mtok,
3985                pricing.output_per_mtok
3986            );
3987            if let Some(rate) = pricing.cache_read_per_mtok {
3988                assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
3989            }
3990            if let Some(rate) = pricing.cache_write_per_mtok {
3991                assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
3992            }
3993        }
3994    }
3995
3996    #[test]
3997    fn model_availability_parses_known_strings() {
3998        assert_eq!(
3999            ModelAvailability::parse("serverless"),
4000            Some(ModelAvailability::Serverless)
4001        );
4002        assert_eq!(
4003            ModelAvailability::parse("dedicated"),
4004            Some(ModelAvailability::Dedicated)
4005        );
4006        assert_eq!(
4007            ModelAvailability::parse("unknown"),
4008            Some(ModelAvailability::Unknown)
4009        );
4010        assert_eq!(ModelAvailability::parse("provisioned"), None);
4011        for value in [
4012            ModelAvailability::Serverless,
4013            ModelAvailability::Dedicated,
4014            ModelAvailability::Unknown,
4015        ] {
4016            assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
4017        }
4018    }
4019
4020    #[test]
4021    fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
4022        let config = default_config();
4023        let model = config
4024            .models
4025            .get("Qwen/Qwen3-Coder-Next-FP8")
4026            .expect("Together Qwen3 Coder Next FP8 is cataloged");
4027        assert_eq!(model.provider, "together");
4028        assert_eq!(model.availability, ModelAvailability::Dedicated);
4029    }
4030
4031    #[test]
4032    fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
4033        // A dedicated-only model behind a tier alias would silently fail
4034        // every serverless caller; the catalog must keep those routes
4035        // separated.
4036        let config = default_config();
4037        let dedicated: std::collections::BTreeSet<(&str, &str)> = config
4038            .models
4039            .iter()
4040            .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
4041            .map(|(id, model)| (model.provider.as_str(), id.as_str()))
4042            .collect();
4043        for (name, alias) in &config.aliases {
4044            if matches!(
4045                name.as_str(),
4046                "frontier"
4047                    | "mid"
4048                    | "small"
4049                    | "tier/frontier"
4050                    | "tier/mid"
4051                    | "tier/small"
4052                    | "sonnet"
4053                    | "opus"
4054                    | "haiku"
4055            ) {
4056                assert!(
4057                    !dedicated.contains(&(alias.provider.as_str(), alias.id.as_str())),
4058                    "tier alias `{name}` targets dedicated-only route `{}/{}`",
4059                    alias.provider,
4060                    alias.id,
4061                );
4062            }
4063        }
4064    }
4065
4066    #[test]
4067    fn embedded_catalog_tier_aliases_resolve_to_active_models() {
4068        // The three canonical tier aliases (frontier / mid / small) MUST
4069        // resolve to non-deprecated catalog entries; a default that
4070        // routes the loop into a sunsetted model is a release blocker.
4071        for alias in ["frontier", "mid", "small"] {
4072            let (model, _provider) = resolve_tier_model(alias, None)
4073                .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
4074            let entry = model_catalog_entry(&model).unwrap_or_else(|| {
4075                panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
4076            });
4077            assert!(
4078                !entry.deprecated,
4079                "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
4080                entry.deprecation_note
4081            );
4082        }
4083    }
4084
4085    #[test]
4086    fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
4087        // The `opus` alias must follow the newest Opus release, and that
4088        // release advertises its (off-by-default) fast-mode tier.
4089        let (model, provider) = resolve_model("opus");
4090        assert_eq!(model, "claude-opus-4-8");
4091        assert_eq!(provider.as_deref(), Some("anthropic"));
4092
4093        let opus48 = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
4094        assert!(!opus48.deprecated, "newest Opus must not be deprecated");
4095        let fast = opus48.fast_mode.expect("opus 4.8 advertises fast mode");
4096        assert_eq!(fast.param, "speed");
4097        assert_eq!(fast.value, "fast");
4098        assert_eq!(fast.status.as_deref(), Some("research_preview"));
4099        let fast_pricing = fast.pricing.expect("fast mode carries premium pricing");
4100        let standard = opus48.pricing.expect("opus 4.8 standard pricing");
4101        assert!(
4102            fast_pricing.input_per_mtok > standard.input_per_mtok,
4103            "fast mode must be premium-priced relative to standard"
4104        );
4105    }
4106
4107    #[test]
4108    fn superseded_opus_models_point_at_claude_opus_4_8() {
4109        // Earlier Opus rows are deprecated and carry a structured
4110        // `superseded_by` pointer to the current flagship.
4111        for model in ["claude-opus-4-7", "claude-opus-4-6"] {
4112            let entry =
4113                model_catalog_entry(model).unwrap_or_else(|| panic!("{model} catalog entry"));
4114            assert!(entry.deprecated, "{model} should be deprecated");
4115            assert_eq!(
4116                entry.superseded_by.as_deref(),
4117                Some("claude-opus-4-8"),
4118                "{model} should be superseded by claude-opus-4-8"
4119            );
4120        }
4121    }
4122
4123    #[test]
4124    fn opus_46_no_longer_advertises_fast_mode() {
4125        let opus46 = model_catalog_entry("claude-opus-4-6").expect("opus 4.6 catalog entry");
4126        assert!(
4127            opus46.fast_mode.is_none(),
4128            "Anthropic removed Opus 4.6 fast mode on 2026-06-29; Harn should not advertise it"
4129        );
4130
4131        let opus47 = model_catalog_entry("claude-opus-4-7").expect("opus 4.7 catalog entry");
4132        assert!(
4133            opus47.fast_mode.is_some(),
4134            "Opus 4.7 still advertises its own fast-mode tier"
4135        );
4136    }
4137
4138    #[test]
4139    fn gpt_5_5_fast_mode_rides_service_tier() {
4140        // Fast mode is provider-agnostic: OpenAI exposes it through the
4141        // `service_tier` knob rather than Anthropic's `speed`.
4142        let entry = model_catalog_entry("gpt-5.5").expect("gpt-5.5 catalog entry");
4143        let fast = entry.fast_mode.expect("gpt-5.5 advertises a fast tier");
4144        assert_eq!(fast.param, "service_tier");
4145        assert_eq!(fast.status.as_deref(), Some("ga"));
4146    }
4147}
harn_vm/llm_config.rs

harn_vm/
llm_config.rs