harn_vm/
llm_config.rs

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::{OnceLock, RwLock};
6
/// Process-wide provider configuration. Stored in a `OnceLock`, so it can be
/// initialized at most once for the lifetime of the process.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Set-once string; presumably the path `CONFIG` was loaded from — confirm
/// against the code that initializes it.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
/// Optional catalog overlay. The `RwLock<Option<..>>` inside the `OnceLock`
/// lets the overlay be replaced or cleared after the lock itself is created.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    /// Starts as `None` on every thread.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
18
/// Provider catalog configuration: provider definitions, aliases, model rows,
/// inference/tier rules, and per-model defaults.
///
/// Every field is `#[serde(default)]`, so a document that sets only a few
/// keys still deserializes. Layers are combined with
/// [`ProvidersConfig::merge_from`].
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Name of the default provider, if one is configured.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions keyed by provider name.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias definitions; presumably keyed by alias name — confirm against
    /// the resolver.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Tool-calling metadata; presumably keyed by alias name like `aliases`
    /// — confirm against the resolver.
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model rows; presumably keyed by model id — confirm against the
    /// catalog loader.
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// String-to-string defaults. NOTE(review): the meaning of "qc" is not
    /// visible in this part of the file — confirm before relying on it.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered inference rules. `merge_from` places overlay rules ahead of
    /// the existing ones.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered tier rules. `merge_from` places overlay rules ahead of the
    /// existing ones.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier fallback settings. `merge_from` replaces them wholesale when the
    /// overlay's `default` differs from `default_mid()`.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Default values grouped by an outer key that `merge_from` calls a
    /// pattern; inner maps are merged key-by-key.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Default values grouped by role name; inner maps are merged
    /// key-by-key.
    #[serde(default)]
    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Routes hidden from the exported/served catalog artifact.
    #[serde(default)]
    pub suppress: SuppressDef,
}
46
/// Routes hidden from the exported/served provider catalog artifact.
///
/// Lets an overlay drop baseline routes that are broken or unusable for the
/// embedding product (e.g. a dedicated-only serving route, or a local image
/// with a broken server-side tool parser) without forking the baseline
/// catalog. Suppression is artifact-level presentation: it removes the model
/// row, its aliases, and any recommendation variant derived from it, but does
/// not block runtime resolution of an explicitly requested model id.
///
/// Combined with the overlay's whole-row `models` replacement, this also
/// expresses route renames: define the row under the new id and suppress the
/// old one.
///
/// When configs are layered with `ProvidersConfig::merge_from`, overlay
/// routes are appended to the existing list and exact duplicates are skipped.
#[derive(Debug, Clone, Deserialize, Default, PartialEq, Eq)]
pub struct SuppressDef {
    /// `"provider:model_id"` selectors. Split on the FIRST colon only —
    /// model ids may themselves contain colons (e.g. Ollama image tags such
    /// as `ollama:qwen3.6:35b-a3b-coding-nvfp4`). Entries without a colon
    /// match nothing.
    #[serde(default)]
    pub routes: Vec<String>,
}
68
69impl ProvidersConfig {
70    pub fn is_empty(&self) -> bool {
71        self.default_provider.is_none()
72            && self.providers.is_empty()
73            && self.aliases.is_empty()
74            && self.alias_tool_calling.is_empty()
75            && self.models.is_empty()
76            && self.qc_defaults.is_empty()
77            && self.inference_rules.is_empty()
78            && self.tier_rules.is_empty()
79            && self.model_defaults.is_empty()
80            && self.model_roles.is_empty()
81            && self.suppress.routes.is_empty()
82            && self.tier_defaults.default == default_mid()
83    }
84
85    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
86        for (name, provider) in &overlay.providers {
87            match self.providers.get_mut(name) {
88                Some(existing) => existing.merge_from(provider),
89                None => {
90                    self.providers.insert(name.clone(), provider.clone());
91                }
92            }
93        }
94        self.aliases.extend(overlay.aliases.clone());
95        self.alias_tool_calling
96            .extend(overlay.alias_tool_calling.clone());
97        self.models.extend(overlay.models.clone());
98        self.qc_defaults.extend(overlay.qc_defaults.clone());
99
100        if overlay.default_provider.is_some() {
101            self.default_provider = overlay.default_provider.clone();
102        }
103
104        if !overlay.inference_rules.is_empty() {
105            let mut merged = overlay.inference_rules.clone();
106            merged.extend(self.inference_rules.clone());
107            self.inference_rules = merged;
108        }
109
110        if !overlay.tier_rules.is_empty() {
111            let mut merged = overlay.tier_rules.clone();
112            merged.extend(self.tier_rules.clone());
113            self.tier_rules = merged;
114        }
115
116        if overlay.tier_defaults.default != default_mid() {
117            self.tier_defaults = overlay.tier_defaults.clone();
118        }
119
120        for (pattern, defaults) in &overlay.model_defaults {
121            self.model_defaults
122                .entry(pattern.clone())
123                .or_default()
124                .extend(defaults.clone());
125        }
126
127        for (role, defaults) in &overlay.model_roles {
128            self.model_roles
129                .entry(role.clone())
130                .or_default()
131                .extend(defaults.clone());
132        }
133
134        for route in &overlay.suppress.routes {
135            if !self.suppress.routes.contains(route) {
136                self.suppress.routes.push(route.clone());
137            }
138        }
139    }
140}
141
/// Definition of a single provider: how to reach it, how to authenticate,
/// optional process-launch settings, and catalog metadata such as quotas,
/// pricing, and latency.
///
/// Deserialization goes through `ProviderDefWire` so that
/// `auth_style_explicit` can record whether `auth_style` was present in the
/// input.
#[derive(Debug, Clone)]
pub struct ProviderDef {
    /// Optional display name (presumably for UI/catalog output — confirm).
    pub display_name: Option<String>,
    /// Optional icon identifier; the expected format is not visible here.
    pub icon: Option<String>,
    /// Provider protocol. Omitted providers use Harn's normal HTTP provider
    /// path; `acp` launches an Agent Client Protocol server and drives it as
    /// an agent-backed provider.
    pub protocol: Option<String>,
    /// Base URL of the provider. Empty when not set; an empty overlay value
    /// never overwrites an existing one during `merge_from`.
    pub base_url: String,
    /// Presumably the name of an environment variable that can supply the
    /// base URL — confirm against the resolver.
    pub base_url_env: Option<String>,
    /// Authentication style; defaults to `"bearer"` when omitted.
    pub auth_style: String,
    /// Optional auth header name (presumably overriding the header implied
    /// by `auth_style` — confirm).
    pub auth_header: Option<String>,
    /// Environment variable name(s) holding the credential.
    pub auth_env: AuthEnv,
    /// Additional headers; merged per key by `merge_from`.
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint. Empty when not set; an empty overlay value never
    /// overwrites an existing one during `merge_from`.
    pub chat_endpoint: String,
    /// Optional completion endpoint.
    pub completion_endpoint: Option<String>,
    /// Provider process command (see the `local_runtime` note below: used
    /// for ACP or external provider adapters).
    pub command: Option<String>,
    /// Arguments for `command`. A non-empty overlay list replaces the whole
    /// list during `merge_from`.
    pub args: Vec<String>,
    /// Environment entries for the provider process (presumably — confirm);
    /// merged per key by `merge_from`.
    pub env: BTreeMap<String, String>,
    /// Presumably the working directory for `command` — confirm.
    pub cwd: Option<String>,
    /// MCP server descriptors kept as raw JSON values; their schema is not
    /// visible in this file.
    pub mcp_servers: Vec<serde_json::Value>,
    /// Optional healthcheck description.
    pub healthcheck: Option<HealthcheckDef>,
    /// Local runtime lifecycle metadata used by `harn local launch/stop`.
    /// This is intentionally separate from provider process fields such as
    /// `command`/`args`, which are used for ACP or external provider adapters.
    pub local_runtime: Option<LocalRuntimeDef>,
    /// Feature labels. A non-empty overlay list replaces the whole list
    /// during `merge_from`.
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    pub rpm: Option<u32>,
    /// Rich provider quota metadata. `rpm` remains as a legacy shorthand;
    /// when both are present, this nested shape is the authoritative catalog
    /// record and callers can still read the flattened `rpm`.
    pub rate_limits: Option<RateLimitsDef>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    pub latency_p50_ms: Option<u64>,
    /// Optional provider-level serving performance observations.
    pub performance: Option<ServingPerformanceDef>,
    /// `true` when `auth_style` was present in the deserialized input rather
    /// than filled in with the `"bearer"` default. `merge_from` uses it to
    /// decide whether an overlay's `auth_style` should win.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
192
/// Private deserialization mirror of `ProviderDef`.
///
/// It carries the same fields except `auth_style_explicit`, and keeps
/// `auth_style` as an `Option` so the hand-written `Deserialize` impl for
/// `ProviderDef` can tell an omitted value apart from an explicit one before
/// applying the `"bearer"` default. Every field is `#[serde(default)]`, so
/// all keys are optional in the input.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    // `None` means the key was absent; `ProviderDef` records that as
    // `auth_style_explicit == false`.
    #[serde(default)]
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    local_runtime: Option<LocalRuntimeDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
    #[serde(default)]
    performance: Option<ServingPerformanceDef>,
}
252
253impl<'de> Deserialize<'de> for ProviderDef {
254    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
255    where
256        D: serde::Deserializer<'de>,
257    {
258        let wire = ProviderDefWire::deserialize(deserializer)?;
259        let auth_style_explicit = wire.auth_style.is_some();
260        Ok(Self {
261            display_name: wire.display_name,
262            icon: wire.icon,
263            protocol: wire.protocol,
264            base_url: wire.base_url,
265            base_url_env: wire.base_url_env,
266            auth_style: wire.auth_style.unwrap_or_else(default_bearer),
267            auth_header: wire.auth_header,
268            auth_env: wire.auth_env,
269            extra_headers: wire.extra_headers,
270            chat_endpoint: wire.chat_endpoint,
271            completion_endpoint: wire.completion_endpoint,
272            command: wire.command,
273            args: wire.args,
274            env: wire.env,
275            cwd: wire.cwd,
276            mcp_servers: wire.mcp_servers,
277            healthcheck: wire.healthcheck,
278            local_runtime: wire.local_runtime,
279            features: wire.features,
280            fallback: wire.fallback,
281            retry_count: wire.retry_count,
282            retry_delay_ms: wire.retry_delay_ms,
283            rpm: wire.rpm,
284            rate_limits: wire.rate_limits,
285            cost_per_1k_in: wire.cost_per_1k_in,
286            cost_per_1k_out: wire.cost_per_1k_out,
287            latency_p50_ms: wire.latency_p50_ms,
288            performance: wire.performance,
289            auth_style_explicit,
290        })
291    }
292}
293
294impl Default for ProviderDef {
295    fn default() -> Self {
296        Self {
297            display_name: None,
298            icon: None,
299            protocol: None,
300            base_url: String::new(),
301            base_url_env: None,
302            auth_style: default_bearer(),
303            auth_header: None,
304            auth_env: AuthEnv::None,
305            extra_headers: BTreeMap::new(),
306            chat_endpoint: String::new(),
307            completion_endpoint: None,
308            command: None,
309            args: Vec::new(),
310            env: BTreeMap::new(),
311            cwd: None,
312            mcp_servers: Vec::new(),
313            healthcheck: None,
314            local_runtime: None,
315            features: Vec::new(),
316            fallback: None,
317            retry_count: None,
318            retry_delay_ms: None,
319            rpm: None,
320            rate_limits: None,
321            cost_per_1k_in: None,
322            cost_per_1k_out: None,
323            latency_p50_ms: None,
324            performance: None,
325            auth_style_explicit: false,
326        }
327    }
328}
329
330impl ProviderDef {
331    fn merge_from(&mut self, overlay: &ProviderDef) {
332        merge_option(&mut self.display_name, &overlay.display_name);
333        merge_option(&mut self.icon, &overlay.icon);
334        merge_option(&mut self.protocol, &overlay.protocol);
335        merge_string(&mut self.base_url, &overlay.base_url);
336        merge_option(&mut self.base_url_env, &overlay.base_url_env);
337        let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
338        if overlay.auth_style_explicit
339            || !overlay_uses_default_auth_style
340            || self.auth_style == default_bearer()
341        {
342            self.auth_style = overlay.auth_style.clone();
343            self.auth_style_explicit |=
344                overlay.auth_style_explicit || !overlay_uses_default_auth_style;
345        }
346        merge_option(&mut self.auth_header, &overlay.auth_header);
347        if !overlay.auth_env.is_none() {
348            self.auth_env = overlay.auth_env.clone();
349        }
350        self.extra_headers.extend(overlay.extra_headers.clone());
351        merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
352        merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
353        merge_option(&mut self.command, &overlay.command);
354        merge_vec(&mut self.args, &overlay.args);
355        self.env.extend(overlay.env.clone());
356        merge_option(&mut self.cwd, &overlay.cwd);
357        merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
358        merge_option(&mut self.healthcheck, &overlay.healthcheck);
359        merge_option(&mut self.local_runtime, &overlay.local_runtime);
360        merge_vec(&mut self.features, &overlay.features);
361        merge_option(&mut self.fallback, &overlay.fallback);
362        merge_option(&mut self.retry_count, &overlay.retry_count);
363        merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
364        merge_option(&mut self.rpm, &overlay.rpm);
365        merge_option(&mut self.rate_limits, &overlay.rate_limits);
366        merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
367        merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
368        merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
369        merge_option(&mut self.performance, &overlay.performance);
370    }
371}
372
/// Overwrites `base` with a clone of the overlay value when the overlay is
/// `Some`; a `None` overlay leaves `base` untouched.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
378
/// Replaces `base` with `overlay` unless the overlay is empty; an empty
/// overlay string means "not set" and leaves `base` as it was.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    *base = String::from(overlay);
}
384
/// Replaces the whole `base` list with a copy of `overlay` unless the
/// overlay is empty. Lists are never concatenated: a non-empty overlay wins
/// outright, an empty one changes nothing.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    if overlay.is_empty() {
        return;
    }
    *base = overlay.iter().cloned().collect();
}
390
/// The auth style assumed when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
394
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
///
/// Deserialized as an untagged enum, so the input carries no variant tag.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No auth env var configured. This is the `Default` value.
    #[default]
    None,
    /// A single env var name.
    Single(String),
    /// Several env var names, tried in order.
    Multiple(Vec<String>),
}
405
406impl AuthEnv {
407    fn is_none(&self) -> bool {
408        matches!(self, AuthEnv::None)
409    }
410}
411
/// Description of a provider healthcheck request. `method` is the only
/// required key; the remaining keys default to `None` when omitted.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Request method (presumably an HTTP verb such as `GET` — confirm
    /// against the code that performs the check).
    pub method: String,
    /// Optional request path; presumably resolved against the provider base
    /// URL — confirm.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional URL; presumably a full URL used instead of `path` — confirm.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<String>,
}
422
/// Lifecycle metadata for a locally run model runtime: how to launch it,
/// which CLI flags it understands, and how to stop it.
///
/// Every field is optional on input; unset options and empty lists are
/// omitted when serializing.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalRuntimeDef {
    /// Lifecycle style: `daemon_api` for runtimes with their own resident
    /// daemon (Ollama), `managed_process` for Harn-spawned servers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    /// Command Harn should execute for managed-process runtimes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<String>,
    /// Arguments that must appear immediately after the command, before model
    /// and server flags. Used by CLIs such as `vllm serve ...`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prefix_args: Vec<String>,
    /// Default model source/path/repo. User overlays may set this; embedded
    /// catalog rows avoid machine-specific absolute paths except examples.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source: Option<String>,
    /// Environment variable that can provide a model source.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source_env: Option<String>,
    /// Default port when the provider base URL has none.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_port: Option<u16>,
    /// Argument names used by the runtime CLI. This one presumably names
    /// the flag that takes the model source — confirm against the launcher.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_arg: Option<String>,
    /// Presumably the flag naming the model as served to clients — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub served_model_arg: Option<String>,
    /// Presumably the flag for the bind host — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_arg: Option<String>,
    /// Presumably the flag for the listen port — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port_arg: Option<String>,
    /// Presumably the flag for the context window size — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ctx_arg: Option<String>,
    /// Presumably the flag for the number of parallel slots/requests —
    /// confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_arg: Option<String>,
    /// Presumably the flag for the number of GPU-offloaded layers — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu_layers_arg: Option<String>,
    /// Presumably the flag for the K-cache type — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_k_arg: Option<String>,
    /// Presumably the flag for the V-cache type — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_v_arg: Option<String>,
    /// Presumably the flag for a cache RAM budget — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_ram_arg: Option<String>,
    /// Flag that enables adapter-aware serving for LoRA-capable runtimes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub enable_lora_arg: Option<String>,
    /// Flag that accepts one or more `name=path-or-repo` LoRA module specs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_arg: Option<String>,
    /// Optional rank-limit flag for runtimes that need an explicit ceiling.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_lora_rank_arg: Option<String>,
    /// Extra arguments Harn applies by default when launching this runtime.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub default_args: Vec<String>,
    /// Stop strategy: `keep_alive_zero`, `pid`, or `external`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop: Option<String>,
    /// Official docs/source URL for the lifecycle contract.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the local runtime row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Short operational note surfaced by CLI docs/help.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
492
/// Memory sizing metadata for a locally served model: empirical measurements
/// plus the coefficients and limits recorded alongside them.
///
/// Every field is optional on input; unset options and an empty multiplier
/// map are omitted when serializing.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct LocalMemoryDef {
    /// Empirical resident memory observed for this route/runtime.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_resident_gib: Option<f64>,
    /// Context size used for the empirical measurement.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_context_window: Option<u64>,
    /// KV-cache type used for the empirical measurement.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_cache_type: Option<String>,
    /// Approximate non-context resident footprint for this model/runtime.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_resident_gib: Option<f64>,
    /// Approximate GiB consumed by KV cache per 1,000 context tokens at the
    /// default cache type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_cache_gib_per_1k_ctx: Option<f64>,
    /// Cache-type multiplier relative to `kv_cache_gib_per_1k_ctx`, keyed by
    /// cache type name.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub cache_type_multipliers: BTreeMap<String, f64>,
    /// Cache type assumed when the launch command does not set K/V cache.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_cache_type: Option<String>,
    /// Minimum headroom Harn should leave for the OS and other apps.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety_margin_gib: Option<f64>,
    /// Highest context Harn should recommend automatically from this row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_recommended_context: Option<u64>,
    /// Official or empirical source for the sizing row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the sizing row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Short operational note surfaced by CLI diagnostics/docs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
533
534impl LocalMemoryDef {
535    pub fn is_empty(&self) -> bool {
536        self.measured_resident_gib.is_none()
537            && self.measured_context_window.is_none()
538            && self.measured_cache_type.is_none()
539            && self.base_resident_gib.is_none()
540            && self.kv_cache_gib_per_1k_ctx.is_none()
541            && self.cache_type_multipliers.is_empty()
542            && self.default_cache_type.is_none()
543            && self.safety_margin_gib.is_none()
544            && self.max_recommended_context.is_none()
545            && self.source_url.is_none()
546            && self.last_verified.is_none()
547            && self.notes.is_none()
548    }
549}
550
/// An alias entry: a model `id` paired with the `provider` that serves it,
/// plus an optional tool-format override.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id (presumably the id the alias resolves to — confirm against
    /// the resolver).
    pub id: String,
    /// Provider name (presumably a key of the config's `providers` map —
    /// confirm).
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
562
563#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
564pub struct AliasToolCallingDef {
565    #[serde(default)]
566    #[serde(skip_serializing_if = "Option::is_none")]
567    pub native: Option<String>,
568    #[serde(default)]
569    #[serde(skip_serializing_if = "Option::is_none")]
570    pub text: Option<String>,
571    #[serde(default)]
572    #[serde(skip_serializing_if = "Option::is_none")]
573    pub streaming_native: Option<String>,
574    #[serde(default)]
575    #[serde(skip_serializing_if = "Option::is_none")]
576    pub fallback_mode: Option<String>,
577    #[serde(default)]
578    #[serde(skip_serializing_if = "Option::is_none")]
579    pub failure_reason: Option<String>,
580    #[serde(default)]
581    #[serde(skip_serializing_if = "Option::is_none")]
582    pub last_probe_at: Option<String>,
583}
584
/// Per-model token pricing. The `_per_mtok` suffix presumably means "per
/// million tokens"; the currency is not stated in this struct — confirm
/// against the catalog documentation.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// Price for input tokens (required).
    pub input_per_mtok: f64,
    /// Price for output tokens (required).
    pub output_per_mtok: f64,
    /// Price for cache reads, when one is recorded.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Price for cache writes, when one is recorded.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
594
/// Provider or model quota metadata. Providers publish these along several
/// axes, and any one exhausted bucket can trigger throttling.
///
/// Every field is optional on input, and unset fields are omitted when
/// serializing.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RateLimitsDef {
    /// Requests per minute.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpm: Option<u32>,
    /// Requests per hour.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rph: Option<u32>,
    /// Requests per day.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpd: Option<u32>,
    /// Total tokens per minute.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpm: Option<u64>,
    /// Total tokens per hour.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tph: Option<u64>,
    /// Total tokens per day.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpd: Option<u64>,
    /// Input tokens per minute, when the provider splits input/output quotas.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tpm: Option<u64>,
    /// Output tokens per minute, when the provider splits input/output quotas.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tpm: Option<u64>,
    /// Concurrent in-flight requests, if published.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u32>,
    /// Account tier or route class these limits describe.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tier: Option<String>,
    /// Official source URL for the row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Free-text caveat for account-dependent or burst limits.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
639
640impl RateLimitsDef {
641    pub fn is_empty(&self) -> bool {
642        self.rpm.is_none()
643            && self.rph.is_none()
644            && self.rpd.is_none()
645            && self.tpm.is_none()
646            && self.tph.is_none()
647            && self.tpd.is_none()
648            && self.input_tpm.is_none()
649            && self.output_tpm.is_none()
650            && self.concurrency.is_none()
651            && self.tier.is_none()
652            && self.source_url.is_none()
653            && self.last_verified.is_none()
654            && self.notes.is_none()
655    }
656
657    pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
658        if self.rpm.is_none() {
659            self.rpm = rpm;
660        }
661        (!self.is_empty()).then_some(self)
662    }
663}
664
/// Optional provider/model serving-performance observation. This records
/// benchmark or live-probe facts, not a hard runtime contract; callers should
/// treat missing fields as unknown and stale dates as advisory.
///
/// Every field is optional on input, and unset fields are omitted when
/// serializing.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ServingPerformanceDef {
    /// Observed time-to-first-token in milliseconds.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub observed_ttft_ms: Option<u64>,
    /// Observed output generation rate in tokens per second.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens_per_sec: Option<f64>,
    /// End-to-end time-to-answer in seconds for the cited benchmark, when
    /// reported separately from TTFT/generation rate.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_answer_s: Option<f64>,
    /// Source label, e.g. `artificial_analysis`, `harn_probe`, or
    /// `provider_blog`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Source URL for the observation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the observation was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Number of requests or benchmark samples behind this row, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sample_size: Option<u32>,
    /// Short caveat such as streaming mode, warm/cold route, or prompt shape.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
697
698impl ServingPerformanceDef {
699    pub fn is_empty(&self) -> bool {
700        self.observed_ttft_ms.is_none()
701            && self.output_tokens_per_sec.is_none()
702            && self.time_to_answer_s.is_none()
703            && self.source.is_none()
704            && self.source_url.is_none()
705            && self.last_verified.is_none()
706            && self.sample_size.is_none()
707            && self.notes.is_none()
708    }
709}
710
/// Logical-model facts separated from provider serving routes. These fields
/// describe the underlying weights or public model family, not Harn's alias or
/// provider/model selector.
///
/// Every field is optional on input, and unset fields are omitted when
/// serializing.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ModelArchitectureDef {
    /// Total parameter count in billions.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parameter_count_b: Option<f64>,
    /// Active parameter count in billions for MoE models.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_parameter_count_b: Option<f64>,
    /// True for mixture-of-experts models.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moe: Option<bool>,
    /// Quantization advertised by this route, if route-specific.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quantization: Option<String>,
    /// Numeric precision advertised by this route, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precision: Option<String>,
    /// License identifier or short label.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Tokenizer family or implementation hint.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokenizer: Option<String>,
    /// Public knowledge cutoff claim, when published.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub knowledge_cutoff: Option<String>,
    /// Official source URL for these facts.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when these facts were last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
}
747
748impl ModelArchitectureDef {
749    pub fn is_empty(&self) -> bool {
750        self.parameter_count_b.is_none()
751            && self.active_parameter_count_b.is_none()
752            && self.moe.is_none()
753            && self.quantization.is_none()
754            && self.precision.is_none()
755            && self.license.is_none()
756            && self.tokenizer.is_none()
757            && self.knowledge_cutoff.is_none()
758            && self.source_url.is_none()
759            && self.last_verified.is_none()
760    }
761}
762
763/// Optional accelerated-serving ("fast mode") tier for a model. Off by
764/// default: its presence only *describes* that the provider offers a
765/// faster, premium-priced serving path running the same weights — callers
766/// must explicitly opt in via the provider's request knob, so nothing here
767/// changes default behavior. Deliberately provider-agnostic: Anthropic
768/// exposes the tier as `speed = "fast"` (beta-gated), while OpenAI uses
769/// `service_tier = "fast"` / `"priority"`. Premium pricing is stored as
770/// absolute per-MTok rates rather than a single multiplier because
771/// providers price the tier asymmetrically (Anthropic Opus 4.8 is 2x
772/// standard; Opus 4.7 fast mode is 6x).
773#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
774pub struct FastModeDef {
775    /// Request field that opts into the fast tier (e.g. "speed" for
776    /// Anthropic, "service_tier" for OpenAI).
777    pub param: String,
778    /// Value to send on `param` (e.g. "fast", "priority").
779    pub value: String,
780    /// Provider beta/feature header required to use the tier, if any
781    /// (e.g. Anthropic "fast-mode-2026-02-01").
782    #[serde(default)]
783    pub beta_header: Option<String>,
784    /// Output-tokens-per-second speedup vs standard serving (e.g. 2.5).
785    #[serde(default)]
786    pub otps_speedup: Option<f64>,
787    /// Lifecycle of the fast tier: "ga" | "research_preview" |
788    /// "deprecated". None when unspecified.
789    #[serde(default)]
790    pub status: Option<String>,
791    /// Premium pricing charged while the fast tier is active (absolute
792    /// per-MTok rates, not a multiplier on standard pricing).
793    #[serde(default)]
794    pub pricing: Option<ModelPricing>,
795    /// Free-text note: constraints, deprecation timeline, etc.
796    #[serde(default)]
797    pub note: Option<String>,
798}
799
800#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
801pub struct ModelDef {
802    pub name: String,
803    pub provider: String,
804    pub context_window: u64,
805    /// Provider-independent logical model id, when multiple serving routes map
806    /// to the same weights or model family.
807    #[serde(default)]
808    pub logical_model: Option<String>,
809    /// Equivalence class for failover/escalation candidates. Entries in the
810    /// same group are capability-compatible alternatives, not byte-identical
811    /// APIs; callers must still re-render transcripts for the target provider.
812    #[serde(default)]
813    pub equivalence_group: Option<String>,
814    /// Serving-route detail such as "serverless", "priority", "fp8", or a
815    /// provider route slug. This is intentionally separate from `name`.
816    #[serde(default)]
817    pub served_variant: Option<String>,
818    /// Provider-native model id to send on the wire. Defaults to the catalog
819    /// key. Required when two providers expose the same native id and Harn
820    /// needs a unique catalog key for each route.
821    #[serde(default)]
822    pub wire_model: Option<String>,
823    /// Preferred API dialect for the route, e.g. `openai_chat`,
824    /// `openai_responses`, `anthropic_messages`, `gemini_generate_content`.
825    #[serde(default)]
826    pub api_dialect: Option<String>,
827    /// Route-specific token/request quota metadata.
828    #[serde(default)]
829    pub rate_limits: Option<RateLimitsDef>,
830    /// Optional route-level serving performance observations.
831    #[serde(default)]
832    pub performance: Option<ServingPerformanceDef>,
833    /// Underlying model architecture facts separated from the provider id.
834    #[serde(default)]
835    pub architecture: Option<ModelArchitectureDef>,
836    /// Local launch memory-sizing hints used by `harn local launch`.
837    #[serde(default)]
838    pub local_memory: Option<LocalMemoryDef>,
839    #[serde(default)]
840    pub runtime_context_window: Option<u64>,
841    #[serde(default)]
842    pub stream_timeout: Option<f64>,
843    #[serde(default)]
844    pub capabilities: Vec<String>,
845    #[serde(default)]
846    pub pricing: Option<ModelPricing>,
847    #[serde(default)]
848    pub deprecated: bool,
849    #[serde(default)]
850    pub deprecation_note: Option<String>,
851    /// Structured replacement pointer: the catalog id of the model that
852    /// supersedes this one (e.g. an older Opus row points at the newest
853    /// Opus). Lets release tooling express "migrate to X" in a
854    /// machine-readable way instead of burying it in `deprecation_note`
855    /// free text. A model may be superseded without being `deprecated`
856    /// (a newer option exists but this one is still fully supported);
857    /// pair it with `deprecated = true` once a sunset is announced.
858    #[serde(default)]
859    pub superseded_by: Option<String>,
860    /// Accelerated-serving ("fast mode") tier metadata, when the model's
861    /// provider offers one. Off by default — see [`FastModeDef`]. None for
862    /// models with no faster serving path.
863    #[serde(default)]
864    pub fast_mode: Option<FastModeDef>,
865    #[serde(default)]
866    pub quality_tags: Vec<String>,
867    /// Whether the model can be reached over a normal API-key serverless call,
868    /// or only via a dedicated/provisioned endpoint that the caller must spin
869    /// up out-of-band. Providers like Together list dedicated-only routes
870    /// alongside serverless ones in `/v1/models`, so this metadata lets clients
871    /// avoid presenting them as one-click options.
872    #[serde(default)]
873    pub availability: ModelAvailability,
874    /// Popular-consensus tier label. Enum-typed string: "small" | "mid" |
875    /// "frontier" | "reasoning". Self-declared per model (no pattern-matched
876    /// rule table) so the catalog is the single source of truth. When None
877    /// the resolver returns the catalog default ("mid"). Use the richer
878    /// `strengths` + `benchmarks` fields to pick models for specific
879    /// workloads — `tier` exists only as a coarse popular-consensus shortcut.
880    #[serde(default)]
881    pub tier: Option<String>,
882    /// True when the model weights are downloadable / self-hostable
883    /// (open-weight / open-source license, regardless of commercial-use
884    /// restrictions). False when weights are closed (Anthropic, OpenAI,
885    /// Google, etc.). None when the catalog row predates the migration.
886    #[serde(default)]
887    pub open_weight: Option<bool>,
888    /// Workload-shaped strength tags. Conventional values include
889    /// `coding`, `summarization`, `long_context`, `tool_use`, `reasoning`,
890    /// `vision`, `speed`, `cheap`, `agentic`. Selectors should treat
891    /// missing entries as "no claim" rather than "no strength."
892    #[serde(default)]
893    pub strengths: Vec<String>,
894    /// Public benchmark numbers, keyed by a snake_case identifier
895    /// (`swe_bench_verified`, `humaneval`, `aa_intelligence_index`, etc.).
896    /// Values are the raw published scores. The selector layer is free
897    /// to normalize per benchmark; the catalog records the canonical
898    /// score so future readers can audit the source.
899    #[serde(default)]
900    pub benchmarks: BTreeMap<String, f64>,
901    /// Normalized model-family token used as a diversity signal for
902    /// reviewer selection. Distinct from provider: hosted wrappers should
903    /// keep the underlying family (for example OpenRouter-hosted Claude
904    /// still uses `anthropic-claude`).
905    #[serde(default)]
906    pub family: Option<String>,
907    /// Narrower family lineage used by option-pack calibration.
908    #[serde(default)]
909    pub lineage: Option<String>,
910    /// Preferred reviewer families for critique/review workloads.
911    #[serde(default)]
912    pub complementary_with: Vec<String>,
913    /// Author families, lineages, model ids, or provider/model selectors
914    /// this row should not review.
915    #[serde(default)]
916    pub avoid_as_reviewer_for: Vec<String>,
917}
918
919#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
920#[serde(rename_all = "snake_case")]
921pub enum ModelAvailability {
922    /// Reachable through the provider's normal API-key path with no extra
923    /// setup. The default for cataloged hosted/local models: by cataloging a
924    /// row we are claiming the route works out of the box.
925    #[default]
926    Serverless,
927    /// Requires the caller to provision a dedicated endpoint before requests
928    /// will succeed. The catalog row exists for selection/pricing UI, but
929    /// hosts must not auto-route to it.
930    Dedicated,
931    /// Availability is not known ahead of time. Used for routes that were
932    /// surfaced dynamically (e.g. through `/v1/models`) without a static
933    /// claim from Harn or the user.
934    Unknown,
935}
936
937impl ModelAvailability {
938    pub fn as_str(self) -> &'static str {
939        match self {
940            Self::Serverless => "serverless",
941            Self::Dedicated => "dedicated",
942            Self::Unknown => "unknown",
943        }
944    }
945
946    pub fn parse(value: &str) -> Option<Self> {
947        match value {
948            "serverless" => Some(Self::Serverless),
949            "dedicated" => Some(Self::Dedicated),
950            "unknown" => Some(Self::Unknown),
951            _ => None,
952        }
953    }
954}
955
956#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
957pub struct ResolvedModel {
958    pub id: String,
959    pub provider: String,
960    pub alias: Option<String>,
961    pub tool_format: String,
962    pub tier: String,
963    pub family: String,
964    pub lineage: String,
965}
966
967#[derive(Debug, Clone, PartialEq)]
968pub struct ComplementaryReviewerOptions {
969    pub author_model: String,
970    pub author_provider: Option<String>,
971    pub intent: ComplementaryReviewerIntent,
972    pub max_price_multiplier: Option<f64>,
973}
974
/// Kind of review a complementary reviewer is asked to perform.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    Review,
    Critique,
    PlanReview,
}

impl ComplementaryReviewerIntent {
    /// Looks up the variant whose [`Self::as_str`] label equals `value`
    /// exactly. Any other string, including different casing, yields `None`.
    pub fn parse(value: &str) -> Option<Self> {
        [Self::Review, Self::Critique, Self::PlanReview]
            .iter()
            .copied()
            .find(|intent| intent.as_str() == value)
    }

    /// Stable snake_case label of the variant.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Review => "review",
            Self::Critique => "critique",
            Self::PlanReview => "plan_review",
        }
    }
}
1000
1001#[derive(Debug, Clone, Serialize, PartialEq)]
1002pub struct ComplementaryReviewerSelection {
1003    pub intent: String,
1004    pub author: ComplementaryModelIdentity,
1005    pub reviewer: ComplementaryModelIdentity,
1006    pub fallback: bool,
1007    pub fallback_reason: Option<String>,
1008    /// Machine-readable reason a caller can branch on when `fallback` is
1009    /// `true`, distinct from the human-readable `fallback_reason`/`reason`
1010    /// prose. `None` on the success path. Lets a caller hard-fail an
1011    /// independent-review step rather than silently degrade to self-review.
1012    /// See [`ReviewerFallbackCode`] for the stable set of values.
1013    #[serde(skip_serializing_if = "Option::is_none")]
1014    pub fallback_code: Option<String>,
1015    pub reason: String,
1016    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
1017}
1018
/// Machine-readable reasons for which `pick_complementary_reviewer` falls
/// back to the author model. The value is serialized as the `fallback_code`
/// string, letting harn pipelines and Rust callers branch deterministically
/// rather than parse prose. The set is an append-only contract: new variants
/// may be added, existing codes never change.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ReviewerFallbackCode {
    /// The family of the author model could not be resolved, which rules out
    /// any independent-family comparison.
    UnknownAuthorFamily,
    /// Candidates from other families exist, yet none fits within
    /// `max_price_multiplier`.
    NoDiffFamilyWithinPrice,
    /// The catalog holds no active, serverless reviewer from a different
    /// family.
    NoDiffFamilyServerless,
    /// Candidates from other families exist but every one was excluded (for
    /// example each declares `avoid_as_reviewer_for` the author).
    AllDiffFamilyExcluded,
}

impl ReviewerFallbackCode {
    /// Stable snake_case code string for the variant.
    pub fn as_code(self) -> &'static str {
        // Exhaustive on purpose: a new variant must pick its code here.
        match self {
            Self::AllDiffFamilyExcluded => "all_diff_family_excluded",
            Self::NoDiffFamilyServerless => "no_diff_family_serverless",
            Self::NoDiffFamilyWithinPrice => "no_diff_family_within_price",
            Self::UnknownAuthorFamily => "unknown_author_family",
        }
    }
}
1047
1048#[derive(Debug, Clone, Serialize, PartialEq)]
1049pub struct ComplementaryModelIdentity {
1050    pub id: String,
1051    pub provider: String,
1052    pub family: String,
1053    pub lineage: String,
1054    pub tier: String,
1055    #[serde(skip_serializing_if = "Option::is_none")]
1056    pub pricing: Option<ModelPricing>,
1057}
1058
1059#[derive(Debug, Clone, Serialize, PartialEq)]
1060pub struct ComplementaryCostEstimate {
1061    pub input_per_mtok: f64,
1062    pub output_per_mtok: f64,
1063    pub total_per_mtok: f64,
1064    #[serde(skip_serializing_if = "Option::is_none")]
1065    pub multiplier_vs_author: Option<f64>,
1066}
1067
1068#[derive(Debug, Clone, Deserialize)]
1069pub struct InferenceRule {
1070    #[serde(default)]
1071    pub pattern: Option<String>,
1072    #[serde(default)]
1073    pub contains: Option<String>,
1074    #[serde(default)]
1075    pub exact: Option<String>,
1076    pub provider: String,
1077}
1078
1079#[derive(Debug, Clone, Deserialize)]
1080pub struct TierRule {
1081    #[serde(default)]
1082    pub pattern: Option<String>,
1083    #[serde(default)]
1084    pub contains: Option<String>,
1085    #[serde(default)]
1086    pub exact: Option<String>,
1087    pub tier: String,
1088}
1089
1090#[derive(Debug, Clone, Deserialize)]
1091pub struct TierDefaults {
1092    #[serde(default = "default_mid")]
1093    pub default: String,
1094}
1095
1096impl Default for TierDefaults {
1097    fn default() -> Self {
1098        Self {
1099            default: default_mid(),
1100        }
1101    }
1102}
1103
/// Fallback tier label (`"mid"`) used by [`TierDefaults`].
fn default_mid() -> String {
    String::from("mid")
}
1107
1108/// Load and cache the providers config. Called once at VM startup.
1109pub fn load_config() -> &'static ProvidersConfig {
1110    CONFIG.get_or_init(|| {
1111        let mut config = default_config();
1112        let verbose_config_logging = matches!(
1113            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
1114            Some("1" | "true" | "TRUE" | "yes" | "YES")
1115        ) || matches!(
1116            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
1117            Some("1" | "true" | "TRUE" | "yes" | "YES")
1118        );
1119        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
1120            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
1121                config.merge_from(&overlay);
1122                let _ = CONFIG_PATH.set(path);
1123                return config;
1124            }
1125        }
1126        if should_load_home_config() {
1127            if let Some(home) = dirs_or_home() {
1128                let path = format!("{home}/.config/harn/providers.toml");
1129                if let Some(overlay) = read_external_config(&path, false) {
1130                    config.merge_from(&overlay);
1131                    let _ = CONFIG_PATH.set(path);
1132                    return config;
1133                }
1134            }
1135        }
1136        config
1137    })
1138}
1139
1140fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1141    match std::fs::read_to_string(path) {
1142        Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
1143            Ok(config) => {
1144                if verbose {
1145                    eprintln!(
1146                        "[llm_config] Loaded {} providers, {} aliases from {}",
1147                        config.providers.len(),
1148                        config.aliases.len(),
1149                        path
1150                    );
1151                }
1152                Some(config)
1153            }
1154            Err(error) => {
1155                eprintln!("[llm_config] TOML parse error in {path}: {error}");
1156                None
1157            }
1158        },
1159        Err(error) => {
1160            if verbose {
1161                eprintln!("[llm_config] Cannot read {path}: {error}");
1162            }
1163            None
1164        }
1165    }
1166}
1167
/// Whether the per-user home config file may be consulted.
fn should_load_home_config() -> bool {
    // Unit tests are meant to exercise the embedded defaults plus explicit
    // overlays, never whatever provider file sits on the developer machine.
    let under_test = cfg!(test);
    !under_test
}
1173
1174/// Parse a provider/model catalog overlay in the same shape as
1175/// `providers.toml` or `[llm]` package-manifest sections.
1176pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1177    toml::from_str::<ProvidersConfig>(src)
1178}
1179
1180/// Returns the filesystem path of the currently-loaded providers config, if
1181/// any. Returns `None` when built-in defaults are active.
1182pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1183    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
1184    let _ = load_config();
1185    CONFIG_PATH.get().map(std::path::PathBuf::from)
1186}
1187
1188/// Install per-run provider config overlays. The overlay uses the same shape as
1189/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
1190/// manifests. Passing `None` clears the overlay.
1191pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1192    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1193}
1194
1195/// Clear per-run provider config overlays.
1196pub fn clear_user_overrides() {
1197    set_user_overrides(None);
1198}
1199
1200/// Install the process-wide runtime catalog overlay used by
1201/// `provider_catalog::refresh_runtime_catalog`. Per-run user overlays still
1202/// merge last so project-local provider config can override hosted catalog
1203/// updates.
1204pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1205    *runtime_catalog_overlay()
1206        .write()
1207        .expect("runtime catalog overlay poisoned") = config;
1208}
1209
1210pub fn clear_runtime_catalog_overlay() {
1211    set_runtime_catalog_overlay(None);
1212}
1213
1214pub(crate) fn effective_config() -> ProvidersConfig {
1215    let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1216    effective_config_with_user_overrides(user_overrides.as_ref())
1217}
1218
1219/// Provider config built purely from the compiled-in `EMBEDDED_PROVIDERS_TOML`
1220/// snapshot, ignoring every ambient layer: the developer's
1221/// `~/.config/harn/providers.toml`, `HARN_PROVIDERS_CONFIG`, the process
1222/// runtime-catalog overlay, and thread-local user overrides.
1223///
1224/// This is the hermetic source of truth for *generating* the checked-in
1225/// `spec/provider-catalog/*` artifacts. Artifact generation must be a pure
1226/// function of the source tree so a developer's personal aliases/providers
1227/// never leak into shipped artifacts (which then makes clean CI flag drift).
1228/// Runtime catalog presentation must keep using [`effective_config`] /
1229/// [`effective_config_with_user_overrides`], which legitimately reflect the
1230/// host's live configuration.
1231///
1232/// An optional explicit overlay (e.g. a `--overlay` file named on the command
1233/// line) is merged on top of the embedded base. Unlike the home file and env
1234/// layers, that overlay is a declared, reproducible input rather than ambient
1235/// machine state, so it is safe to honor while staying hermetic.
1236pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1237    let mut config = default_config();
1238    if let Some(overlay) = explicit_overlay {
1239        config.merge_from(overlay);
1240    }
1241    config
1242}
1243
1244pub(crate) fn effective_config_with_user_overrides(
1245    user_overrides: Option<&ProvidersConfig>,
1246) -> ProvidersConfig {
1247    let mut merged = load_config().clone();
1248    if let Some(overlay) = runtime_catalog_overlay()
1249        .read()
1250        .expect("runtime catalog overlay poisoned")
1251        .as_ref()
1252    {
1253        merged.merge_from(overlay);
1254    }
1255    if let Some(overlay) = user_overrides {
1256        merged.merge_from(overlay);
1257    }
1258    merged
1259}
1260
1261fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
1262    RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
1263}
1264
1265/// Resolve a model alias to (model_id, provider_name).
1266pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1267    let config = effective_config();
1268    if let Some(a) = config.aliases.get(alias) {
1269        return (a.id.clone(), Some(a.provider.clone()));
1270    }
1271    (normalize_model_id(alias), None)
1272}
1273
/// Selector prefixes that name a transport/host rather than being part of
/// the provider-native model id.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];

/// Removes a leading host/provider selector prefix, which identifies the
/// transport and is not part of the provider-native model id. This mirrors
/// the host's existing normalization, so `ollama:qwen3:30b` is sent to Ollama
/// as `qwen3:30b` and not as an invalid model called `ollama`. Cerebras
/// follows the same convention with a slash separator
/// (`cerebras/gpt-oss-120b`), since its own /v1/models endpoint returns bare
/// names that overlap OpenAI's families.
///
/// Only the first matching prefix is removed, and only once; input without a
/// known prefix comes back unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}
1291
1292/// Resolve an alias or selector into the complete catalog identity hosts need:
1293/// provider inference, prefix-normalized model id, default tool format, and tier.
1294pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1295    let config = effective_config();
1296    if let Some(alias) = config.aliases.get(selector) {
1297        let id = alias.id.clone();
1298        let provider = alias.provider.clone();
1299        let requested = alias
1300            .tool_format
1301            .clone()
1302            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1303        let tool_format = guard_tool_format(&provider, &id, &requested, Some(selector));
1304        return ResolvedModel {
1305            tier: model_tier_with_config(&config, &id),
1306            family: model_family_with_config(&config, &provider, &id),
1307            lineage: model_lineage_with_config(&config, &provider, &id),
1308            id,
1309            provider,
1310            alias: Some(selector.to_string()),
1311            tool_format,
1312        };
1313    }
1314
1315    let id = normalize_model_id(selector);
1316    let inference = infer_provider_with_config(&config, selector);
1317    let source = inference.source;
1318    let provider = inference.provider;
1319    let requested = default_tool_format_with_config(&config, &id, &provider);
1320    let tool_format = guard_tool_format(&provider, &id, &requested, None);
1321    let tier = model_tier_with_config(&config, &id);
1322    let family = model_family_with_inference_source(&config, &provider, &id, source);
1323    let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1324    ResolvedModel {
1325        id,
1326        provider,
1327        alias: None,
1328        tool_format,
1329        tier,
1330        family,
1331        lineage,
1332    }
1333}
1334
1335/// Run the requested `tool_format` through the capability registry's
1336/// dialect-validity gate, returning the safe format to actually use. When the
1337/// registry auto-corrects a known-broken combo (e.g. a `native` pin on a
1338/// `native_unreliable` route that silently drops to unparsed DSML text), the
1339/// correction is logged once at resolution time so a harness developer sees
1340/// *why* their pinned format was not honored — never a silent vanishing.
1341fn guard_tool_format(provider: &str, model: &str, requested: &str, alias: Option<&str>) -> String {
1342    let decision = crate::llm::capabilities::validate_tool_format(provider, model, requested);
1343    if let Some(reason) = &decision.correction {
1344        tracing::warn!(
1345            target: "harn::llm::tool_format",
1346            alias = alias.unwrap_or(""),
1347            "{reason}"
1348        );
1349    }
1350    decision.effective
1351}
1352
1353/// Infer provider from a model ID using inference rules.
1354pub fn infer_provider(model_id: &str) -> String {
1355    infer_provider_detail(model_id).provider
1356}
1357
1358/// Infer provider from a model ID and retain whether the configured default was used.
1359pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1360    let config = effective_config();
1361    infer_provider_with_config(&config, model_id)
1362}
1363
1364fn infer_provider_with_config(
1365    config: &ProvidersConfig,
1366    model_id: &str,
1367) -> crate::llm::provider::ProviderInference {
1368    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1369        return crate::llm::provider::ProviderInference::builtin("ollama");
1370    }
1371    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1372        return crate::llm::provider::ProviderInference::builtin("huggingface");
1373    }
1374    // Exact catalog rows are the most authoritative declaration of where
1375    // a model is hosted: any pattern-based inference rule is necessarily
1376    // less specific than `[models."<id>"].provider = "<name>"`. Catalogs
1377    // include user overlays, so users can still re-home a model by
1378    // setting a catalog entry in their own providers.toml.
1379    let normalized_id = normalize_model_id(model_id);
1380    if let Some(model) = config
1381        .models
1382        .get(model_id)
1383        .or_else(|| config.models.get(&normalized_id))
1384    {
1385        return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1386    }
1387    for rule in &config.inference_rules {
1388        if let Some(exact) = &rule.exact {
1389            if model_id == exact {
1390                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1391            }
1392        }
1393        if let Some(pattern) = &rule.pattern {
1394            if glob_match(pattern, model_id) {
1395                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1396            }
1397        }
1398        if let Some(substr) = &rule.contains {
1399            if model_id.contains(substr.as_str()) {
1400                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1401            }
1402        }
1403    }
1404    crate::llm::provider::infer_provider_from_model_id(
1405        model_id,
1406        &default_provider_with_config(config),
1407    )
1408}
1409
1410pub fn default_provider() -> String {
1411    let config = effective_config();
1412    default_provider_with_config(&config)
1413}
1414
1415fn default_provider_with_config(config: &ProvidersConfig) -> String {
1416    std::env::var("HARN_DEFAULT_PROVIDER")
1417        .ok()
1418        .map(|value| value.trim().to_string())
1419        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1420        .or_else(|| {
1421            config
1422                .default_provider
1423                .as_deref()
1424                .map(str::trim)
1425                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1426                .map(str::to_string)
1427        })
1428        .unwrap_or_else(|| auto_select_provider(config))
1429}
1430
/// Provider used when nothing is configured and no credentials are detected.
/// Anthropic is Harn's documented default. [`auto_select_provider`] reaches
/// for it only after probing for a credentialed or local provider, and it
/// warns once, so adopters who lack Anthropic credentials get a clear nudge
/// rather than a raw auth failure.
const FALLBACK_PROVIDER: &str = "anthropic";

/// Set once the provider auto-selection notice has been emitted, so the
/// notice appears at most once per process.
static AUTO_PROVIDER_WARNED: AtomicBool = AtomicBool::new(false);
1439
1440/// True when any of the provider's auth env vars holds a non-empty value.
1441fn provider_has_credentials(def: &ProviderDef) -> bool {
1442    auth_env_names(&def.auth_env)
1443        .iter()
1444        .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty()))
1445}
1446
1447/// True when the provider can serve without cloud credentials — a managed
1448/// local runtime (`harn local`) or an auth-free endpoint such as Ollama.
1449fn provider_is_local(def: &ProviderDef) -> bool {
1450    def.local_runtime.is_some() || matches!(def.auth_env, AuthEnv::None)
1451}
1452
1453/// Emit a provider auto-selection notice at most once per process.
1454fn warn_auto_provider_once(message: &str) {
1455    if !AUTO_PROVIDER_WARNED.swap(true, Ordering::Relaxed) {
1456        crate::events::log_warn("llm_config", message);
1457    }
1458}
1459
1460/// Choose a provider when neither `HARN_DEFAULT_PROVIDER` nor
1461/// `config.default_provider` is set. Prefers a credentialed cloud provider,
1462/// then a locally-available one, and only then falls back to the documented
1463/// default. Detection is portable: it reads provider `auth_env` variables and
1464/// `local_runtime` metadata from the catalog — never hardcoded paths or ports.
1465fn auto_select_provider(config: &ProvidersConfig) -> String {
1466    // Well-known providers first for a stable, predictable choice; then any
1467    // other configured provider (BTreeMap iteration is sorted/deterministic).
1468    const PREFERRED: &[&str] = &[
1469        "anthropic",
1470        "openai",
1471        "google",
1472        "azure-openai",
1473        "groq",
1474        "mistral",
1475        "deepseek",
1476        "xai",
1477        "openrouter",
1478    ];
1479    for name in PREFERRED {
1480        if config
1481            .providers
1482            .get(*name)
1483            .is_some_and(provider_has_credentials)
1484        {
1485            if *name != FALLBACK_PROVIDER {
1486                warn_auto_provider_once(&format!(
1487                    "no default provider configured; using '{name}' (its API key is set). \
1488                     Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1489                ));
1490            }
1491            return (*name).to_string();
1492        }
1493    }
1494    for (name, def) in &config.providers {
1495        if provider_has_credentials(def) {
1496            warn_auto_provider_once(&format!(
1497                "no default provider configured; using '{name}' (its API key is set). \
1498                 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1499            ));
1500            return name.clone();
1501        }
1502    }
1503    // No cloud credentials: prefer something that runs locally with no key.
1504    for (name, def) in &config.providers {
1505        if provider_is_local(def) {
1506            warn_auto_provider_once(&format!(
1507                "no provider API keys found; using local provider '{name}'. \
1508                 Set an API key + HARN_DEFAULT_PROVIDER to use a cloud provider."
1509            ));
1510            return name.clone();
1511        }
1512    }
1513    // Nothing detected. Fall back to the documented default and say how to fix.
1514    warn_auto_provider_once(&format!(
1515        "no LLM provider configured and no API keys detected; defaulting to \
1516         '{FALLBACK_PROVIDER}'. Set ANTHROPIC_API_KEY (or another provider's key plus \
1517         HARN_DEFAULT_PROVIDER), or run a local model with `harn local launch`."
1518    ));
1519    FALLBACK_PROVIDER.to_string()
1520}
1521
1522/// Get model tier ("small", "mid", "frontier").
1523pub fn model_tier(model_id: &str) -> String {
1524    let config = effective_config();
1525    model_tier_with_config(&config, model_id)
1526}
1527
1528pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1529    // Per-model self-declared tier wins. This is the only path.
1530    if let Some(model) = config.models.get(model_id) {
1531        if let Some(tier) = model.tier.as_deref() {
1532            let trimmed = tier.trim();
1533            if !trimmed.is_empty() {
1534                return trimmed.to_string();
1535            }
1536        }
1537    }
1538    // Legacy pattern-rules: still consulted while we finish migrating the
1539    // long tail of models to per-row `tier = "..."`. Newly added rows
1540    // should set `tier` directly; the rule table is a fallback only.
1541    for rule in &config.tier_rules {
1542        if let Some(exact) = &rule.exact {
1543            if model_id == exact {
1544                return rule.tier.clone();
1545            }
1546        }
1547        if let Some(pattern) = &rule.pattern {
1548            if glob_match(pattern, model_id) {
1549                return rule.tier.clone();
1550            }
1551        }
1552        if let Some(substr) = &rule.contains {
1553            if model_id.contains(substr.as_str()) {
1554                return rule.tier.clone();
1555            }
1556        }
1557    }
1558    config.tier_defaults.default.clone()
1559}
1560
1561/// Return the normalized model-family token used for cross-family review.
1562pub fn model_family(provider: &str, model_id: &str) -> String {
1563    let config = effective_config();
1564    model_family_with_config(&config, provider, model_id)
1565}
1566
1567pub(crate) fn model_family_with_config(
1568    config: &ProvidersConfig,
1569    provider: &str,
1570    model_id: &str,
1571) -> String {
1572    catalog_family_token(config, model_id)
1573        .unwrap_or_else(|| derive_model_family(provider, model_id))
1574}
1575
1576fn model_family_with_inference_source(
1577    config: &ProvidersConfig,
1578    provider: &str,
1579    model_id: &str,
1580    source: crate::llm::provider::ProviderInferenceSource,
1581) -> String {
1582    if let Some(family) = catalog_family_token(config, model_id) {
1583        return family;
1584    }
1585    let id_family = derive_model_family("", model_id);
1586    if id_family != "unknown" {
1587        return id_family;
1588    }
1589    if matches!(
1590        source,
1591        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1592    ) {
1593        return "unknown".to_string();
1594    }
1595    derive_model_family(provider, model_id)
1596}
1597
1598/// Return the narrower lineage token used for model-aware option packs.
1599pub fn model_lineage(provider: &str, model_id: &str) -> String {
1600    let config = effective_config();
1601    model_lineage_with_config(&config, provider, model_id)
1602}
1603
1604pub(crate) fn model_lineage_with_config(
1605    config: &ProvidersConfig,
1606    provider: &str,
1607    model_id: &str,
1608) -> String {
1609    catalog_lineage_token(config, model_id)
1610        .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1611}
1612
1613fn model_lineage_with_inference_source(
1614    config: &ProvidersConfig,
1615    provider: &str,
1616    model_id: &str,
1617    source: crate::llm::provider::ProviderInferenceSource,
1618) -> String {
1619    if let Some(lineage) = catalog_lineage_token(config, model_id) {
1620        return lineage;
1621    }
1622    let id_lineage = derive_model_lineage("", model_id);
1623    if id_lineage != "unknown" {
1624        return id_lineage;
1625    }
1626    if matches!(
1627        source,
1628        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1629    ) {
1630        return "unknown".to_string();
1631    }
1632    derive_model_lineage(provider, model_id)
1633}
1634
1635fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1636    config
1637        .models
1638        .get(model_id)
1639        .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1640}
1641
1642fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1643    config
1644        .models
1645        .get(model_id)
1646        .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1647}
1648
/// Canonicalizes an optional catalog token: surrounding whitespace is
/// trimmed, ASCII letters are lowercased, and `_` becomes `-`.
///
/// Returns `None` for a missing value or one that is blank after trimming.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let trimmed = value?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.to_ascii_lowercase().replace('_', "-"))
}
1655
1656fn derive_model_family(provider: &str, model_id: &str) -> String {
1657    let id = model_id.to_ascii_lowercase();
1658    if contains_any(&id, &["claude", "anthropic.claude"]) {
1659        return "anthropic-claude".to_string();
1660    }
1661    if contains_any(&id, &["gemini", "google/gemini"]) {
1662        return "google-gemini".to_string();
1663    }
1664    if contains_any(&id, &["deepseek"]) {
1665        return "deepseek".to_string();
1666    }
1667    if contains_any(&id, &["qwen"]) {
1668        return "qwen".to_string();
1669    }
1670    if contains_any(&id, &["kimi", "moonshot"]) {
1671        return "kimi".to_string();
1672    }
1673    if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1674        return "glm".to_string();
1675    }
1676    if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1677        return "mistral".to_string();
1678    }
1679    if contains_any(&id, &["minimax"]) {
1680        return "minimax".to_string();
1681    }
1682    if contains_any(&id, &["llama"]) {
1683        return "llama".to_string();
1684    }
1685    if contains_any(&id, &["gemma"]) {
1686        return "gemma".to_string();
1687    }
1688    if is_openai_reasoning_model(&id) {
1689        return "openai-reasoning".to_string();
1690    }
1691    if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1692        return "openai-gpt".to_string();
1693    }
1694    match provider {
1695        "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1696        "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1697        "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1698        "deepseek" => "deepseek".to_string(),
1699        "zai" => "glm".to_string(),
1700        "minimax" => "minimax".to_string(),
1701        other if !other.is_empty() => normalize_identifier_token(other),
1702        _ => "unknown".to_string(),
1703    }
1704}
1705
1706fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1707    let id = model_id.to_ascii_lowercase();
1708    if contains_any(&id, &["haiku"]) {
1709        return "claude-haiku".to_string();
1710    }
1711    if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1712        return "claude-opus-adaptive".to_string();
1713    }
1714    if contains_any(&id, &["claude"]) {
1715        return "claude-sonnet-opus".to_string();
1716    }
1717    if contains_any(&id, &["gpt-5"]) {
1718        return "openai-gpt5".to_string();
1719    }
1720    if is_openai_reasoning_model(&id) {
1721        return "openai-reasoning".to_string();
1722    }
1723    if contains_any(&id, &["gpt-", "gpt_"]) {
1724        return "openai-legacy".to_string();
1725    }
1726    if contains_any(&id, &["gemini"]) {
1727        if contains_any(&id, &["flash"]) {
1728            return "gemini-flash".to_string();
1729        }
1730        return "gemini-pro".to_string();
1731    }
1732    if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1733        return "qwen3".to_string();
1734    }
1735    if contains_any(&id, &["gemma4", "gemma-4"]) {
1736        return "gemma4".to_string();
1737    }
1738    let family = derive_model_family(provider, model_id);
1739    if family == "unknown" {
1740        "unknown".to_string()
1741    } else {
1742        family
1743    }
1744}
1745
/// Returns `true` when `haystack` contains at least one of `needles` as a
/// substring. An empty `needles` slice yields `false`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1749
/// Returns `true` when `haystack` begins with at least one of `prefixes`.
/// An empty `prefixes` slice yields `false`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1753
1754fn is_openai_reasoning_model(id: &str) -> bool {
1755    starts_with_any(id, &["o1", "o3", "o4"])
1756        || contains_any(
1757            id,
1758            &[
1759                "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1760            ],
1761        )
1762}
1763
/// Turns an arbitrary identifier into a lowercase, dash-separated token.
///
/// ASCII letters are lowercased and ASCII alphanumerics are kept; every run
/// of other characters collapses into a single `-`, and separators at the
/// start or end are dropped. Input with no alphanumerics yields `""`.
fn normalize_identifier_token(value: &str) -> String {
    let mut token = String::new();
    // Set while we are inside a run of separator characters.
    let mut separator_pending = false;
    for ch in value.trim().chars() {
        if ch.is_ascii_alphanumeric() {
            // Emit one dash per separator run, but never a leading one.
            if separator_pending && !token.is_empty() {
                token.push('-');
            }
            separator_pending = false;
            token.push(ch.to_ascii_lowercase());
        } else {
            separator_pending = true;
        }
    }
    token
}
1782
1783/// Get provider config for resolving base_url, auth, etc.
1784pub fn provider_config(name: &str) -> Option<ProviderDef> {
1785    effective_config().providers.get(name).cloned()
1786}
1787
1788pub fn provider_protocol(name: &str) -> Option<String> {
1789    provider_config(name).and_then(|def| def.protocol)
1790}
1791
1792pub fn provider_uses_acp(name: &str) -> bool {
1793    provider_protocol(name)
1794        .as_deref()
1795        .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1796}
1797
1798/// Get model-specific default parameters (temperature, etc.).
1799/// Matches glob patterns in model_defaults keys.
1800pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1801    let config = effective_config();
1802    let mut params = BTreeMap::new();
1803    for (pattern, defaults) in &config.model_defaults {
1804        if glob_match(pattern, model_id) {
1805            for (k, v) in defaults {
1806                params.insert(k.clone(), v.clone());
1807            }
1808        }
1809    }
1810    params
1811}
1812
1813/// Get per-role LLM defaults, e.g. `[model_roles.merge]`.
1814///
1815/// Role defaults are intentionally shaped like ordinary `llm_call` options:
1816/// callers can pin `provider`/`model`, install `route_policy` or `prefer`,
1817/// and tune budget/latency knobs without creating a parallel routing stack.
1818/// Environment variables provide a lightweight operational override for
1819/// merge/fast-apply workers:
1820///
1821/// - `HARN_LLM_MERGE_PROVIDER`, `HARN_LLM_MERGE_MODEL`,
1822///   `HARN_LLM_MERGE_ROUTE_POLICY`
1823/// - `HARN_LLM_FAST_APPLY_PROVIDER`, `HARN_LLM_FAST_APPLY_MODEL`,
1824///   `HARN_LLM_FAST_APPLY_ROUTE_POLICY`
1825/// - `HARN_LLM_ROLE_<ROLE>_PROVIDER`, `_MODEL`, `_ROUTE_POLICY`
1826pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1827    let normalized = normalize_model_role_name(role);
1828    if normalized.is_empty() {
1829        return BTreeMap::new();
1830    }
1831    let config = effective_config();
1832    let mut params = BTreeMap::new();
1833    for key in role_lookup_keys(&normalized) {
1834        extend_model_role_defaults(&config, &key, &mut params);
1835    }
1836    apply_model_role_env_overrides(&normalized, &mut params);
1837    params
1838}
1839
1840fn extend_model_role_defaults(
1841    config: &ProvidersConfig,
1842    role: &str,
1843    params: &mut BTreeMap<String, toml::Value>,
1844) {
1845    for (configured_role, defaults) in &config.model_roles {
1846        if normalize_model_role_name(configured_role) == role {
1847            params.extend(defaults.clone());
1848        }
1849    }
1850    if let Some(defaults) = config.model_roles.get(role) {
1851        params.extend(defaults.clone());
1852    }
1853}
1854
/// Normalizes a role name: trims surrounding whitespace, lowercases ASCII
/// letters, and turns every `-` into `_`.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| match ch {
            '-' => '_',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
1858
/// Lists the config keys to consult for a normalized role, in application
/// order (later keys override earlier ones).
///
/// `merge` and `fast_apply` fall back to each other: the sibling role comes
/// first and the requested role last. Any other role maps to itself only.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let ordered: Vec<&str> = match role {
        "merge" => vec!["fast_apply", "merge"],
        "fast_apply" => vec!["merge", "fast_apply"],
        other => vec![other],
    };
    ordered.into_iter().map(String::from).collect()
}
1868
/// Converts a role name into the token embedded in environment variable
/// names: ASCII alphanumerics are uppercased, and every run of other
/// characters becomes a single `_` (none at the start or end).
fn role_env_token(role: &str) -> String {
    role.split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_uppercase)
        .collect::<Vec<_>>()
        .join("_")
}
1884
1885fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
1886    for alias in role_env_aliases(role) {
1887        apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
1888        apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
1889        apply_model_role_env_var(
1890            &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
1891            "route_policy",
1892            params,
1893        );
1894        apply_model_role_env_var(
1895            &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
1896            "provider",
1897            params,
1898        );
1899        apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
1900        apply_model_role_env_var(
1901            &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
1902            "route_policy",
1903            params,
1904        );
1905    }
1906}
1907
1908fn role_env_aliases(role: &str) -> Vec<String> {
1909    let token = role_env_token(role);
1910    if token.is_empty() {
1911        return Vec::new();
1912    }
1913    if token == "MERGE" {
1914        vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
1915    } else if token == "FAST_APPLY" {
1916        vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
1917    } else {
1918        vec![token]
1919    }
1920}
1921
1922fn apply_model_role_env_var(
1923    env_name: &str,
1924    option_name: &str,
1925    params: &mut BTreeMap<String, toml::Value>,
1926) {
1927    let Ok(value) = std::env::var(env_name) else {
1928        return;
1929    };
1930    let trimmed = value.trim();
1931    if trimmed.is_empty() {
1932        return;
1933    }
1934    params.insert(
1935        option_name.to_string(),
1936        toml::Value::String(trimmed.to_string()),
1937    );
1938}
1939
1940/// Get list of configured provider names.
1941pub fn provider_names() -> Vec<String> {
1942    effective_config().providers.keys().cloned().collect()
1943}
1944
1945/// Return every configured alias name, sorted deterministically.
1946pub fn known_model_names() -> Vec<String> {
1947    effective_config().aliases.keys().cloned().collect()
1948}
1949
1950pub fn alias_entries() -> Vec<(String, AliasDef)> {
1951    effective_config().aliases.into_iter().collect()
1952}
1953
1954pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
1955    effective_config().alias_tool_calling.get(alias).cloned()
1956}
1957
1958/// Return every configured model-catalog entry, sorted by provider then id.
1959pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
1960    let config = effective_config();
1961    model_catalog_entries_with_config(&config)
1962}
1963
1964pub(crate) fn model_catalog_entries_with_config(
1965    config: &ProvidersConfig,
1966) -> Vec<(String, ModelDef)> {
1967    sorted_model_entries_with_config(config)
1968        .into_iter()
1969        .map(|(id, model)| {
1970            let provider = model.provider.clone();
1971            (
1972                id.clone(),
1973                with_effective_capability_tags(id, provider, model),
1974            )
1975        })
1976        .collect()
1977}
1978
1979pub(crate) fn sorted_model_entries_with_config(
1980    config: &ProvidersConfig,
1981) -> Vec<(String, ModelDef)> {
1982    let mut entries: Vec<_> = config
1983        .models
1984        .iter()
1985        .map(|(id, model)| (id.clone(), model.clone()))
1986        .collect();
1987    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
1988        model_a
1989            .provider
1990            .cmp(&model_b.provider)
1991            .then_with(|| id_a.cmp(id_b))
1992    });
1993    entries
1994}
1995
1996pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
1997    effective_config()
1998        .models
1999        .get(model_id)
2000        .cloned()
2001        .map(|model| {
2002            let provider = model.provider.clone();
2003            with_effective_capability_tags(model_id.to_string(), provider, model)
2004        })
2005}
2006
2007pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
2008    model_catalog_entry(model_id).and_then(|model| model.rate_limits)
2009}
2010
2011pub fn wire_model_id(model_id: &str) -> String {
2012    model_catalog_entry(model_id)
2013        .and_then(|model| model.wire_model)
2014        .unwrap_or_else(|| model_id.to_string())
2015}
2016
2017pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
2018    provider_config(provider).and_then(|provider| {
2019        provider
2020            .rate_limits
2021            .unwrap_or_default()
2022            .with_rpm_fallback(provider.rpm)
2023    })
2024}
2025
2026pub fn model_equivalence_group(model_id: &str) -> Option<String> {
2027    model_catalog_entry(model_id).and_then(|model| {
2028        model
2029            .equivalence_group
2030            .or(model.logical_model)
2031            .filter(|group| !group.trim().is_empty())
2032    })
2033}
2034
2035/// Return same-logical-model routes that can be considered for explicit
2036/// failover or cross-provider experiments. Equivalence is a catalog assertion
2037/// about compatible model weights/family, not wire-level identity.
2038pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
2039    let resolved = resolve_model_info(selector);
2040    let Some(group) = model_equivalence_group(&resolved.id) else {
2041        return Vec::new();
2042    };
2043    let config = effective_config();
2044    let Some(source) = config.models.get(&resolved.id) else {
2045        return Vec::new();
2046    };
2047    let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
2048    let source_context = source
2049        .runtime_context_window
2050        .unwrap_or(source.context_window);
2051
2052    sorted_model_entries_with_config(&config)
2053        .into_iter()
2054        .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
2055        .filter(|(_, model)| !model.deprecated)
2056        .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
2057        .filter(|(_, model)| {
2058            model.equivalence_group.as_deref() == Some(group.as_str())
2059                || model.logical_model.as_deref() == Some(group.as_str())
2060        })
2061        .filter(|(id, model)| {
2062            let caps = crate::llm::capabilities::lookup(&model.provider, id);
2063            let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
2064            candidate_context >= source_context
2065                && (!source_caps.native_tools || caps.native_tools)
2066                && (!source_caps.text_tool_wire_format_supported
2067                    || caps.text_tool_wire_format_supported)
2068                && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
2069                && source_caps.structured_output_mode == caps.structured_output_mode
2070        })
2071        .map(|(id, model)| {
2072            let provider = model.provider.clone();
2073            (
2074                id.clone(),
2075                with_effective_capability_tags(id, provider, model),
2076            )
2077        })
2078        .collect()
2079}
2080
2081pub fn qc_default_model(provider: &str) -> Option<String> {
2082    std::env::var("BURIN_QC_MODEL")
2083        .ok()
2084        .filter(|value| !value.trim().is_empty())
2085        .or_else(|| {
2086            effective_config()
2087                .qc_defaults
2088                .get(&provider.to_lowercase())
2089                .cloned()
2090        })
2091}
2092
2093pub fn default_model_for_provider(provider: &str) -> String {
2094    if provider_uses_acp(provider) {
2095        return "default".to_string();
2096    }
2097    match provider {
2098        "local" => std::env::var("LOCAL_LLM_MODEL")
2099            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
2100            .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
2101        "mlx" => std::env::var("MLX_MODEL_ID")
2102            .unwrap_or_else(|_| "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit".to_string()),
2103        "openai" => "gpt-4o-mini".to_string(),
2104        "ollama" => "llama3.2".to_string(),
2105        "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
2106        _ => "claude-sonnet-4-6".to_string(),
2107    }
2108}
2109
2110pub fn qc_defaults() -> BTreeMap<String, String> {
2111    effective_config().qc_defaults
2112}
2113
2114pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2115    effective_config()
2116        .models
2117        .get(model_id)
2118        .and_then(|model| model.pricing.clone())
2119}
2120
2121/// Premium per-MTok pricing for a model's accelerated-serving ("fast mode")
2122/// tier, when the catalog declares one. Returns `None` for models with no
2123/// fast tier or a tier that omits explicit pricing — callers fall back to
2124/// standard pricing in that case.
2125pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2126    effective_config()
2127        .models
2128        .get(model_id)
2129        .and_then(|model| model.fast_mode.as_ref())
2130        .and_then(|fast_mode| fast_mode.pricing.clone())
2131}
2132
2133pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
2134    model_pricing_per_mtok(model_id)
2135        .map(|pricing| {
2136            (
2137                pricing.input_per_mtok / 1000.0,
2138                pricing.output_per_mtok / 1000.0,
2139            )
2140        })
2141        .or_else(|| {
2142            let (input, output, _) = provider_economics(provider);
2143            match (input, output) {
2144                (Some(input), Some(output)) => Some((input, output)),
2145                _ => None,
2146            }
2147        })
2148}
2149
2150pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
2151    match auth_env {
2152        AuthEnv::None => Vec::new(),
2153        AuthEnv::Single(name) => vec![name.clone()],
2154        AuthEnv::Multiple(names) => names.clone(),
2155    }
2156}
2157
2158pub fn provider_key_available(provider: &str) -> bool {
2159    let Some(pdef) = provider_config(provider) else {
2160        return provider == "ollama";
2161    };
2162    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
2163        return true;
2164    }
2165    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
2166        std::env::var(env_name)
2167            .ok()
2168            .is_some_and(|value| !value.trim().is_empty())
2169    })
2170}
2171
2172pub fn available_provider_names() -> Vec<String> {
2173    provider_names()
2174        .into_iter()
2175        .filter(|provider| provider_key_available(provider))
2176        .collect()
2177}
2178
2179/// Check if a provider advertises a legacy provider-level feature.
2180pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
2181    provider_config(provider)
2182        .map(|p| p.features.iter().any(|f| f == feature))
2183        .unwrap_or(false)
2184}
2185
2186/// Provider-level catalog pricing/latency. Model-specific catalog pricing
2187/// wins when available; this is the adapter-level fallback used by routing
2188/// and portal summaries when a model has no explicit catalog entry.
2189pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
2190    provider_config(provider)
2191        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
2192        .unwrap_or((None, None, None))
2193}
2194
/// The tool-call channel that a `tool_format` string addresses.
///
/// `native` is the provider JSON tool-calling channel. `text` (the canonical
/// tagged/heredoc grammar) and `json` (fenced JSON) are both TEXT-channel
/// formats: they travel in the assistant's visible content and are parsed by
/// a text parser. This type is the single source of truth for "is this
/// format a text-channel format?", so the parity gates, native-tools
/// resolution, and tool-result message role all agree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatChannel {
    /// Provider native JSON tool calling.
    Native,
    /// A text-channel grammar carried in assistant content (`text` or `json`).
    Text,
}
2210
2211/// Classify a `tool_format` string into its channel, or `None` for an unknown
2212/// value (a typo, or a not-yet-wired format). Callers use this to reject
2213/// unknown formats loudly instead of silently defaulting.
2214///
2215/// EXHAUSTIVE-MATCH GUARD: this `match` is the canonical place tool_format is
2216/// switched. Adding a new format requires a branch here, so a half-wired
2217/// format fails to compile rather than silently reading as text.
2218pub fn tool_format_channel(format: &str) -> Option<ToolFormatChannel> {
2219    match format {
2220        "native" => Some(ToolFormatChannel::Native),
2221        "text" | "json" => Some(ToolFormatChannel::Text),
2222        _ => None,
2223    }
2224}
2225
2226/// True when `format` is a tool_format Harn understands (`native`, `text`, or
2227/// `json`). Used to gate the capability-matrix `preferred_tool_format` so a
2228/// pinned format is honored, while an unknown value falls through to the
2229/// native/text heuristic.
2230pub fn is_known_tool_format(format: &str) -> bool {
2231    tool_format_channel(format).is_some()
2232}
2233
2234/// Resolve the default tool format for a model+provider combination.
2235/// Priority: alias `tool_format` (matched by model ID) > provider/model
2236/// capability matrix > legacy provider feature > "json" (the global
2237/// text-channel default; heredoc "text" is opt-in via a pin or explicit
2238/// request).
2239pub fn default_tool_format(model: &str, provider: &str) -> String {
2240    let config = effective_config();
2241    default_tool_format_with_config(&config, model, provider)
2242}
2243
2244fn default_tool_format_with_config(
2245    config: &ProvidersConfig,
2246    model: &str,
2247    provider: &str,
2248) -> String {
2249    // Aliases match by model ID + provider, or by alias name.
2250    for (name, alias) in &config.aliases {
2251        let matches = (alias.id == model && alias.provider == provider) || name == model;
2252        if matches {
2253            if let Some(ref fmt) = alias.tool_format {
2254                return fmt.clone();
2255            }
2256        }
2257    }
2258    let capabilities = crate::llm::capabilities::lookup(provider, model);
2259    if let Some(format) = capabilities.preferred_tool_format.as_deref() {
2260        // A capability row may pin any known tool_format, including `text`
2261        // (heredoc) — the reverse safety valve a regressing model uses to pin
2262        // OFF the global json default. `json` is also honored when a row sets
2263        // it. The exhaustive match below is the EXHAUSTIVE-MATCH GUARD: a new
2264        // tool_format that isn't classified here fails loudly rather than
2265        // silently falling through to the native/json heuristic.
2266        if is_known_tool_format(format) {
2267            return format.to_string();
2268        }
2269    }
2270    let capability_matrix_native = capabilities.native_tools;
2271    let legacy_provider_native = config
2272        .providers
2273        .get(provider)
2274        .map(|p| p.features.iter().any(|f| f == "native_tools"))
2275        .unwrap_or(false);
2276    if capability_matrix_native || legacy_provider_native {
2277        "native".to_string()
2278    } else {
2279        // GLOBAL DEFAULT: a text-channel model with no pinned format resolves
2280        // to fenced-json (`json`), not heredoc (`text`). The win is STRUCTURAL
2281        // — a JSON string can't carry a raw newline, so a `<<EOF` content
2282        // delimiter never collides with the call wrapper (heredoc's known
2283        // production defect: models leak `<<EOF` into file content → the
2284        // `line 0: <<` thrash). Fenced-json swept a clean 1.0/1.0/1.0
2285        // (compliance/parse-determinism/expressiveness) across every model
2286        // measured, and the structural guarantee generalizes to unmeasured
2287        // models. Heredoc (`text`) stays selectable explicitly and via a
2288        // per-model `preferred_tool_format = "text"` pin (the reverse valve).
2289        "json".to_string()
2290    }
2291}
2292
2293fn with_effective_capability_tags(
2294    model_id: String,
2295    provider: String,
2296    mut model: ModelDef,
2297) -> ModelDef {
2298    model.capabilities = effective_model_capability_tags(&provider, &model_id);
2299    model
2300}
2301
2302/// Legacy display tags derived from the canonical provider/model capability
2303/// matrix. The matrix is the source of truth; `models.*.capabilities` in
2304/// providers.toml is accepted only for backwards-compatible parsing.
2305pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2306    let caps = crate::llm::capabilities::lookup(provider, model_id);
2307    capability_tags_from_capabilities(&caps)
2308}
2309
2310pub(crate) fn capability_tags_from_capabilities(
2311    caps: &crate::llm::capabilities::Capabilities,
2312) -> Vec<String> {
2313    let mut tags = Vec::new();
2314    // Today all Harn chat providers expose streaming. Keep this as a
2315    // transport baseline rather than a duplicated per-model declaration.
2316    tags.push("streaming".to_string());
2317    if caps.native_tools || caps.text_tool_wire_format_supported {
2318        tags.push("tools".to_string());
2319    }
2320    if !caps.tool_search.is_empty() {
2321        tags.push("tool_search".to_string());
2322    }
2323    if caps.vision || caps.vision_supported {
2324        tags.push("vision".to_string());
2325    }
2326    if caps.audio {
2327        tags.push("audio".to_string());
2328    }
2329    if caps.pdf {
2330        tags.push("pdf".to_string());
2331    }
2332    if caps.video {
2333        tags.push("video".to_string());
2334    }
2335    if caps.files_api_supported {
2336        tags.push("files".to_string());
2337    }
2338    if caps.prompt_caching {
2339        tags.push("prompt_caching".to_string());
2340    }
2341    if !caps.thinking_modes.is_empty() {
2342        tags.push("thinking".to_string());
2343    }
2344    if caps.interleaved_thinking_supported
2345        || caps
2346            .thinking_modes
2347            .iter()
2348            .any(|mode| mode == "adaptive" || mode == "effort")
2349    {
2350        tags.push("extended_thinking".to_string());
2351    }
2352    if caps.structured_output.is_some() || caps.json_schema.is_some() {
2353        tags.push("structured_output".to_string());
2354    }
2355    tags
2356}
2357
2358/// Resolve a tier or alias into a concrete model/provider pair.
2359pub fn resolve_tier_model(
2360    target: &str,
2361    preferred_provider: Option<&str>,
2362) -> Option<(String, String)> {
2363    let config = effective_config();
2364
2365    let candidate_aliases = if let Some(provider) = preferred_provider {
2366        vec![
2367            format!("{provider}/{target}"),
2368            format!("{provider}:{target}"),
2369            format!("tier/{target}"),
2370            target.to_string(),
2371        ]
2372    } else {
2373        vec![format!("tier/{target}"), target.to_string()]
2374    };
2375
2376    for alias_name in candidate_aliases {
2377        if let Some(alias) = config.aliases.get(&alias_name) {
2378            return Some((alias.id.clone(), alias.provider.clone()));
2379        }
2380    }
2381
2382    None
2383}
2384
2385/// Return all configured alias-backed model/provider pairs whose resolved
2386/// model falls into the requested capability tier. The result is de-duplicated
2387/// and sorted deterministically by provider then model id.
2388pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2389    let config = effective_config();
2390    let mut seen = std::collections::BTreeSet::new();
2391    let mut candidates = Vec::new();
2392
2393    for alias in config.aliases.values() {
2394        let pair = (alias.id.clone(), alias.provider.clone());
2395        if seen.contains(&pair) {
2396            continue;
2397        }
2398        if model_tier(&alias.id) == target {
2399            seen.insert(pair.clone());
2400            candidates.push(pair);
2401        }
2402    }
2403
2404    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2405        provider_a
2406            .cmp(provider_b)
2407            .then_with(|| model_a.cmp(model_b))
2408    });
2409    candidates
2410}
2411
2412/// Return all configured alias-backed model/provider pairs. Used by routing
2413/// policies that need to compare alternatives across tiers.
2414pub fn all_model_candidates() -> Vec<(String, String)> {
2415    let config = effective_config();
2416    let mut seen = std::collections::BTreeSet::new();
2417    let mut candidates = Vec::new();
2418
2419    for alias in config.aliases.values() {
2420        let pair = (alias.id.clone(), alias.provider.clone());
2421        if seen.insert(pair.clone()) {
2422            candidates.push(pair);
2423        }
2424    }
2425
2426    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2427        provider_a
2428            .cmp(provider_b)
2429            .then_with(|| model_a.cmp(model_b))
2430    });
2431    candidates
2432}
2433
2434pub fn pick_complementary_reviewer(
2435    options: ComplementaryReviewerOptions,
2436) -> ComplementaryReviewerSelection {
2437    let config = effective_config();
2438    let mut author = resolve_model_info(&options.author_model);
2439    if let Some(provider) = options
2440        .author_provider
2441        .as_deref()
2442        .map(str::trim)
2443        .filter(|provider| !provider.is_empty())
2444    {
2445        author.provider = provider.to_string();
2446        author.family = model_family_with_config(&config, &author.provider, &author.id);
2447        author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
2448        author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
2449    }
2450    let author_entry = config.models.get(&author.id);
2451    let author_identity = complementary_identity(
2452        author.id.clone(),
2453        author.provider.clone(),
2454        author.family.clone(),
2455        author.lineage.clone(),
2456        author.tier.clone(),
2457        author_entry.and_then(|model| model.pricing.clone()),
2458    );
2459
2460    let fallback =
2461        |code: ReviewerFallbackCode, fallback_reason: String| ComplementaryReviewerSelection {
2462            intent: options.intent.as_str().to_string(),
2463            reviewer: author_identity.clone(),
2464            estimated_incremental_cost: cost_estimate(
2465                author_identity.pricing.as_ref(),
2466                author_identity.pricing.as_ref(),
2467            ),
2468            author: author_identity.clone(),
2469            fallback: true,
2470            reason: format!(
2471                "using author model {} because {fallback_reason}",
2472                author_identity.id
2473            ),
2474            fallback_reason: Some(fallback_reason),
2475            fallback_code: Some(code.as_code().to_string()),
2476        };
2477
2478    if author_identity.family == "unknown" {
2479        return fallback(
2480            ReviewerFallbackCode::UnknownAuthorFamily,
2481            "author model family is unknown".to_string(),
2482        );
2483    }
2484
2485    let preferred_families = author_entry
2486        .map(|model| model.complementary_with.clone())
2487        .unwrap_or_default();
2488    let author_refs = reviewer_match_refs(&author_identity);
2489    let mut rejected_by_price = 0usize;
2490    let mut diff_family_seen = 0usize;
2491    let mut candidates = Vec::new();
2492
2493    for (id, model) in config.models.iter() {
2494        if id == &author_identity.id && model.provider == author_identity.provider {
2495            continue;
2496        }
2497        if model.deprecated || model.availability != ModelAvailability::Serverless {
2498            continue;
2499        }
2500        let family = model_family_with_config(&config, &model.provider, id);
2501        if family == "unknown" || family == author_identity.family {
2502            continue;
2503        }
2504        diff_family_seen += 1;
2505        let lineage = model_lineage_with_config(&config, &model.provider, id);
2506        let candidate_identity = complementary_identity(
2507            id.clone(),
2508            model.provider.clone(),
2509            family,
2510            lineage,
2511            model_tier_with_config(&config, id),
2512            model.pricing.clone(),
2513        );
2514        if model
2515            .avoid_as_reviewer_for
2516            .iter()
2517            .any(|selector| refs_contain_selector(&author_refs, selector))
2518        {
2519            continue;
2520        }
2521        if exceeds_price_cap(
2522            author_identity.pricing.as_ref(),
2523            candidate_identity.pricing.as_ref(),
2524            options.max_price_multiplier,
2525        ) {
2526            rejected_by_price += 1;
2527            continue;
2528        }
2529        let score = reviewer_score(
2530            &options,
2531            &author_identity,
2532            &candidate_identity,
2533            model,
2534            &preferred_families,
2535        );
2536        candidates.push(ReviewerCandidate {
2537            identity: candidate_identity,
2538            score,
2539        });
2540    }
2541
2542    candidates.sort_by(|left, right| {
2543        right
2544            .score
2545            .partial_cmp(&left.score)
2546            .unwrap_or(std::cmp::Ordering::Equal)
2547            .then_with(|| left.identity.provider.cmp(&right.identity.provider))
2548            .then_with(|| left.identity.id.cmp(&right.identity.id))
2549    });
2550
2551    let Some(best) = candidates.into_iter().next() else {
2552        if rejected_by_price > 0 {
2553            let cap = options.max_price_multiplier.unwrap_or_default();
2554            return fallback(
2555                ReviewerFallbackCode::NoDiffFamilyWithinPrice,
2556                format!("no different-family reviewer satisfied max_price_multiplier {cap}"),
2557            );
2558        }
2559        if diff_family_seen == 0 {
2560            return fallback(
2561                ReviewerFallbackCode::NoDiffFamilyServerless,
2562                "no active serverless different-family reviewer is cataloged".to_string(),
2563            );
2564        }
2565        return fallback(
2566            ReviewerFallbackCode::AllDiffFamilyExcluded,
2567            "all different-family reviewer candidates were excluded".to_string(),
2568        );
2569    };
2570
2571    let estimate = cost_estimate(
2572        best.identity.pricing.as_ref(),
2573        author_identity.pricing.as_ref(),
2574    );
2575    ComplementaryReviewerSelection {
2576        intent: options.intent.as_str().to_string(),
2577        reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
2578        estimated_incremental_cost: estimate,
2579        author: author_identity,
2580        reviewer: best.identity,
2581        fallback: false,
2582        fallback_reason: None,
2583        fallback_code: None,
2584    }
2585}
2586
/// A reviewer candidate that survived filtering in
/// `pick_complementary_reviewer`, paired with its ranking score.
#[derive(Debug, Clone)]
struct ReviewerCandidate {
    /// Catalog identity of the candidate model.
    identity: ComplementaryModelIdentity,
    /// Score produced by `reviewer_score`; higher scores rank first.
    score: f64,
}
2592
2593fn complementary_identity(
2594    id: String,
2595    provider: String,
2596    family: String,
2597    lineage: String,
2598    tier: String,
2599    pricing: Option<ModelPricing>,
2600) -> ComplementaryModelIdentity {
2601    ComplementaryModelIdentity {
2602        id,
2603        provider,
2604        family,
2605        lineage,
2606        tier,
2607        pricing,
2608    }
2609}
2610
2611fn reviewer_score(
2612    options: &ComplementaryReviewerOptions,
2613    author: &ComplementaryModelIdentity,
2614    candidate: &ComplementaryModelIdentity,
2615    model: &ModelDef,
2616    preferred_families: &[String],
2617) -> f64 {
2618    let candidate_refs = reviewer_match_refs(candidate);
2619    let mut score = 0.0;
2620    if let Some(rank) = preferred_families
2621        .iter()
2622        .position(|selector| refs_contain_selector(&candidate_refs, selector))
2623    {
2624        score += 1_000.0 - rank as f64;
2625    }
2626    if candidate.provider != author.provider {
2627        score += 100.0;
2628    }
2629    score += match tier_distance(&author.tier, &candidate.tier) {
2630        0 => 80.0,
2631        1 => 45.0,
2632        2 => 15.0,
2633        _ => 0.0,
2634    };
2635    for strength in intent_strengths(options.intent) {
2636        if model.strengths.iter().any(|tag| tag == strength) {
2637            score += 8.0;
2638        }
2639    }
2640    if model.capabilities.iter().any(|tag| tag == "tools") {
2641        score += 4.0;
2642    }
2643    if let (Some(author_total), Some(candidate_total)) = (
2644        pricing_total(author.pricing.as_ref()),
2645        pricing_total(candidate.pricing.as_ref()),
2646    ) {
2647        if author_total > 0.0 {
2648            let ratio = candidate_total / author_total;
2649            if ratio <= 1.0 {
2650                score += 20.0;
2651            }
2652            score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2653        }
2654    }
2655    score
2656}
2657
2658fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2659    match intent {
2660        ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2661        ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2662        ComplementaryReviewerIntent::PlanReview => {
2663            &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2664        }
2665    }
2666}
2667
2668fn tier_distance(left: &str, right: &str) -> u8 {
2669    let left = tier_rank(left);
2670    let right = tier_rank(right);
2671    left.abs_diff(right)
2672}
2673
/// Map a tier name onto an ordinal: `small` = 0, `mid` = 1, and both
/// `frontier` and `reasoning` = 2. Unrecognized names rank with `mid`.
fn tier_rank(tier: &str) -> u8 {
    if tier == "small" {
        0
    } else if matches!(tier, "frontier" | "reasoning") {
        2
    } else {
        // "mid" and every unrecognized tier share the middle rank.
        1
    }
}
2682
2683fn exceeds_price_cap(
2684    author_pricing: Option<&ModelPricing>,
2685    candidate_pricing: Option<&ModelPricing>,
2686    max_price_multiplier: Option<f64>,
2687) -> bool {
2688    let Some(max_price_multiplier) = max_price_multiplier else {
2689        return false;
2690    };
2691    let Some(author_total) = pricing_total(author_pricing) else {
2692        return false;
2693    };
2694    let Some(candidate_total) = pricing_total(candidate_pricing) else {
2695        return true;
2696    };
2697    author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2698}
2699
2700fn cost_estimate(
2701    reviewer_pricing: Option<&ModelPricing>,
2702    author_pricing: Option<&ModelPricing>,
2703) -> Option<ComplementaryCostEstimate> {
2704    let reviewer_pricing = reviewer_pricing?;
2705    let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2706    let multiplier_vs_author = pricing_total(author_pricing)
2707        .filter(|author_total| *author_total > 0.0)
2708        .map(|author_total| total_per_mtok / author_total);
2709    Some(ComplementaryCostEstimate {
2710        input_per_mtok: reviewer_pricing.input_per_mtok,
2711        output_per_mtok: reviewer_pricing.output_per_mtok,
2712        total_per_mtok,
2713        multiplier_vs_author,
2714    })
2715}
2716
2717fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2718    pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2719}
2720
2721fn reviewer_reason(
2722    author: &ComplementaryModelIdentity,
2723    reviewer: &ComplementaryModelIdentity,
2724    estimate: Option<&ComplementaryCostEstimate>,
2725) -> String {
2726    let cost = estimate
2727        .and_then(|estimate| estimate.multiplier_vs_author)
2728        .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2729        .unwrap_or_else(|| "price ratio unavailable".to_string());
2730    format!(
2731        "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2732        reviewer.id,
2733        reviewer.provider,
2734        reviewer.family,
2735        author.family,
2736        reviewer.tier,
2737        author.tier,
2738        cost
2739    )
2740}
2741
2742fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2743    BTreeSet::from([
2744        identity.id.to_ascii_lowercase(),
2745        identity.provider.to_ascii_lowercase(),
2746        format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2747        format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2748        identity.family.to_ascii_lowercase(),
2749        identity.lineage.to_ascii_lowercase(),
2750    ])
2751}
2752
2753fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2754    normalized_catalog_token(Some(selector))
2755        .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2756        .is_some_and(|selector| refs.contains(&selector))
2757}
2758
2759// Model-pattern matching for forms like "claude-*", "qwen/*", "ollama:*".
2760// Shared workspace semantics live in `harn-glob`.
2761use harn_glob::match_name as glob_match;
2762
2763fn dirs_or_home() -> Option<String> {
2764    crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2765}
2766
2767/// Resolve the effective base URL for a provider, checking the `base_url_env`
2768/// override first, then falling back to the configured `base_url`.
2769pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2770    if let Some(env_name) = &pdef.base_url_env {
2771        if let Ok(val) = std::env::var(env_name) {
2772            // Strip surrounding quotes that some .env parsers leave intact.
2773            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2774            if !trimmed.is_empty() {
2775                return trimmed.to_string();
2776            }
2777        }
2778    }
2779    pdef.base_url.clone()
2780}
2781
/// Embedded copy of generated `llm/providers.toml`, built from
/// `llm/catalog_sources/**/*.toml` by `harn provider catalog build-config`.
/// Edit the fragments, not this generated snapshot or this string.
///
/// `include_str!` resolves the path relative to this source file and bakes
/// the file contents into the binary at compile time.
const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2786
2787/// Parse the embedded generated `providers.toml` into the runtime
2788/// `ProvidersConfig`.
2789///
2790/// Hosts overlay this base via `HARN_PROVIDERS_CONFIG`,
2791/// `~/.config/harn/providers.toml`, `harn.toml`, package-manifest
2792/// `[llm]` sections, and per-run `set_user_overrides(...)`. The same
2793/// Serde shape applies at every layer, so there is exactly one schema to
2794/// keep coherent — no parallel Rust-literal catalog.
2795///
2796/// We `expect` on parse failure because the file is bundled into the
2797/// binary at compile time; a malformed embedded catalog is a build-time
2798/// invariant violation that should fail every test, not silently
2799/// degrade in production.
2800fn default_config() -> ProvidersConfig {
2801    parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2802        .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2803}
2804
/// Test helper: the embedded default config with `overlay` merged on top.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2811
2812#[cfg(test)]
2813mod tests {
2814    use super::*;
2815
2816    fn reset_overrides() {
2817        clear_user_overrides();
2818    }
2819
2820    #[test]
2821    fn resolve_model_info_guards_bad_native_pin_on_unreliable_route() {
2822        reset_overrides();
2823        // An alias that pins tool_format = "native" for DeepSeek V3.2 on
2824        // OpenRouter — a route the capability registry knows is
2825        // native_unreliable (drops to unparsed DSML text). Before the
2826        // footgun-removal gate this bad pin survived resolution verbatim and
2827        // produced vanishing tool calls; now it is steered to the route's safe
2828        // text-channel format.
2829        let overlay = parse_config_toml(
2830            "[aliases.guard-ds]\nid = \"deepseek/deepseek-v3.2\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2831        )
2832        .expect("overlay parses");
2833        set_user_overrides(Some(overlay));
2834        let resolved = resolve_model_info("guard-ds");
2835        assert_eq!(
2836            resolved.tool_format, "text",
2837            "a native pin on a native_unreliable route must be auto-corrected to text"
2838        );
2839        clear_user_overrides();
2840
2841        // A safe native pin (a route with no adverse parity) is untouched.
2842        let overlay_ok = parse_config_toml(
2843            "[aliases.guard-ds-ok]\nid = \"deepseek/deepseek-v3-base\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2844        )
2845        .expect("overlay parses");
2846        set_user_overrides(Some(overlay_ok));
2847        let resolved_ok = resolve_model_info("guard-ds-ok");
2848        assert_eq!(resolved_ok.tool_format, "native");
2849        clear_user_overrides();
2850    }
2851
2852    #[test]
2853    fn auto_select_prefers_local_provider_without_cloud_credentials() {
2854        // A catalog whose only provider is local and auth-free resolves to it
2855        // regardless of ambient cloud API keys: no preferred/credentialed cloud
2856        // provider is present, so the local fallback wins deterministically.
2857        let config = parse_config_toml(
2858            "[providers.ollama]\nbase_url = \"http://localhost:11434\"\nchat_endpoint = \"/v1/chat/completions\"\n",
2859        )
2860        .expect("config parses");
2861        assert!(provider_is_local(config.providers.get("ollama").unwrap()));
2862        assert_eq!(auto_select_provider(&config), "ollama");
2863    }
2864
2865    #[test]
2866    fn auto_select_falls_back_to_documented_default_when_empty() {
2867        let config = parse_config_toml("").expect("config parses");
2868        assert_eq!(auto_select_provider(&config), FALLBACK_PROVIDER);
2869    }
2870
2871    #[test]
2872    fn suppress_routes_parse_and_merge_dedupe() {
2873        let mut base =
2874            parse_config_toml("[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\"]\n")
2875                .expect("base parses");
2876        assert!(!base.is_empty(), "a suppress-only overlay is not empty");
2877        let overlay = parse_config_toml(
2878            "[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\", \"ollama:img:tag\"]\n",
2879        )
2880        .expect("overlay parses");
2881        base.merge_from(&overlay);
2882        assert_eq!(
2883            base.suppress.routes,
2884            vec![
2885                "together:Qwen/Qwen3-Coder-Next-FP8".to_string(),
2886                "ollama:img:tag".to_string(),
2887            ],
2888            "merge appends new selectors without duplicating existing ones"
2889        );
2890    }
2891
2892    #[test]
2893    fn test_glob_match_prefix() {
2894        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
2895        assert!(glob_match("gpt-*", "gpt-4o"));
2896        assert!(!glob_match("claude-*", "gpt-4o"));
2897    }
2898
2899    #[test]
2900    fn test_glob_match_suffix() {
2901        assert!(glob_match("*-latest", "llama3.2-latest"));
2902        assert!(!glob_match("*-latest", "llama3.2"));
2903    }
2904
2905    #[test]
2906    fn test_glob_match_middle() {
2907        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
2908        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
2909    }
2910
2911    #[test]
2912    fn test_glob_match_exact() {
2913        assert!(glob_match("gpt-4o", "gpt-4o"));
2914        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
2915    }
2916
2917    #[test]
2918    fn test_infer_provider_from_defaults() {
2919        let _guard = crate::llm::env_guard();
2920        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2921        unsafe {
2922            std::env::remove_var("HARN_DEFAULT_PROVIDER");
2923        }
2924
2925        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
2926        assert_eq!(infer_provider("gpt-4o"), "openai");
2927        assert_eq!(infer_provider("o1-preview"), "openai");
2928        assert_eq!(infer_provider("o3-mini"), "openai");
2929        assert_eq!(infer_provider("o4-mini"), "openai");
2930        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
2931        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
2932        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
2933        assert_eq!(infer_provider("unknown-model"), "anthropic");
2934
2935        unsafe {
2936            match prev_default_provider {
2937                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2938                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2939            }
2940        }
2941    }
2942
2943    #[test]
2944    fn test_infer_provider_prefix_rules() {
2945        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
2946        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
2947        // Even when the id also contains `/`, the local transport prefix wins.
2948        assert_eq!(infer_provider("local:owner/model"), "ollama");
2949        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
2950    }
2951
2952    #[test]
2953    fn test_openrouter_inference_requires_one_slash() {
2954        let _guard = crate::llm::env_guard();
2955        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2956        unsafe {
2957            std::env::remove_var("HARN_DEFAULT_PROVIDER");
2958        }
2959
2960        assert_eq!(infer_provider("org/model"), "openrouter");
2961        assert_eq!(infer_provider("org/team/model"), "anthropic");
2962
2963        unsafe {
2964            match prev_default_provider {
2965                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2966                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2967            }
2968        }
2969    }
2970
2971    #[test]
2972    fn test_cerebras_inference_beats_openrouter_slash_fallback() {
2973        let _guard = crate::llm::env_guard();
2974        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2975        unsafe {
2976            std::env::remove_var("HARN_DEFAULT_PROVIDER");
2977        }
2978
2979        assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
2980        assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
2981        assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
2982
2983        unsafe {
2984            match prev_default_provider {
2985                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2986                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2987            }
2988        }
2989    }
2990
2991    #[test]
2992    fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
2993        // Bare model IDs that the embedded catalog hosts on Cerebras must
2994        // not be misrouted by the generic `gpt-*` / single-slash inference
2995        // fallbacks. Regression for harn#2142 (model-info routed
2996        // `gpt-oss-120b` to openai, breaking host TUI credential checks).
2997        let _guard = crate::llm::env_guard();
2998        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2999        unsafe {
3000            std::env::remove_var("HARN_DEFAULT_PROVIDER");
3001        }
3002
3003        for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
3004            assert_eq!(
3005                infer_provider(model),
3006                "cerebras",
3007                "{model} should route to its catalog provider"
3008            );
3009            let resolved = resolve_model_info(model);
3010            assert_eq!(resolved.id, model);
3011            assert_eq!(resolved.provider, "cerebras");
3012        }
3013
3014        unsafe {
3015            match prev_default_provider {
3016                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3017                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3018            }
3019        }
3020    }
3021
3022    #[test]
3023    fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
3024        reset_overrides();
3025
3026        assert_eq!(
3027            wire_model_id("groq/openai/gpt-oss-120b"),
3028            "openai/gpt-oss-120b"
3029        );
3030        assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");
3031
3032        let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
3033        let ids = equivalents
3034            .iter()
3035            .map(|(id, _)| id.as_str())
3036            .collect::<Vec<_>>();
3037
3038        assert!(
3039            ids.contains(&"groq/openai/gpt-oss-120b"),
3040            "Cerebras GPT-OSS should surface the Groq serving variant"
3041        );
3042        assert!(
3043            !ids.contains(&"gpt-oss-120b"),
3044            "equivalence results should not include the source row"
3045        );
3046        assert!(equivalents.iter().all(|(_, model)| {
3047            model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
3048        }));
3049    }
3050
3051    #[test]
3052    fn fireworks_gpt_oss_route_has_real_context_window() {
3053        // Regression: the Fireworks-served `accounts/fireworks/models/gpt-oss-120b`
3054        // wire id had NO catalog row, so its context window resolved to None and
3055        // the agent's auto-compaction budget had nothing to enforce — the prompt
3056        // grew until Fireworks rejected the turn with HTTP 400 [context_overflow]
3057        // (session 019ee303: 197467 tokens > 131071 max). Cataloging the real
3058        // 131072 window lets compaction trigger before the hard limit.
3059        reset_overrides();
3060
3061        let entry = model_catalog_entry("accounts/fireworks/models/gpt-oss-120b")
3062            .expect("Fireworks gpt-oss-120b must be in the model catalog");
3063        assert_eq!(entry.context_window, 131_072);
3064        assert_eq!(entry.provider, "fireworks");
3065        assert_eq!(
3066            entry.equivalence_group.as_deref(),
3067            Some("openai-gpt-oss-120b"),
3068        );
3069    }
3070
3071    #[test]
3072    fn test_user_catalog_overlay_re_homes_model_provider() {
3073        // Users can re-home a built-in model by overlaying a catalog row;
3074        // the exact-match catalog lookup must honor overlays as well as the
3075        // embedded TOML.
3076        reset_overrides();
3077        let mut overlay = ProvidersConfig::default();
3078        overlay.models.insert(
3079            "gpt-4o".to_string(),
3080            ModelDef {
3081                name: "GPT-4o via OpenRouter".to_string(),
3082                provider: "openrouter".to_string(),
3083                context_window: 128_000,
3084                logical_model: None,
3085                equivalence_group: None,
3086                served_variant: None,
3087                wire_model: None,
3088                api_dialect: None,
3089                rate_limits: None,
3090                performance: None,
3091                architecture: None,
3092                local_memory: None,
3093                runtime_context_window: None,
3094                stream_timeout: None,
3095                capabilities: Vec::new(),
3096                pricing: None,
3097                deprecated: false,
3098                deprecation_note: None,
3099                superseded_by: None,
3100                fast_mode: None,
3101                quality_tags: Vec::new(),
3102                availability: ModelAvailability::default(),
3103                tier: None,
3104                open_weight: None,
3105                strengths: Vec::new(),
3106                benchmarks: std::collections::BTreeMap::new(),
3107                family: None,
3108                lineage: None,
3109                complementary_with: Vec::new(),
3110                avoid_as_reviewer_for: Vec::new(),
3111            },
3112        );
3113        set_user_overrides(Some(overlay));
3114
3115        assert_eq!(infer_provider("gpt-4o"), "openrouter");
3116
3117        reset_overrides();
3118    }
3119
3120    #[test]
3121    fn test_resolve_model_info_normalizes_provider_prefixes() {
3122        let local = resolve_model_info("local:gemma-4-e4b-it");
3123        assert_eq!(local.id, "gemma-4-e4b-it");
3124        assert_eq!(local.provider, "ollama");
3125
3126        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
3127        assert_eq!(ollama.id, "qwen3:30b-a3b");
3128        assert_eq!(ollama.provider, "ollama");
3129
3130        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
3131        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
3132        assert_eq!(hf.provider, "huggingface");
3133
3134        let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
3135        assert_eq!(cerebras.id, "gpt-oss-120b");
3136        assert_eq!(cerebras.provider, "cerebras");
3137
3138        let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
3139        assert_eq!(cerebras_glm.id, "zai-glm-4.7");
3140        assert_eq!(cerebras_glm.provider, "cerebras");
3141    }
3142
3143    #[test]
3144    fn test_model_tier_from_defaults() {
3145        // Tier is now self-declared per model row in providers.toml.
3146        // Models that match an entry use the declared value; unknown
3147        // model ids fall through to `tier_defaults.default` ("mid").
3148        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
3149        assert_eq!(model_tier("gpt-4o"), "frontier");
3150        assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
3151        assert_eq!(model_tier("deepseek-v4-flash"), "mid");
3152        assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
3153        assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
3154        assert_eq!(model_tier("glm-5.1"), "frontier");
3155        // Unknown ids resolve to the default.
3156        assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
3157    }
3158
3159    #[test]
3160    fn test_model_family_preserves_underlying_hosted_lineage() {
3161        assert_eq!(
3162            model_family("openrouter", "anthropic/claude-sonnet-4-6"),
3163            "anthropic-claude"
3164        );
3165        assert_eq!(
3166            model_family("openrouter", "google/gemini-2.5-flash"),
3167            "google-gemini"
3168        );
3169        assert_eq!(
3170            model_family("openrouter", "openai/o3-mini"),
3171            "openai-reasoning"
3172        );
3173        assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
3174        assert_eq!(
3175            model_lineage("openrouter", "openai/o3-mini"),
3176            "openai-reasoning"
3177        );
3178        assert_eq!(
3179            model_lineage("anthropic", "claude-opus-4-8"),
3180            "claude-opus-adaptive"
3181        );
3182        assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
3183    }
3184
3185    #[test]
3186    fn test_complementary_reviewer_uses_different_family() {
3187        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3188            author_model: "claude-sonnet-4-6".to_string(),
3189            author_provider: None,
3190            intent: ComplementaryReviewerIntent::PlanReview,
3191            max_price_multiplier: Some(3.0),
3192        });
3193
3194        assert!(!selection.fallback, "{selection:?}");
3195        assert_eq!(selection.author.family, "anthropic-claude");
3196        assert_ne!(selection.reviewer.family, selection.author.family);
3197        assert_eq!(selection.reviewer.tier, "frontier");
3198        assert!(selection.estimated_incremental_cost.is_some());
3199        // Success path carries no machine-readable fallback code, so a caller
3200        // can treat `fallback_code.is_some()` as "must not self-review".
3201        assert_eq!(selection.fallback_code, None, "{selection:?}");
3202    }
3203
3204    #[test]
3205    fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
3206        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3207            author_model: "gpt-4o-mini".to_string(),
3208            author_provider: Some("openai".to_string()),
3209            intent: ComplementaryReviewerIntent::Review,
3210            max_price_multiplier: Some(0.01),
3211        });
3212
3213        assert!(selection.fallback, "{selection:?}");
3214        assert_eq!(selection.reviewer.id, "gpt-4o-mini");
3215        assert_eq!(selection.reviewer.family, selection.author.family);
3216        assert!(selection
3217            .fallback_reason
3218            .as_deref()
3219            .is_some_and(|reason| reason.contains("max_price_multiplier")));
3220        // The machine-readable code is stable regardless of the prose; a caller
3221        // hard-fails an independent-review step by branching on this, never by
3222        // parsing `fallback_reason`.
3223        assert_eq!(
3224            selection.fallback_code.as_deref(),
3225            Some(ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code()),
3226            "{selection:?}"
3227        );
3228        assert_eq!(
3229            ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3230            "no_diff_family_within_price"
3231        );
3232    }
3233
3234    #[test]
3235    fn test_reviewer_fallback_codes_are_stable_strings() {
3236        // Append-only contract: harn pipelines and Rust callers branch on these
3237        // exact strings, so changing one is a breaking change.
3238        assert_eq!(
3239            ReviewerFallbackCode::UnknownAuthorFamily.as_code(),
3240            "unknown_author_family"
3241        );
3242        assert_eq!(
3243            ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3244            "no_diff_family_within_price"
3245        );
3246        assert_eq!(
3247            ReviewerFallbackCode::NoDiffFamilyServerless.as_code(),
3248            "no_diff_family_serverless"
3249        );
3250        assert_eq!(
3251            ReviewerFallbackCode::AllDiffFamilyExcluded.as_code(),
3252            "all_diff_family_excluded"
3253        );
3254    }
3255
3256    #[test]
3257    fn test_resolve_model_unknown_alias() {
3258        let (id, provider) = resolve_model("gpt-4o");
3259        assert_eq!(id, "gpt-4o");
3260        assert!(provider.is_none());
3261    }
3262
3263    #[test]
3264    fn test_provider_names() {
3265        let names = provider_names();
3266        assert!(names.len() >= 7);
3267        assert!(names.contains(&"anthropic".to_string()));
3268        assert!(names.contains(&"together".to_string()));
3269        assert!(names.contains(&"local".to_string()));
3270        assert!(names.contains(&"mlx".to_string()));
3271        assert!(names.contains(&"openai".to_string()));
3272        assert!(names.contains(&"ollama".to_string()));
3273        assert!(names.contains(&"bedrock".to_string()));
3274        assert!(names.contains(&"azure_openai".to_string()));
3275        assert!(names.contains(&"vertex".to_string()));
3276    }
3277
3278    #[test]
3279    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
3280        let mut overlay = ProvidersConfig {
3281            default_provider: Some("ollama".to_string()),
3282            ..Default::default()
3283        };
3284        overlay.aliases.insert(
3285            "quickstart".to_string(),
3286            AliasDef {
3287                id: "llama3.2".to_string(),
3288                provider: "ollama".to_string(),
3289                tool_format: None,
3290            },
3291        );
3292
3293        let merged = merge_global_config(overlay);
3294
3295        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
3296        assert!(merged.providers.contains_key("anthropic"));
3297        assert!(merged.providers.contains_key("ollama"));
3298        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
3299    }
3300
3301    #[test]
3302    fn partial_provider_overlay_preserves_builtin_provider_metadata() {
3303        let overlay = parse_config_toml(
3304            r#"
3305            [providers.ollama]
3306            base_url = "http://localhost:11435"
3307            extra_headers = { "x-local" = "1" }
3308            "#,
3309        )
3310        .expect("provider overlay parses");
3311
3312        let merged = merge_global_config(overlay);
3313        let ollama = merged
3314            .providers
3315            .get("ollama")
3316            .expect("ollama remains configured");
3317
3318        assert_eq!(ollama.base_url, "http://localhost:11435");
3319        assert_eq!(ollama.auth_style, "none");
3320        assert_eq!(ollama.chat_endpoint, "/api/chat");
3321        assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
3322        assert_eq!(ollama.cost_per_1k_in, Some(0.0));
3323        assert_eq!(ollama.cost_per_1k_out, Some(0.0));
3324        assert_eq!(
3325            ollama
3326                .healthcheck
3327                .as_ref()
3328                .and_then(|healthcheck| healthcheck.path.as_deref()),
3329            Some("/api/tags")
3330        );
3331        assert_eq!(
3332            ollama.extra_headers.get("x-local").map(String::as_str),
3333            Some("1")
3334        );
3335    }
3336
3337    #[test]
3338    fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
3339        let overlay = parse_config_toml(
3340            r#"
3341            [providers.ollama]
3342            auth_style = "bearer"
3343            auth_env = "OLLAMA_API_KEY"
3344            "#,
3345        )
3346        .expect("provider overlay parses");
3347
3348        let merged = merge_global_config(overlay);
3349        let ollama = merged
3350            .providers
3351            .get("ollama")
3352            .expect("ollama remains configured");
3353
3354        assert_eq!(ollama.auth_style, "bearer");
3355        assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
3356        assert_eq!(ollama.chat_endpoint, "/api/chat");
3357    }
3358
3359    #[test]
3360    fn test_resolve_tier_model_default_aliases() {
3361        // Exercise the alias-resolution machinery, not the specific catalog
3362        // value: the model under each tier alias evolves as the embedded
3363        // providers.toml is updated. The invariants worth pinning are the
3364        // provider routing + catalog-registration of the resolved model.
3365        let (model, provider) = resolve_tier_model("frontier", None)
3366            .expect("frontier alias must resolve from the embedded catalog");
3367        assert_eq!(provider, "anthropic");
3368        assert!(
3369            model_catalog_entry(&model)
3370                .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
3371            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
3372        );
3373
3374        let (model, provider) = resolve_tier_model("small", None)
3375            .expect("small alias must resolve from the embedded catalog");
3376        assert!(
3377            [
3378                "openrouter",
3379                "huggingface",
3380                "local",
3381                "llamacpp",
3382                "mlx",
3383                "ollama"
3384            ]
3385            .contains(&provider.as_str()),
3386            "small tier should resolve to an open-weight provider (got {provider} / {model})"
3387        );
3388
3389        let (model, provider) = resolve_tier_model("mid", None)
3390            .expect("mid alias must resolve from the embedded catalog");
3391        assert_eq!(provider, "openrouter");
3392        assert_eq!(model, "qwen/qwen3.6-flash");
3393    }
3394
3395    #[test]
3396    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
3397        // tier/<provider> takes precedence over generic tier when the
3398        // caller scopes by provider. Don't pin the specific model — the
3399        // catalog evolves.
3400        let (model, provider) = resolve_tier_model("mid", Some("openai"))
3401            .expect("mid tier scoped to openai must resolve");
3402        assert_eq!(provider, "openai");
3403        assert_eq!(model, "gpt-5.4-mini");
3404        assert!(
3405            model_catalog_entry(&model).is_some(),
3406            "mid/openai alias must point at a registered model (got {model})"
3407        );
3408    }
3409
3410    #[test]
3411    fn test_provider_config_anthropic() {
3412        let pdef = provider_config("anthropic").unwrap();
3413        assert_eq!(pdef.auth_style, "header");
3414        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
3415    }
3416
3417    #[test]
3418    fn test_provider_config_mlx() {
3419        let pdef = provider_config("mlx").unwrap();
3420        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
3421        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
3422        assert_eq!(
3423            pdef.healthcheck.unwrap().path.as_deref(),
3424            Some("/v1/models")
3425        );
3426
3427        let (model, provider) = resolve_model("mlx-qwen36-27b");
3428        assert_eq!(model, "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit");
3429        assert_eq!(provider.as_deref(), Some("mlx"));
3430    }
3431
3432    #[test]
3433    fn test_enterprise_provider_defaults_and_inference() {
3434        let bedrock = provider_config("bedrock").unwrap();
3435        assert_eq!(bedrock.auth_style, "aws_sigv4");
3436        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
3437        assert_eq!(
3438            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
3439            "bedrock"
3440        );
3441        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
3442
3443        let azure = provider_config("azure_openai").unwrap();
3444        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
3445        assert_eq!(
3446            auth_env_names(&azure.auth_env),
3447            vec![
3448                "AZURE_OPENAI_API_KEY".to_string(),
3449                "AZURE_OPENAI_AD_TOKEN".to_string(),
3450                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
3451            ]
3452        );
3453
3454        let vertex = provider_config("vertex").unwrap();
3455        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
3456        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
3457    }
3458
3459    #[test]
3460    fn test_default_provider_env_override_for_unknown_model() {
3461        let _guard = crate::llm::env_guard();
3462        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3463        unsafe {
3464            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
3465        }
3466
3467        let inference = infer_provider_detail("unknown-model");
3468
3469        unsafe {
3470            match prev_default_provider {
3471                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3472                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3473            }
3474        }
3475
3476        assert_eq!(inference.provider, "openai");
3477        assert_eq!(
3478            inference.source,
3479            crate::llm::provider::ProviderInferenceSource::DefaultFallback
3480        );
3481    }
3482
3483    #[test]
3484    fn test_unknown_model_family_ignores_default_provider_fallback() {
3485        let _guard = crate::llm::env_guard();
3486        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3487        unsafe {
3488            std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
3489        }
3490
3491        let unknown = resolve_model_info("mystery-model-xyz");
3492        let known_family = resolve_model_info("deepseek-mystery-model");
3493
3494        unsafe {
3495            match prev_default_provider {
3496                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3497                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3498            }
3499        }
3500
3501        assert_eq!(unknown.provider, "ollama");
3502        assert_eq!(unknown.family, "unknown");
3503        assert_eq!(unknown.lineage, "unknown");
3504        assert_eq!(known_family.family, "deepseek");
3505        assert_eq!(known_family.lineage, "deepseek");
3506    }
3507
3508    #[test]
3509    fn test_resolve_base_url_no_env() {
3510        let pdef = ProviderDef {
3511            base_url: "https://example.com".to_string(),
3512            ..Default::default()
3513        };
3514        assert_eq!(resolve_base_url(&pdef), "https://example.com");
3515    }
3516
3517    #[test]
3518    fn test_default_config_roundtrip() {
3519        let config = default_config();
3520        assert!(!config.providers.is_empty());
3521        assert!(!config.inference_rules.is_empty());
3522        // Tier is now declared on each model row; tier_rules is allowed
3523        // to be empty (the rule table is a legacy fallback only).
3524        assert_eq!(config.tier_defaults.default, "mid");
3525        // At least the new open-weight frontiers should have explicit tiers.
3526        let frontiers = config
3527            .models
3528            .iter()
3529            .filter(|(_, m)| m.tier.as_deref() == Some("frontier"))
3530            .count();
3531        assert!(
3532            frontiers >= 4,
3533            "expected at least 4 frontier-tagged models, got {frontiers}"
3534        );
3535    }
3536
3537    #[test]
3538    fn test_local_ollama_catalog_metadata() {
3539        reset_overrides();
3540
3541        let devstral =
3542            model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
3543        assert_eq!(devstral.context_window, 262_144);
3544        assert!(!devstral.capabilities.iter().any(|cap| cap == "vision"));
3545
3546        let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
3547        assert_eq!(gemma4.context_window, 262_144);
3548        assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
3549    }
3550
3551    #[test]
3552    fn local_gemma4_source_tags_match_structured_capability_tags() {
3553        reset_overrides();
3554        let config = default_config();
3555        for id in [
3556            "gemma-4-e2b-it",
3557            "gemma-4-e4b-it",
3558            "gemma-4-12b-it",
3559            "gemma-4-26b-a4b-it",
3560            "gemma-4-31b-it",
3561        ] {
3562            let source = config
3563                .models
3564                .get(id)
3565                .unwrap_or_else(|| panic!("{id} should be in the embedded catalog"));
3566            let derived = effective_model_capability_tags(&source.provider, id);
3567            assert_eq!(
3568                source.capabilities, derived,
3569                "{}/{} source capabilities must match derived capability_tags",
3570                source.provider, id
3571            );
3572        }
3573    }
3574
3575    #[test]
3576    fn capability_tags_include_structured_capability_flags() {
3577        let caps = crate::llm::capabilities::Capabilities {
3578            native_tools: true,
3579            tool_search: vec!["web".to_string()],
3580            vision_supported: true,
3581            audio: true,
3582            pdf: true,
3583            video: true,
3584            files_api_supported: true,
3585            prompt_caching: true,
3586            thinking_modes: vec!["enabled".to_string()],
3587            structured_output: Some("native".to_string()),
3588            ..Default::default()
3589        };
3590
3591        assert_eq!(
3592            capability_tags_from_capabilities(&caps),
3593            vec![
3594                "streaming",
3595                "tools",
3596                "tool_search",
3597                "vision",
3598                "audio",
3599                "pdf",
3600                "video",
3601                "files",
3602                "prompt_caching",
3603                "thinking",
3604                "structured_output",
3605            ]
3606        );
3607    }
3608
3609    #[test]
3610    fn test_external_config_overlays_default_catalog() {
3611        let mut config = default_config();
3612        let mut overlay = ProvidersConfig {
3613            default_provider: Some("ollama".to_string()),
3614            ..Default::default()
3615        };
3616        overlay.providers.insert(
3617            "custom".to_string(),
3618            ProviderDef {
3619                base_url: "https://llm.example.test/v1".to_string(),
3620                chat_endpoint: "/chat/completions".to_string(),
3621                ..Default::default()
3622            },
3623        );
3624
3625        config.merge_from(&overlay);
3626
3627        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
3628        assert!(config.providers.contains_key("custom"));
3629        assert!(config.providers.contains_key("anthropic"));
3630        assert!(config.providers.contains_key("ollama"));
3631    }
3632
3633    #[test]
3634    fn test_model_params_empty() {
3635        let params = model_params("claude-sonnet-4-20250514");
3636        assert!(params.is_empty());
3637    }
3638
3639    #[test]
3640    fn test_user_overrides_add_provider_and_alias() {
3641        reset_overrides();
3642        let mut overlay = ProvidersConfig::default();
3643        overlay.providers.insert(
3644            "acme".to_string(),
3645            ProviderDef {
3646                base_url: "https://llm.acme.test/v1".to_string(),
3647                chat_endpoint: "/chat/completions".to_string(),
3648                ..Default::default()
3649            },
3650        );
3651        overlay.aliases.insert(
3652            "acme-fast".to_string(),
3653            AliasDef {
3654                id: "acme/model-fast".to_string(),
3655                provider: "acme".to_string(),
3656                tool_format: Some("native".to_string()),
3657            },
3658        );
3659        set_user_overrides(Some(overlay));
3660
3661        let (model, provider) = resolve_model("acme-fast");
3662        assert_eq!(model, "acme/model-fast");
3663        assert_eq!(provider.as_deref(), Some("acme"));
3664        assert!(provider_names().contains(&"acme".to_string()));
3665        assert_eq!(
3666            provider_config("acme").map(|provider| provider.base_url),
3667            Some("https://llm.acme.test/v1".to_string())
3668        );
3669
3670        reset_overrides();
3671    }
3672
3673    #[test]
3674    fn test_default_tool_format_uses_capability_matrix() {
3675        reset_overrides();
3676
3677        assert_eq!(
3678            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
3679            "native"
3680        );
3681        // devstral dropped its stale heredoc `text` pin (it has no reserved-token
3682        // constraint, so there was no structural reason to stay on heredoc) and
3683        // now inherits the global `json` text-channel default. Heredoc is still
3684        // reachable via an explicit `preferred_tool_format = "text"` pin.
3685        assert_eq!(
3686            default_tool_format("devstral-small-2:24b", "ollama"),
3687            "json"
3688        );
3689        // vLLM/SGLang-served Gemma 4 exposes OpenAI-compatible function calling,
3690        // so the local route declares native tools (matching every hosted gemma-4
3691        // sibling) rather than degrading to a text tool format.
3692        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "native");
3693        // deepseek-v3.2 and qwen3-coder both pin `text` in the capability
3694        // matrix, so they keep heredoc rather than inheriting the json default.
3695        assert_eq!(
3696            default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
3697            "text"
3698        );
3699        assert_eq!(
3700            default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
3701            "text"
3702        );
3703        assert_eq!(
3704            default_tool_format("qwen/qwen3.6-flash", "openrouter"),
3705            "native"
3706        );
3707        assert_eq!(default_tool_format("z-ai/glm-5.2", "openrouter"), "text");
3708        // GPT-OSS tool defaults are provider-specific: aggregate OpenRouter and
3709        // Fireworks use Harn's heredoc text tools, as does DeepInfra — its
3710        // native Harmony channel drops tool calls into the private reasoning
3711        // channel (footgun), so it is pinned to text. Native-reliable hosts
3712        // (Cerebras, Groq) stay on provider-native tool calls.
3713        assert_eq!(
3714            default_tool_format("openai/gpt-oss-120b", "openrouter"),
3715            "text"
3716        );
3717        assert_eq!(
3718            default_tool_format("accounts/fireworks/models/gpt-oss-120b", "fireworks"),
3719            "text"
3720        );
3721        assert_eq!(default_tool_format("gpt-oss-120b", "cerebras"), "native");
3722        assert_eq!(
3723            default_tool_format("openai/gpt-oss-120b", "deepinfra"),
3724            "text"
3725        );
3726        assert_eq!(default_tool_format("openai/gpt-oss-120b", "groq"), "native");
3727    }
3728
3729    #[test]
3730    fn test_default_tool_format_unpinned_text_channel_is_json() {
3731        reset_overrides();
3732
3733        // GLOBAL DEFAULT FLIP: a model with no capability-matrix pin and no
3734        // native tool support resolves to fenced-json (`json`), not heredoc
3735        // (`text`). This is the behavior change — an unknown text-channel model
3736        // gets the delimiter-safe default. (Native-capable unknowns still get
3737        // `native`; pinned models still honor their pin, covered above.)
3738        assert_eq!(default_tool_format("mystery-model-xyz", "ollama"), "json");
3739    }
3740
3741    #[test]
3742    fn test_claude_family_defaults_native_without_host_pin() {
3743        reset_overrides();
3744
3745        // Unpinned claude-family routes on first-class tool-calling providers
3746        // resolve `native` from the capability matrix alone — no host alias
3747        // pin required. The openrouter rows exercise the family-level
3748        // catch-all: a dated slug, an unparseable version segment, and a new
3749        // family name have no versioned rule and previously fell through to
3750        // the global text-channel `json` default.
3751        for (model, provider) in [
3752            ("claude-sonnet-4-6", "anthropic"),
3753            ("claude-sonnet-5", "anthropic"),
3754            ("anthropic/claude-nova-1", "anthropic"),
3755            ("anthropic/claude-sonnet-4.6", "openrouter"),
3756            ("anthropic/claude-sonnet-5", "openrouter"),
3757            ("anthropic/claude-opus-4-5-20251101", "openrouter"),
3758            ("anthropic/claude-sonnet-next", "openrouter"),
3759            ("anthropic/claude-nova-1", "openrouter"),
3760            ("anthropic.claude-sonnet-4-6", "bedrock"),
3761        ] {
3762            assert_eq!(
3763                default_tool_format(model, provider),
3764                "native",
3765                "{provider}:{model} must default native without a host pin"
3766            );
3767        }
3768
3769        // An unpinned host alias resolves native end-to-end through
3770        // `resolve_model_info` (alias -> id -> capability matrix -> dialect
3771        // guard) — the exact seam hosts consume via `llm_resolve_model`.
3772        let overlay = parse_config_toml(
3773            "[aliases.probe-sonnet]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\n",
3774        )
3775        .expect("overlay parses");
3776        set_user_overrides(Some(overlay));
3777        let resolved = resolve_model_info("probe-sonnet");
3778        assert_eq!(resolved.provider, "anthropic");
3779        assert_eq!(
3780            resolved.tool_format, "native",
3781            "an unpinned claude alias must inherit the family-level native default"
3782        );
3783        clear_user_overrides();
3784
3785        // An explicit host pin still wins over the family default: a
3786        // text-channel `json` pin on a native-capable claude route survives
3787        // resolution (the dialect guard only corrects known-broken combos).
3788        let overlay = parse_config_toml(
3789            "[aliases.probe-sonnet-json]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\ntool_format = \"json\"\n",
3790        )
3791        .expect("overlay parses");
3792        set_user_overrides(Some(overlay));
3793        let pinned = resolve_model_info("probe-sonnet-json");
3794        assert_eq!(
3795            pinned.tool_format, "json",
3796            "an explicit host pin must win over the claude family default"
3797        );
3798        clear_user_overrides();
3799
3800        // Non-claude models keep the global text-channel `json` default —
3801        // the catch-all is family-scoped, not a provider-wide flip.
3802        assert_eq!(
3803            default_tool_format("mystery-model-xyz", "openrouter"),
3804            "json"
3805        );
3806    }
3807
3808    #[test]
3809    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
3810        reset_overrides();
3811        let mut overlay = ProvidersConfig::default();
3812        overlay.models.insert(
3813            "acme/model-fast".to_string(),
3814            ModelDef {
3815                name: "Acme Fast".to_string(),
3816                provider: "acme".to_string(),
3817                context_window: 65_536,
3818                logical_model: None,
3819                equivalence_group: None,
3820                served_variant: None,
3821                wire_model: None,
3822                api_dialect: None,
3823                rate_limits: None,
3824                performance: None,
3825                architecture: None,
3826                local_memory: None,
3827                runtime_context_window: None,
3828                stream_timeout: Some(42.0),
3829                capabilities: vec!["tools".to_string(), "streaming".to_string()],
3830                pricing: Some(ModelPricing {
3831                    input_per_mtok: 1.25,
3832                    output_per_mtok: 2.5,
3833                    cache_read_per_mtok: Some(0.25),
3834                    cache_write_per_mtok: None,
3835                }),
3836                deprecated: false,
3837                deprecation_note: None,
3838                superseded_by: None,
3839                fast_mode: None,
3840                quality_tags: Vec::new(),
3841                availability: ModelAvailability::default(),
3842                tier: None,
3843                open_weight: None,
3844                strengths: Vec::new(),
3845                benchmarks: std::collections::BTreeMap::new(),
3846                family: None,
3847                lineage: None,
3848                complementary_with: Vec::new(),
3849                avoid_as_reviewer_for: Vec::new(),
3850            },
3851        );
3852        overlay
3853            .qc_defaults
3854            .insert("acme".to_string(), "acme/model-cheap".to_string());
3855        set_user_overrides(Some(overlay));
3856
3857        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
3858        assert_eq!(entry.context_window, 65_536);
3859        assert_eq!(
3860            entry.capabilities,
3861            vec!["streaming".to_string(), "tools".to_string()]
3862        );
3863        assert_eq!(
3864            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
3865            Some(1.25)
3866        );
3867        assert_eq!(
3868            pricing_per_1k_for("acme", "acme/model-fast"),
3869            Some((0.00125, 0.0025))
3870        );
3871        assert_eq!(
3872            qc_default_model("acme").as_deref(),
3873            Some("acme/model-cheap")
3874        );
3875
3876        reset_overrides();
3877    }
3878
3879    #[test]
3880    fn test_user_overrides_prepend_inference_rules() {
3881        reset_overrides();
3882        let mut overlay = ProvidersConfig::default();
3883        overlay.inference_rules.push(InferenceRule {
3884            pattern: Some("internal-*".to_string()),
3885            contains: None,
3886            exact: None,
3887            provider: "openai".to_string(),
3888        });
3889        set_user_overrides(Some(overlay));
3890
3891        assert_eq!(infer_provider("internal-foo"), "openai");
3892
3893        reset_overrides();
3894    }
3895
3896    // ── Embedded providers.toml invariants ───────────────────────────────────
3897    // These tests pin properties of the *system* — TOML parses, every
3898    // alias resolves, every deprecated model has a note — without
3899    // pinning specific catalog values. They survive future catalog
3900    // churn and surface real schema breakage.
3901
3902    #[test]
3903    fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
3904        let config = default_config();
3905        assert!(
3906            config.providers.len() >= 10,
3907            "expected >=10 providers in embedded catalog, got {}",
3908            config.providers.len()
3909        );
3910        assert!(
3911            config.models.len() >= 20,
3912            "expected >=20 models in embedded catalog, got {}",
3913            config.models.len()
3914        );
3915        assert!(
3916            config.aliases.len() >= 15,
3917            "expected >=15 aliases in embedded catalog, got {}",
3918            config.aliases.len()
3919        );
3920        assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
3921    }
3922
3923    #[test]
3924    fn embedded_catalog_every_deprecated_model_has_a_note() {
3925        let config = default_config();
3926        let offenders: Vec<&str> = config
3927            .models
3928            .iter()
3929            .filter(|(_, model)| {
3930                model.deprecated
3931                    && model
3932                        .deprecation_note
3933                        .as_deref()
3934                        .unwrap_or("")
3935                        .trim()
3936                        .is_empty()
3937            })
3938            .map(|(id, _)| id.as_str())
3939            .collect();
3940        assert!(
3941            offenders.is_empty(),
3942            "deprecated models missing a deprecation_note: {offenders:?}"
3943        );
3944    }
3945
3946    #[test]
3947    fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
3948        let config = default_config();
3949        for id in ["gpt-oss-120b", "zai-glm-4.7"] {
3950            let model = config.models.get(id).expect("current public Cerebras row");
3951            assert_eq!(model.provider, "cerebras");
3952            assert_eq!(model.availability, ModelAvailability::Serverless);
3953            assert!(!model.deprecated);
3954        }
3955
3956        let llama = config
3957            .models
3958            .get("llama-3.3-70b")
3959            .expect("legacy Cerebras row");
3960        assert_eq!(llama.provider, "cerebras");
3961        assert_eq!(llama.availability, ModelAvailability::Dedicated);
3962        assert!(llama.deprecated);
3963    }
3964
3965    #[test]
3966    fn embedded_openrouter_gpt_oss_120b_has_no_fragment_bleed() {
3967        // Regression for the provider-catalog leading-key bleed: the openrouter
3968        // `openai/gpt-oss-120b` row was the last model in its fragment with no
3969        // inline tier/open_weight/strengths, so the next fragment's leading bare
3970        // keys reattached to it after raw-text concatenation — mislabeling it as
3971        // `open_weight = false` with a spurious `vision` strength. It must now be
3972        // self-described: open weight, no vision, and a tier consistent with the
3973        // rest of its equivalence group.
3974        let config = default_config();
3975        let model = config
3976            .models
3977            .get("openai/gpt-oss-120b")
3978            .expect("openrouter gpt-oss-120b row");
3979        assert_eq!(model.provider, "openrouter");
3980        assert_eq!(
3981            model.open_weight,
3982            Some(true),
3983            "gpt-oss-120b is Apache-2.0 open weight, not the bled-in open_weight=false"
3984        );
3985        assert!(
3986            !model.strengths.iter().any(|s| s == "vision"),
3987            "gpt-oss-120b is text-only; the bled-in `vision` strength must be gone: {:?}",
3988            model.strengths
3989        );
3990        assert!(
3991            !model.strengths.is_empty(),
3992            "gpt-oss-120b must carry its own strengths, not None"
3993        );
3994
3995        // tier is a property of the logical model: every active row in the
3996        // openai-gpt-oss-120b equivalence group must agree.
3997        let group_tiers: std::collections::BTreeSet<_> = config
3998            .models
3999            .values()
4000            .filter(|m| {
4001                m.equivalence_group.as_deref() == Some("openai-gpt-oss-120b") && !m.deprecated
4002            })
4003            .map(|m| m.tier.clone())
4004            .collect();
4005        assert_eq!(
4006            group_tiers.len(),
4007            1,
4008            "openai-gpt-oss-120b group must share one tier, got {group_tiers:?}"
4009        );
4010    }
4011
4012    #[test]
4013    fn embedded_catalog_every_model_targets_a_registered_provider() {
4014        let config = default_config();
4015        let known: std::collections::BTreeSet<&str> =
4016            config.providers.keys().map(String::as_str).collect();
4017        let orphans: Vec<(&str, &str)> = config
4018            .models
4019            .iter()
4020            .filter(|(_, model)| !known.contains(model.provider.as_str()))
4021            .map(|(id, model)| (id.as_str(), model.provider.as_str()))
4022            .collect();
4023        assert!(
4024            orphans.is_empty(),
4025            "models reference unknown providers: {orphans:?}"
4026        );
4027    }
4028
4029    #[test]
4030    fn embedded_catalog_every_alias_targets_a_registered_provider() {
4031        let config = default_config();
4032        let known: std::collections::BTreeSet<&str> =
4033            config.providers.keys().map(String::as_str).collect();
4034        let orphans: Vec<(&str, &str)> = config
4035            .aliases
4036            .iter()
4037            .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
4038            .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
4039            .collect();
4040        assert!(
4041            orphans.is_empty(),
4042            "aliases reference unknown providers: {orphans:?}"
4043        );
4044    }
4045
4046    #[test]
4047    fn embedded_catalog_every_qc_default_targets_a_known_model() {
4048        let config = default_config();
4049        let orphans: Vec<(&str, &str)> = config
4050            .qc_defaults
4051            .iter()
4052            .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
4053            .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
4054            .collect();
4055        assert!(
4056            orphans.is_empty(),
4057            "qc_defaults reference unknown models: {orphans:?}"
4058        );
4059    }
4060
4061    #[test]
4062    fn embedded_catalog_pricing_rates_are_non_negative() {
4063        let config = default_config();
4064        for (id, model) in &config.models {
4065            let Some(pricing) = &model.pricing else {
4066                continue;
4067            };
4068            assert!(
4069                pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
4070                "{id}: negative pricing — in={} out={}",
4071                pricing.input_per_mtok,
4072                pricing.output_per_mtok
4073            );
4074            if let Some(rate) = pricing.cache_read_per_mtok {
4075                assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
4076            }
4077            if let Some(rate) = pricing.cache_write_per_mtok {
4078                assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
4079            }
4080        }
4081    }
4082
4083    #[test]
4084    fn model_availability_parses_known_strings() {
4085        assert_eq!(
4086            ModelAvailability::parse("serverless"),
4087            Some(ModelAvailability::Serverless)
4088        );
4089        assert_eq!(
4090            ModelAvailability::parse("dedicated"),
4091            Some(ModelAvailability::Dedicated)
4092        );
4093        assert_eq!(
4094            ModelAvailability::parse("unknown"),
4095            Some(ModelAvailability::Unknown)
4096        );
4097        assert_eq!(ModelAvailability::parse("provisioned"), None);
4098        for value in [
4099            ModelAvailability::Serverless,
4100            ModelAvailability::Dedicated,
4101            ModelAvailability::Unknown,
4102        ] {
4103            assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
4104        }
4105    }
4106
4107    #[test]
4108    fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
4109        let config = default_config();
4110        let model = config
4111            .models
4112            .get("Qwen/Qwen3-Coder-Next-FP8")
4113            .expect("Together Qwen3 Coder Next FP8 is cataloged");
4114        assert_eq!(model.provider, "together");
4115        assert_eq!(model.availability, ModelAvailability::Dedicated);
4116    }
4117
4118    #[test]
4119    fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
4120        // A dedicated-only model behind a tier alias would silently fail
4121        // every serverless caller; the catalog must keep those routes
4122        // separated.
4123        let config = default_config();
4124        let dedicated: std::collections::BTreeSet<(&str, &str)> = config
4125            .models
4126            .iter()
4127            .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
4128            .map(|(id, model)| (model.provider.as_str(), id.as_str()))
4129            .collect();
4130        for (name, alias) in &config.aliases {
4131            if matches!(
4132                name.as_str(),
4133                "frontier"
4134                    | "mid"
4135                    | "small"
4136                    | "tier/frontier"
4137                    | "tier/mid"
4138                    | "tier/small"
4139                    | "sonnet"
4140                    | "opus"
4141                    | "haiku"
4142            ) {
4143                assert!(
4144                    !dedicated.contains(&(alias.provider.as_str(), alias.id.as_str())),
4145                    "tier alias `{name}` targets dedicated-only route `{}/{}`",
4146                    alias.provider,
4147                    alias.id,
4148                );
4149            }
4150        }
4151    }
4152
4153    #[test]
4154    fn embedded_catalog_tier_aliases_resolve_to_active_models() {
4155        // The three canonical tier aliases (frontier / mid / small) MUST
4156        // resolve to non-deprecated catalog entries; a default that
4157        // routes the loop into a sunsetted model is a release blocker.
4158        for alias in ["frontier", "mid", "small"] {
4159            let (model, _provider) = resolve_tier_model(alias, None)
4160                .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
4161            let entry = model_catalog_entry(&model).unwrap_or_else(|| {
4162                panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
4163            });
4164            assert!(
4165                !entry.deprecated,
4166                "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
4167                entry.deprecation_note
4168            );
4169        }
4170    }
4171
4172    #[test]
4173    fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
4174        // The `opus` alias must follow the newest Opus release, and that
4175        // release advertises its (off-by-default) fast-mode tier.
4176        let (model, provider) = resolve_model("opus");
4177        assert_eq!(model, "claude-opus-4-8");
4178        assert_eq!(provider.as_deref(), Some("anthropic"));
4179
4180        let opus48 = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
4181        assert!(!opus48.deprecated, "newest Opus must not be deprecated");
4182        let fast = opus48.fast_mode.expect("opus 4.8 advertises fast mode");
4183        assert_eq!(fast.param, "speed");
4184        assert_eq!(fast.value, "fast");
4185        assert_eq!(fast.status.as_deref(), Some("research_preview"));
4186        let fast_pricing = fast.pricing.expect("fast mode carries premium pricing");
4187        let standard = opus48.pricing.expect("opus 4.8 standard pricing");
4188        assert!(
4189            fast_pricing.input_per_mtok > standard.input_per_mtok,
4190            "fast mode must be premium-priced relative to standard"
4191        );
4192    }
4193
4194    #[test]
4195    fn superseded_opus_models_point_at_claude_opus_4_8() {
4196        // Earlier Opus rows are deprecated and carry a structured
4197        // `superseded_by` pointer to the current flagship.
4198        for model in ["claude-opus-4-7", "claude-opus-4-6"] {
4199            let entry =
4200                model_catalog_entry(model).unwrap_or_else(|| panic!("{model} catalog entry"));
4201            assert!(entry.deprecated, "{model} should be deprecated");
4202            assert_eq!(
4203                entry.superseded_by.as_deref(),
4204                Some("claude-opus-4-8"),
4205                "{model} should be superseded by claude-opus-4-8"
4206            );
4207        }
4208    }
4209
4210    #[test]
4211    fn opus_46_no_longer_advertises_fast_mode() {
4212        let opus46 = model_catalog_entry("claude-opus-4-6").expect("opus 4.6 catalog entry");
4213        assert!(
4214            opus46.fast_mode.is_none(),
4215            "Anthropic removed Opus 4.6 fast mode on 2026-06-29; Harn should not advertise it"
4216        );
4217
4218        let opus47 = model_catalog_entry("claude-opus-4-7").expect("opus 4.7 catalog entry");
4219        assert!(
4220            opus47.fast_mode.is_some(),
4221            "Opus 4.7 still advertises its own fast-mode tier"
4222        );
4223    }
4224
4225    #[test]
4226    fn gpt_5_5_fast_mode_rides_service_tier() {
4227        // Fast mode is provider-agnostic: OpenAI exposes it through the
4228        // `service_tier` knob rather than Anthropic's `speed`.
4229        let entry = model_catalog_entry("gpt-5.5").expect("gpt-5.5 catalog entry");
4230        let fast = entry.fast_mode.expect("gpt-5.5 advertises a fast tier");
4231        assert_eq!(fast.param, "service_tier");
4232        assert_eq!(fast.status.as_deref(), Some("ga"));
4233    }
4234}
harn_vm/llm_config.rs

harn_vm/
llm_config.rs