harn_vm/
llm_config.rs

1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::{OnceLock, RwLock};
6
/// Process-wide slot for a `ProvidersConfig`; being a `OnceLock`, it can be
/// initialized at most once.
/// NOTE(review): the initialization site is outside this view — presumably
/// this is the process-wide default config; confirm against the loader.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Process-wide, set-once string slot.
/// NOTE(review): presumably the path `CONFIG` was loaded from — confirm.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
/// Process-wide optional `ProvidersConfig` behind an `RwLock`, so unlike
/// `CONFIG` its contents can be replaced or cleared after initialization.
/// NOTE(review): who installs the overlay is not visible here.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    /// Thread-local provider config overlays installed by the CLI after it
    /// reads the nearest `harn.toml` plus any installed package manifests.
    /// Kept thread-local so tests and multi-VM hosts can scope extensions to
    /// the current run without mutating the process-wide default config.
    /// Starts out as `None` (no overrides installed) on every thread.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
18
/// One layer of provider/model catalog configuration.
///
/// Every field is optional on the wire (`#[serde(default)]`), so an empty
/// document deserializes to an empty layer. Layers are stacked with
/// [`ProvidersConfig::merge_from`]; the per-field notes below describe how an
/// overlay layer combines with the layer it is merged into.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Default provider name. An overlay replaces it only when it sets one.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions keyed by provider name. An overlay entry merges
    /// field-wise into an existing entry, or is inserted when new.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias definitions keyed by alias name. An overlay entry replaces the
    /// same-named entry wholesale.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Per-alias tool-calling records. An overlay entry replaces the
    /// same-named entry wholesale.
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model rows keyed by catalog id. An overlay row replaces the whole
    /// existing row; accumulated `patch.models` entries are re-applied after.
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// String-to-string defaults; overlay keys replace same-named keys.
    /// NOTE(review): the meaning of "qc" is not visible in this file section.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Inference rules. A non-empty overlay list is placed BEFORE the
    /// existing rules rather than replacing them.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Tier rules. A non-empty overlay list is placed BEFORE the existing
    /// rules rather than replacing them.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier defaults. An overlay replaces the whole value only when its
    /// `default` differs from `default_mid()`.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Default parameter tables keyed by model pattern. For a shared pattern
    /// the overlay's keys are merged into the existing table key by key.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Parameter tables keyed by role. For a shared role the overlay's keys
    /// are merged into the existing table key by key.
    #[serde(default)]
    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Routes hidden from the exported/served catalog (see [`SuppressDef`]).
    #[serde(default)]
    pub suppress: SuppressDef,
    /// Sticky field-wise model patches (see [`PatchDef`]).
    #[serde(default)]
    pub patch: PatchDef,
}
48
49/// Field-wise catalog patches applied on top of merged model rows.
50///
51/// Overlays have three complementary tools for adjusting the baseline
52/// catalog, from coarsest to finest:
53///
54/// 1. **Whole-row replace** — `[models.<id>]` replaces the entire model row.
55///    Use it to add a new route or when the overlay intentionally owns every
56///    field of the row.
57/// 2. **Field patch** — `[patch.models.<id>]` merges individual fields into
58///    the existing row, leaving every unmentioned field at its baseline
59///    value. Use it to tweak one knob (a `stream_timeout`, one pricing rate)
60///    without copying the row verbatim and silently freezing the rest of its
61///    fields against upstream catalog updates.
62/// 3. **Route suppression** — `[suppress]` hides baseline routes from the
63///    exported/served artifact entirely (see [`SuppressDef`]).
64///
65/// Patch semantics:
66/// - Nested tables merge recursively; scalars **and arrays** replace the
67///   base value wholesale (there is deliberately no per-element array merge).
68/// - Within a single overlay, `[models.<id>]` whole-row replacement applies
69///   BEFORE `[patch.models.<id>]`, so patch fields win over the same
70///   overlay's whole-row fields.
71/// - Patches are STICKY across layers: once accumulated, a patch re-applies
72///   after every later layer's merge, including a later layer's whole-row
73///   replacement of the same id. A patch means "always tweak this field",
74///   not "tweak it once".
75/// - A patch whose target row does not exist yet stays in the accumulator
76///   silently and applies as soon as a later layer contributes the row;
77///   [`ProvidersConfig::dangling_model_patches`] reports the leftovers for
78///   doctor/export validation.
79/// - A patch that produces a type-invalid row warns once per process and
80///   keeps the unpatched row.
#[derive(Debug, Clone, Deserialize, Default, PartialEq)]
pub struct PatchDef {
    /// `[patch.models.<id>]` tables: partial `ModelDef` field sets merged
    /// field-wise into the model row with the same catalog id.
    ///
    /// Stored as raw `toml::Value`s: a patch is only checked against the
    /// `ModelDef` schema when it is applied to an existing row, not when the
    /// patch itself is deserialized.
    #[serde(default)]
    pub models: BTreeMap<String, toml::Value>,
}
88
89/// Routes hidden from the exported/served provider catalog artifact.
90///
91/// Lets an overlay drop baseline routes that are broken or unusable for the
92/// embedding product (e.g. a dedicated-only serving route, or a local image
93/// with a broken server-side tool parser) without forking the baseline
94/// catalog. Suppression is artifact-level presentation: it removes the model
95/// row, its aliases, and any recommendation variant derived from it, but does
96/// not block runtime resolution of an explicitly requested model id.
97///
98/// This is one of three overlay tools (see [`PatchDef`] for the full set):
99/// whole-row `[models.<id>]` replacement, field-wise `[patch.models.<id>]`
100/// patches, and `[suppress]` route suppression. Combined with whole-row
101/// `models` replacement, suppression also expresses route renames: define
102/// the row under the new id and suppress the old one.
#[derive(Debug, Clone, Deserialize, Default, PartialEq, Eq)]
pub struct SuppressDef {
    /// `"provider:model_id"` selectors. Split on the FIRST colon only —
    /// model ids may themselves contain colons (e.g. Ollama image tags such
    /// as `ollama:qwen3.6:35b-a3b-coding-nvfp4`). Entries without a colon
    /// match nothing.
    ///
    /// When layers are merged, overlay routes are appended in order and
    /// exact duplicates of already-listed routes are skipped.
    #[serde(default)]
    pub routes: Vec<String>,
}
112
113impl ProvidersConfig {
114    pub fn is_empty(&self) -> bool {
115        self.default_provider.is_none()
116            && self.providers.is_empty()
117            && self.aliases.is_empty()
118            && self.alias_tool_calling.is_empty()
119            && self.models.is_empty()
120            && self.qc_defaults.is_empty()
121            && self.inference_rules.is_empty()
122            && self.tier_rules.is_empty()
123            && self.model_defaults.is_empty()
124            && self.model_roles.is_empty()
125            && self.suppress.routes.is_empty()
126            && self.patch.models.is_empty()
127            && self.tier_defaults.default == default_mid()
128    }
129
130    /// `[patch.models]` ids with no matching model row in the merged config.
131    ///
132    /// Dangling patches are not an error at merge time — the row may arrive
133    /// from a later layer — but doctor/export surfaces can report leftovers
134    /// so a typo'd id doesn't silently patch nothing.
135    pub fn dangling_model_patches(&self) -> Vec<&str> {
136        self.patch
137            .models
138            .keys()
139            .filter(|id| !self.models.contains_key(*id))
140            .map(String::as_str)
141            .collect()
142    }
143
144    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
145        for (name, provider) in &overlay.providers {
146            match self.providers.get_mut(name) {
147                Some(existing) => existing.merge_from(provider),
148                None => {
149                    self.providers.insert(name.clone(), provider.clone());
150                }
151            }
152        }
153        self.aliases.extend(overlay.aliases.clone());
154        self.alias_tool_calling
155            .extend(overlay.alias_tool_calling.clone());
156        self.models.extend(overlay.models.clone());
157        self.qc_defaults.extend(overlay.qc_defaults.clone());
158
159        // `[patch.models]` field-wise patches. Two deliberate ordering rules
160        // (see [`PatchDef`]):
161        //   1. Within one overlay, the whole-row `models` replacement above
162        //      lands first, then patches — so `[patch.models.X]` fields win
163        //      over the same overlay's `[models.X]` row.
164        //   2. Patches are sticky: the accumulator re-applies after EVERY
165        //      layer's merge, so a later layer's whole-row replacement still
166        //      gets earlier layers' field tweaks re-applied on top. A patch
167        //      means "always tweak this field", not "tweak it once".
168        // Per-id patches from later layers deep-merge into the accumulator
169        // (later layer wins per field), so two layers patching different
170        // fields of the same row both stay sticky.
171        // Short-circuit when no layer has contributed a patch so existing
172        // patch-free configs pay nothing here.
173        if !overlay.patch.models.is_empty() || !self.patch.models.is_empty() {
174            for (id, patch) in &overlay.patch.models {
175                match self.patch.models.get_mut(id) {
176                    Some(existing) => deep_merge_toml(existing, patch),
177                    None => {
178                        self.patch.models.insert(id.clone(), patch.clone());
179                    }
180                }
181            }
182            apply_model_patches(&mut self.models, &self.patch.models);
183        }
184
185        if overlay.default_provider.is_some() {
186            self.default_provider = overlay.default_provider.clone();
187        }
188
189        if !overlay.inference_rules.is_empty() {
190            let mut merged = overlay.inference_rules.clone();
191            merged.extend(self.inference_rules.clone());
192            self.inference_rules = merged;
193        }
194
195        if !overlay.tier_rules.is_empty() {
196            let mut merged = overlay.tier_rules.clone();
197            merged.extend(self.tier_rules.clone());
198            self.tier_rules = merged;
199        }
200
201        if overlay.tier_defaults.default != default_mid() {
202            self.tier_defaults = overlay.tier_defaults.clone();
203        }
204
205        for (pattern, defaults) in &overlay.model_defaults {
206            self.model_defaults
207                .entry(pattern.clone())
208                .or_default()
209                .extend(defaults.clone());
210        }
211
212        for (role, defaults) in &overlay.model_roles {
213            self.model_roles
214                .entry(role.clone())
215                .or_default()
216                .extend(defaults.clone());
217        }
218
219        for route in &overlay.suppress.routes {
220            if !self.suppress.routes.contains(route) {
221                self.suppress.routes.push(route.clone());
222            }
223        }
224    }
225}
226
227/// Recursively merge `overlay` into `base`. Tables merge key-by-key; every
228/// other value shape — scalars AND arrays — replaces the base value
229/// wholesale. Replacing arrays instead of merging them is the documented
230/// convention: there is no sane universal element-wise merge for lists like
231/// `capabilities` or `strengths`, so a patch that names an array owns it.
232fn deep_merge_toml(base: &mut toml::Value, overlay: &toml::Value) {
233    match (base, overlay) {
234        (toml::Value::Table(base_table), toml::Value::Table(overlay_table)) => {
235            for (key, overlay_value) in overlay_table {
236                match base_table.get_mut(key) {
237                    Some(base_value) => deep_merge_toml(base_value, overlay_value),
238                    None => {
239                        base_table.insert(key.clone(), overlay_value.clone());
240                    }
241                }
242            }
243        }
244        (base_slot, overlay_value) => *base_slot = overlay_value.clone(),
245    }
246}
247
/// True once a type-invalid `[patch.models]` entry has been reported.
/// Patches re-apply on every layer merge (stickiness), so an unconditional
/// eprintln would repeat the same diagnostic once per layer per process.
/// The flag is shared by all patch ids, so only the first invalid patch seen
/// in the process is ever printed.
static MODEL_PATCH_TYPE_ERROR_WARNED: AtomicBool = AtomicBool::new(false);
252
253/// Apply every accumulated `[patch.models]` entry to its matching model row.
254///
255/// Patch application is `ModelDef -> toml::Value -> deep merge -> ModelDef`,
256/// so a patch can only express states the row schema can represent. Ids with
257/// no matching row are skipped (see
258/// [`ProvidersConfig::dangling_model_patches`]). A patch that produces a
259/// type-invalid row warns once (matching the `read_external_config` eprintln
260/// precedent) and keeps the unpatched row, so one bad overlay field can't
261/// take out the whole catalog entry.
262fn apply_model_patches(
263    models: &mut BTreeMap<String, ModelDef>,
264    patches: &BTreeMap<String, toml::Value>,
265) {
266    for (id, patch) in patches {
267        let Some(base) = models.get(id) else {
268            continue;
269        };
270        match patched_model_row(base, patch) {
271            Ok(patched) => {
272                models.insert(id.clone(), patched);
273            }
274            Err(error) => {
275                if !MODEL_PATCH_TYPE_ERROR_WARNED.swap(true, Ordering::Relaxed) {
276                    eprintln!(
277                        "[llm_config] invalid [patch.models.\"{id}\"] overlay \
278                         (keeping the unpatched row): {error}"
279                    );
280                }
281            }
282        }
283    }
284}
285
286/// Produce the patched version of one model row, or a description of why the
287/// patch does not typecheck against the row schema.
288fn patched_model_row(base: &ModelDef, patch: &toml::Value) -> Result<ModelDef, String> {
289    let mut value = toml::Value::try_from(base)
290        .map_err(|error| format!("serialize base row for patching: {error}"))?;
291    deep_merge_toml(&mut value, patch);
292    ModelDef::deserialize(value).map_err(|error| error.to_string())
293}
294
/// One provider's definition as used in memory.
///
/// Deserialization goes through `ProviderDefWire` so that the presence of
/// `auth_style` in the input can be recorded in `auth_style_explicit`.
#[derive(Debug, Clone)]
pub struct ProviderDef {
    /// Optional display name for the provider.
    pub display_name: Option<String>,
    /// Optional icon reference. NOTE(review): format not visible here.
    pub icon: Option<String>,
    /// Provider protocol. Omitted providers use Harn's normal HTTP provider
    /// path; `acp` launches an Agent Client Protocol server and drives it as
    /// an agent-backed provider.
    pub protocol: Option<String>,
    /// Base URL. An empty string means "not set": an empty overlay value
    /// never overwrites an existing one during merge.
    pub base_url: String,
    /// Presumably the environment variable that can supply the base URL —
    /// TODO confirm against the resolver.
    pub base_url_env: Option<String>,
    /// Auth style name; `"bearer"` when the input omits it.
    pub auth_style: String,
    /// Optional auth header name. NOTE(review): usage not visible here.
    pub auth_header: Option<String>,
    /// Environment variable name(s) holding the credential.
    pub auth_env: AuthEnv,
    /// Extra headers; overlay entries are merged in key by key.
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint. Like `base_url`, an empty string means "not set".
    pub chat_endpoint: String,
    /// Optional completion endpoint.
    pub completion_endpoint: Option<String>,
    /// Provider process command (see the note on `local_runtime`).
    pub command: Option<String>,
    /// Provider process arguments; a non-empty overlay list replaces this
    /// list wholesale.
    pub args: Vec<String>,
    /// Provider process environment; overlay entries are merged in by key.
    pub env: BTreeMap<String, String>,
    /// Optional working directory for the provider process.
    pub cwd: Option<String>,
    /// MCP server descriptors kept as raw JSON; a non-empty overlay list
    /// replaces this list wholesale.
    pub mcp_servers: Vec<serde_json::Value>,
    /// Optional healthcheck request description.
    pub healthcheck: Option<HealthcheckDef>,
    /// Local runtime lifecycle metadata used by `harn local launch/stop`.
    /// This is intentionally separate from provider process fields such as
    /// `command`/`args`, which are used for ACP or external provider adapters.
    pub local_runtime: Option<LocalRuntimeDef>,
    /// Feature names; a non-empty overlay list replaces this list wholesale.
    pub features: Vec<String>,
    /// Fallback provider name to try if this provider fails.
    pub fallback: Option<String>,
    /// Number of retries before falling back (default 0).
    pub retry_count: Option<u32>,
    /// Delay between retries in milliseconds (default 1000).
    pub retry_delay_ms: Option<u64>,
    /// Maximum requests per minute. None = unlimited.
    pub rpm: Option<u32>,
    /// Rich provider quota metadata. `rpm` remains as a legacy shorthand;
    /// when both are present, this nested shape is the authoritative catalog
    /// record and callers can still read the flattened `rpm`.
    pub rate_limits: Option<RateLimitsDef>,
    /// Provider/catalog pricing in USD per 1k input tokens.
    pub cost_per_1k_in: Option<f64>,
    /// Provider/catalog pricing in USD per 1k output tokens.
    pub cost_per_1k_out: Option<f64>,
    /// Observed or configured p50 latency in milliseconds.
    pub latency_p50_ms: Option<u64>,
    /// Optional provider-level serving performance observations.
    pub performance: Option<ServingPerformanceDef>,
    /// True when `auth_style` was present in the deserialized input (or was
    /// merged in from an overlay that set it) rather than filled in with the
    /// `"bearer"` default. Consulted when merging overlays.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
345
/// Serialized shape of [`ProviderDef`], used only as the intermediate target
/// of its hand-written `Deserialize` impl.
///
/// Every field defaults when absent. The fields mirror `ProviderDef` except
/// that `auth_style` is an `Option`, so the impl can tell an omitted value
/// from an explicit one, and `auth_style_explicit` does not exist on the
/// wire.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    // `None` here means the key was omitted; the default is applied later.
    #[serde(default)]
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    local_runtime: Option<LocalRuntimeDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
    #[serde(default)]
    performance: Option<ServingPerformanceDef>,
}
405
406impl<'de> Deserialize<'de> for ProviderDef {
407    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
408    where
409        D: serde::Deserializer<'de>,
410    {
411        let wire = ProviderDefWire::deserialize(deserializer)?;
412        let auth_style_explicit = wire.auth_style.is_some();
413        Ok(Self {
414            display_name: wire.display_name,
415            icon: wire.icon,
416            protocol: wire.protocol,
417            base_url: wire.base_url,
418            base_url_env: wire.base_url_env,
419            auth_style: wire.auth_style.unwrap_or_else(default_bearer),
420            auth_header: wire.auth_header,
421            auth_env: wire.auth_env,
422            extra_headers: wire.extra_headers,
423            chat_endpoint: wire.chat_endpoint,
424            completion_endpoint: wire.completion_endpoint,
425            command: wire.command,
426            args: wire.args,
427            env: wire.env,
428            cwd: wire.cwd,
429            mcp_servers: wire.mcp_servers,
430            healthcheck: wire.healthcheck,
431            local_runtime: wire.local_runtime,
432            features: wire.features,
433            fallback: wire.fallback,
434            retry_count: wire.retry_count,
435            retry_delay_ms: wire.retry_delay_ms,
436            rpm: wire.rpm,
437            rate_limits: wire.rate_limits,
438            cost_per_1k_in: wire.cost_per_1k_in,
439            cost_per_1k_out: wire.cost_per_1k_out,
440            latency_p50_ms: wire.latency_p50_ms,
441            performance: wire.performance,
442            auth_style_explicit,
443        })
444    }
445}
446
447impl Default for ProviderDef {
448    fn default() -> Self {
449        Self {
450            display_name: None,
451            icon: None,
452            protocol: None,
453            base_url: String::new(),
454            base_url_env: None,
455            auth_style: default_bearer(),
456            auth_header: None,
457            auth_env: AuthEnv::None,
458            extra_headers: BTreeMap::new(),
459            chat_endpoint: String::new(),
460            completion_endpoint: None,
461            command: None,
462            args: Vec::new(),
463            env: BTreeMap::new(),
464            cwd: None,
465            mcp_servers: Vec::new(),
466            healthcheck: None,
467            local_runtime: None,
468            features: Vec::new(),
469            fallback: None,
470            retry_count: None,
471            retry_delay_ms: None,
472            rpm: None,
473            rate_limits: None,
474            cost_per_1k_in: None,
475            cost_per_1k_out: None,
476            latency_p50_ms: None,
477            performance: None,
478            auth_style_explicit: false,
479        }
480    }
481}
482
483impl ProviderDef {
484    fn merge_from(&mut self, overlay: &ProviderDef) {
485        merge_option(&mut self.display_name, &overlay.display_name);
486        merge_option(&mut self.icon, &overlay.icon);
487        merge_option(&mut self.protocol, &overlay.protocol);
488        merge_string(&mut self.base_url, &overlay.base_url);
489        merge_option(&mut self.base_url_env, &overlay.base_url_env);
490        let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
491        if overlay.auth_style_explicit
492            || !overlay_uses_default_auth_style
493            || self.auth_style == default_bearer()
494        {
495            self.auth_style = overlay.auth_style.clone();
496            self.auth_style_explicit |=
497                overlay.auth_style_explicit || !overlay_uses_default_auth_style;
498        }
499        merge_option(&mut self.auth_header, &overlay.auth_header);
500        if !overlay.auth_env.is_none() {
501            self.auth_env = overlay.auth_env.clone();
502        }
503        self.extra_headers.extend(overlay.extra_headers.clone());
504        merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
505        merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
506        merge_option(&mut self.command, &overlay.command);
507        merge_vec(&mut self.args, &overlay.args);
508        self.env.extend(overlay.env.clone());
509        merge_option(&mut self.cwd, &overlay.cwd);
510        merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
511        merge_option(&mut self.healthcheck, &overlay.healthcheck);
512        merge_option(&mut self.local_runtime, &overlay.local_runtime);
513        merge_vec(&mut self.features, &overlay.features);
514        merge_option(&mut self.fallback, &overlay.fallback);
515        merge_option(&mut self.retry_count, &overlay.retry_count);
516        merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
517        merge_option(&mut self.rpm, &overlay.rpm);
518        merge_option(&mut self.rate_limits, &overlay.rate_limits);
519        merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
520        merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
521        merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
522        merge_option(&mut self.performance, &overlay.performance);
523    }
524}
525
/// Overwrites `base` with a clone of `overlay`'s value when `overlay` is
/// `Some`; a `None` overlay leaves `base` untouched.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
531
/// Overwrites `base` with `overlay` unless `overlay` is empty; an empty
/// overlay string is treated as "not set" and leaves `base` untouched.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    *base = overlay.to_owned();
}
537
/// Replaces `base` wholesale with a copy of `overlay` unless `overlay` is
/// empty; an empty overlay list leaves `base` untouched. Elements are never
/// merged or appended.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    match overlay {
        [] => {}
        items => *base = items.to_vec(),
    }
}
543
/// Returns the auth style used when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
547
/// Auth env var name(s) for the provider. Can be a single string or an array
/// (tried in order until one is set).
///
/// Deserialized untagged, so the variant is chosen by the shape of the
/// value rather than by a tag.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No auth env var configured. This is the default, and during provider
    /// merging an overlay holding this variant leaves the base untouched.
    #[default]
    None,
    /// A single env var name.
    Single(String),
    /// Several candidate env var names, tried in order.
    Multiple(Vec<String>),
}
558
559impl AuthEnv {
560    fn is_none(&self) -> bool {
561        matches!(self, AuthEnv::None)
562    }
563}
564
/// Healthcheck request description attached to a provider.
///
/// `method` is required in the input; the remaining fields default to
/// `None` when omitted.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Request method. NOTE(review): presumably an HTTP verb — confirm
    /// against the code that performs the check.
    pub method: String,
    /// Optional path. NOTE(review): presumably relative to the provider's
    /// base URL — confirm.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional URL. NOTE(review): presumably a full URL used in place of
    /// `path` — confirm.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<String>,
}
575
/// Lifecycle metadata for a locally hosted runtime.
///
/// Every field is optional on the wire, and unset options / empty lists are
/// omitted again when the value is serialized.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalRuntimeDef {
    /// Lifecycle style: `daemon_api` for runtimes with their own resident
    /// daemon (Ollama), `managed_process` for Harn-spawned servers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    /// Command Harn should execute for managed-process runtimes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<String>,
    /// Arguments that must appear immediately after the command, before model
    /// and server flags. Used by CLIs such as `vllm serve ...`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prefix_args: Vec<String>,
    /// Default model source/path/repo. User overlays may set this; embedded
    /// catalog rows avoid machine-specific absolute paths except examples.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source: Option<String>,
    /// Environment variable that can provide a model source.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source_env: Option<String>,
    /// Default port when the provider base URL has none.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_port: Option<u16>,
    /// Argument names used by the runtime CLI.
    // NOTE(review): the `*_arg` fields from here through `cache_ram_arg`
    // appear to be one group of CLI flag names, each named after the setting
    // it carries — confirm against the launch code.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub served_model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ctx_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu_layers_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_k_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_v_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_ram_arg: Option<String>,
    /// Flag that enables adapter-aware serving for LoRA-capable runtimes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub enable_lora_arg: Option<String>,
    /// Flag that accepts one or more LoRA module specs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_arg: Option<String>,
    /// Runtime value shape for LoRA module specs. Defaults to `name_path`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_value_format: Option<String>,
    /// Optional rank-limit flag for runtimes that need an explicit ceiling.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_lora_rank_arg: Option<String>,
    /// Extra arguments Harn applies by default when launching this runtime.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub default_args: Vec<String>,
    /// Stop strategy: `keep_alive_zero`, `pid`, or `external`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop: Option<String>,
    /// Official docs/source URL for the lifecycle contract.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the local runtime row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Short operational note surfaced by CLI docs/help.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
648
/// Memory sizing data for a locally hosted model route/runtime: empirical
/// measurements plus the coefficients used to estimate resident memory.
///
/// Every field is optional on the wire, and unset options / an empty
/// multiplier map are omitted again when the value is serialized.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct LocalMemoryDef {
    /// Empirical resident memory observed for this route/runtime.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_resident_gib: Option<f64>,
    /// Context size used for the empirical measurement.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_context_window: Option<u64>,
    /// KV-cache type used for the empirical measurement.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_cache_type: Option<String>,
    /// Approximate non-context resident footprint for this model/runtime.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_resident_gib: Option<f64>,
    /// Approximate GiB consumed by KV cache per 1,000 context tokens at the
    /// default cache type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_cache_gib_per_1k_ctx: Option<f64>,
    /// Cache-type multiplier relative to `kv_cache_gib_per_1k_ctx`.
    /// Keyed by cache type name.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub cache_type_multipliers: BTreeMap<String, f64>,
    /// Cache type assumed when the launch command does not set K/V cache.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_cache_type: Option<String>,
    /// Minimum headroom Harn should leave for the OS and other apps.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety_margin_gib: Option<f64>,
    /// Highest context Harn should recommend automatically from this row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_recommended_context: Option<u64>,
    /// Official or empirical source for the sizing row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the sizing row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Short operational note surfaced by CLI diagnostics/docs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
689
690impl LocalMemoryDef {
691    pub fn is_empty(&self) -> bool {
692        self.measured_resident_gib.is_none()
693            && self.measured_context_window.is_none()
694            && self.measured_cache_type.is_none()
695            && self.base_resident_gib.is_none()
696            && self.kv_cache_gib_per_1k_ctx.is_none()
697            && self.cache_type_multipliers.is_empty()
698            && self.default_cache_type.is_none()
699            && self.safety_margin_gib.is_none()
700            && self.max_recommended_context.is_none()
701            && self.source_url.is_none()
702            && self.last_verified.is_none()
703            && self.notes.is_none()
704    }
705}
706
/// A model alias: the model id and provider it refers to, plus an optional
/// tool-format override.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id the alias refers to (required).
    pub id: String,
    /// Name of the provider the model id belongs to (required).
    pub provider: String,
    /// Per-model tool format override: "native" or "text". When set, this
    /// takes precedence over the provider-level default. Models with strong
    /// tool-calling fine-tuning (Kimi-K2.5, GPT-4o) should use "native";
    /// models better served by text-based tool calling use "text".
    #[serde(default)]
    pub tool_format: Option<String>,
}
718
/// Tool-calling record stored per alias.
///
/// Every field is an optional string that defaults to `None` and is omitted
/// from serialized output when unset.
/// NOTE(review): the accepted values for each field are not visible in this
/// file section; the notes below are inferred from the field names only.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasToolCallingDef {
    /// Presumably the status of native tool calling — TODO confirm.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub native: Option<String>,
    /// Presumably the status of text-based tool calling — TODO confirm.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    /// Presumably the status of native tool calling while streaming — TODO
    /// confirm.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub streaming_native: Option<String>,
    /// Presumably the mode to fall back to — TODO confirm.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_mode: Option<String>,
    /// Presumably a description of why a mode failed — TODO confirm.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    /// Presumably when the alias was last probed; the timestamp format is
    /// not visible here — TODO confirm.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub last_probe_at: Option<String>,
}
740
/// Token pricing for a model, expressed as per-MTok rates.
///
/// Input and output rates are required; the cache rates are optional and
/// default to `None` when absent from the input.
/// NOTE(review): the currency is not visible here — confirm before
/// presenting these numbers to users.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// Rate for input tokens, per MTok.
    pub input_per_mtok: f64,
    /// Rate for output tokens, per MTok.
    pub output_per_mtok: f64,
    /// Rate for cache reads, per MTok, when one is recorded.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Rate for cache writes, per MTok, when one is recorded.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
750
/// Provider or model quota metadata. Providers publish these along several
/// axes, and any one exhausted bucket can trigger throttling.
///
/// Every field is optional: it defaults to `None` when absent from the input
/// and is left out of serialized output while unset.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RateLimitsDef {
    /// Requests per minute.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpm: Option<u32>,
    /// Requests per hour.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rph: Option<u32>,
    /// Requests per day.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpd: Option<u32>,
    /// Total tokens per minute.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpm: Option<u64>,
    /// Total tokens per hour.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tph: Option<u64>,
    /// Total tokens per day.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpd: Option<u64>,
    /// Input tokens per minute, when the provider splits input/output quotas.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tpm: Option<u64>,
    /// Output tokens per minute, when the provider splits input/output quotas.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tpm: Option<u64>,
    /// Concurrent in-flight requests, if published.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u32>,
    /// Account tier or route class these limits describe.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tier: Option<String>,
    /// Official source URL for the row.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the row was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Free-text caveat for account-dependent or burst limits.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
795
796impl RateLimitsDef {
797    pub fn is_empty(&self) -> bool {
798        self.rpm.is_none()
799            && self.rph.is_none()
800            && self.rpd.is_none()
801            && self.tpm.is_none()
802            && self.tph.is_none()
803            && self.tpd.is_none()
804            && self.input_tpm.is_none()
805            && self.output_tpm.is_none()
806            && self.concurrency.is_none()
807            && self.tier.is_none()
808            && self.source_url.is_none()
809            && self.last_verified.is_none()
810            && self.notes.is_none()
811    }
812
813    pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
814        if self.rpm.is_none() {
815            self.rpm = rpm;
816        }
817        (!self.is_empty()).then_some(self)
818    }
819}
820
/// Optional provider/model serving-performance observation. This records
/// benchmark or live-probe facts, not a hard runtime contract; callers should
/// treat missing fields as unknown and stale dates as advisory.
///
/// Every field defaults to `None` when absent from the input and is left out
/// of serialized output while unset.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ServingPerformanceDef {
    /// Observed time-to-first-token in milliseconds.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub observed_ttft_ms: Option<u64>,
    /// Observed output generation rate in tokens per second.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens_per_sec: Option<f64>,
    /// End-to-end time-to-answer in seconds for the cited benchmark, when
    /// reported separately from TTFT/generation rate.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_answer_s: Option<f64>,
    /// Source label, e.g. `artificial_analysis`, `harn_probe`, or
    /// `provider_blog`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// Source URL for the observation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when the observation was last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    /// Number of requests or benchmark samples behind this row, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sample_size: Option<u32>,
    /// Short caveat such as streaming mode, warm/cold route, or prompt shape.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
853
854impl ServingPerformanceDef {
855    pub fn is_empty(&self) -> bool {
856        self.observed_ttft_ms.is_none()
857            && self.output_tokens_per_sec.is_none()
858            && self.time_to_answer_s.is_none()
859            && self.source.is_none()
860            && self.source_url.is_none()
861            && self.last_verified.is_none()
862            && self.sample_size.is_none()
863            && self.notes.is_none()
864    }
865}
866
/// Logical-model facts separated from provider serving routes. These fields
/// describe the underlying weights or public model family, not Harn's alias or
/// provider/model selector.
///
/// Every field defaults to `None` when absent from the input and is left out
/// of serialized output while unset.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ModelArchitectureDef {
    /// Total parameter count in billions.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parameter_count_b: Option<f64>,
    /// Active parameter count in billions for MoE models.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_parameter_count_b: Option<f64>,
    /// True for mixture-of-experts models.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moe: Option<bool>,
    /// Quantization advertised by this route, if route-specific.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quantization: Option<String>,
    /// Numeric precision advertised by this route, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precision: Option<String>,
    /// License identifier or short label.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Tokenizer family or implementation hint.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokenizer: Option<String>,
    /// Public knowledge cutoff claim, when published.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub knowledge_cutoff: Option<String>,
    /// Official source URL for these facts.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    /// YYYY-MM-DD date when these facts were last verified.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
}
903
904impl ModelArchitectureDef {
905    pub fn is_empty(&self) -> bool {
906        self.parameter_count_b.is_none()
907            && self.active_parameter_count_b.is_none()
908            && self.moe.is_none()
909            && self.quantization.is_none()
910            && self.precision.is_none()
911            && self.license.is_none()
912            && self.tokenizer.is_none()
913            && self.knowledge_cutoff.is_none()
914            && self.source_url.is_none()
915            && self.last_verified.is_none()
916    }
917}
918
/// Optional accelerated-serving ("fast mode") tier for a model. Off by
/// default: its presence only *describes* that the provider offers a
/// faster, premium-priced serving path running the same weights — callers
/// must explicitly opt in via the provider's request knob, so nothing here
/// changes default behavior. Deliberately provider-agnostic: Anthropic
/// exposes the tier as `speed = "fast"` (beta-gated), while OpenAI uses
/// `service_tier = "fast"` / `"priority"`. Premium pricing is stored as
/// absolute per-MTok rates rather than a single multiplier because
/// providers price the tier asymmetrically (Anthropic Opus 4.8 is 2x
/// standard; Opus 4.7 fast mode is 6x).
///
/// `param` and `value` are required when deserializing; every other field
/// defaults to `None` when absent from the input.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    /// Request field that opts into the fast tier (e.g. "speed" for
    /// Anthropic, "service_tier" for OpenAI).
    pub param: String,
    /// Value to send on `param` (e.g. "fast", "priority").
    pub value: String,
    /// Provider beta/feature header required to use the tier, if any
    /// (e.g. Anthropic "fast-mode-2026-02-01").
    #[serde(default)]
    pub beta_header: Option<String>,
    /// Output-tokens-per-second speedup vs standard serving (e.g. 2.5).
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    /// Lifecycle of the fast tier: "ga" | "research_preview" |
    /// "deprecated". None when unspecified.
    #[serde(default)]
    pub status: Option<String>,
    /// Premium pricing charged while the fast tier is active (absolute
    /// per-MTok rates, not a multiplier on standard pricing).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Free-text note: constraints, deprecation timeline, etc.
    #[serde(default)]
    pub note: Option<String>,
}
955
/// One model row of the catalog (the value type of `ProvidersConfig::models`).
///
/// Only `name`, `provider`, and `context_window` are required when
/// deserializing; every other field falls back to its default (`None`, an
/// empty collection, `false`, or the default `ModelAvailability`).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Name recorded for this row (required).
    pub name: String,
    /// Provider this row belongs to (required).
    pub provider: String,
    /// Context window size (required).
    /// NOTE(review): presumably measured in tokens — confirm.
    pub context_window: u64,
    /// Provider-independent logical model id, when multiple serving routes map
    /// to the same weights or model family.
    #[serde(default)]
    pub logical_model: Option<String>,
    /// Equivalence class for failover/escalation candidates. Entries in the
    /// same group are capability-compatible alternatives, not byte-identical
    /// APIs; callers must still re-render transcripts for the target provider.
    #[serde(default)]
    pub equivalence_group: Option<String>,
    /// Serving-route detail such as "serverless", "priority", "fp8", or a
    /// provider route slug. This is intentionally separate from `name`.
    #[serde(default)]
    pub served_variant: Option<String>,
    /// Provider-native model id to send on the wire. Defaults to the catalog
    /// key. Required when two providers expose the same native id and Harn
    /// needs a unique catalog key for each route.
    #[serde(default)]
    pub wire_model: Option<String>,
    /// Preferred API dialect for the route, e.g. `openai_chat`,
    /// `openai_responses`, `anthropic_messages`, `gemini_generate_content`.
    #[serde(default)]
    pub api_dialect: Option<String>,
    /// Route-specific token/request quota metadata.
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    /// Optional route-level serving performance observations.
    #[serde(default)]
    pub performance: Option<ServingPerformanceDef>,
    /// Underlying model architecture facts separated from the provider id.
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    /// Local launch memory-sizing hints used by `harn local launch`.
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    /// Optional second context-window figure for this row.
    /// NOTE(review): presumably the window actually configured at runtime, as
    /// opposed to the advertised `context_window` — confirm against callers.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    /// Optional streaming timeout for this row.
    /// NOTE(review): units are not visible here (presumably seconds) — confirm.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags declared for this row; empty when omitted.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Standard pricing for this row, when the catalog records it.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Marks the row as deprecated; `false` when omitted.
    #[serde(default)]
    pub deprecated: bool,
    /// Free-text note accompanying a deprecation.
    #[serde(default)]
    pub deprecation_note: Option<String>,
    /// Structured replacement pointer: the catalog id of the model that
    /// supersedes this one (e.g. an older Opus row points at the newest
    /// Opus). Lets release tooling express "migrate to X" in a
    /// machine-readable way instead of burying it in `deprecation_note`
    /// free text. A model may be superseded without being `deprecated`
    /// (a newer option exists but this one is still fully supported);
    /// pair it with `deprecated = true` once a sunset is announced.
    #[serde(default)]
    pub superseded_by: Option<String>,
    /// Accelerated-serving ("fast mode") tier metadata, when the model's
    /// provider offers one. Off by default — see [`FastModeDef`]. None for
    /// models with no faster serving path.
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    /// Quality tags declared for this row; empty when omitted.
    /// NOTE(review): the accepted tag vocabulary is not visible here.
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Whether the model can be reached over a normal API-key serverless call,
    /// or only via a dedicated/provisioned endpoint that the caller must spin
    /// up out-of-band. Providers like Together list dedicated-only routes
    /// alongside serverless ones in `/v1/models`, so this metadata lets clients
    /// avoid presenting them as one-click options.
    #[serde(default)]
    pub availability: ModelAvailability,
    /// Popular-consensus tier label. Enum-typed string: "small" | "mid" |
    /// "frontier" | "reasoning". Self-declared per model (no pattern-matched
    /// rule table) so the catalog is the single source of truth. When None
    /// the resolver returns the catalog default ("mid"). Use the richer
    /// `strengths` + `benchmarks` fields to pick models for specific
    /// workloads — `tier` exists only as a coarse popular-consensus shortcut.
    #[serde(default)]
    pub tier: Option<String>,
    /// True when the model weights are downloadable / self-hostable
    /// (open-weight / open-source license, regardless of commercial-use
    /// restrictions). False when weights are closed (Anthropic, OpenAI,
    /// Google, etc.). None when the catalog row predates the migration.
    #[serde(default)]
    pub open_weight: Option<bool>,
    /// Workload-shaped strength tags. Conventional values include
    /// `coding`, `summarization`, `long_context`, `tool_use`, `reasoning`,
    /// `vision`, `speed`, `cheap`, `agentic`. Selectors should treat
    /// missing entries as "no claim" rather than "no strength."
    #[serde(default)]
    pub strengths: Vec<String>,
    /// Public benchmark numbers, keyed by a snake_case identifier
    /// (`swe_bench_verified`, `humaneval`, `aa_intelligence_index`, etc.).
    /// Values are the raw published scores. The selector layer is free
    /// to normalize per benchmark; the catalog records the canonical
    /// score so future readers can audit the source.
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    /// Normalized model-family token used as a diversity signal for
    /// reviewer selection. Distinct from provider: hosted wrappers should
    /// keep the underlying family (for example OpenRouter-hosted Claude
    /// still uses `anthropic-claude`).
    #[serde(default)]
    pub family: Option<String>,
    /// Narrower family lineage used by option-pack calibration.
    #[serde(default)]
    pub lineage: Option<String>,
    /// Preferred reviewer families for critique/review workloads.
    #[serde(default)]
    pub complementary_with: Vec<String>,
    /// Author families, lineages, model ids, or provider/model selectors
    /// this row should not review.
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
1074
/// How a cataloged model route can be reached.
///
/// Serialized in snake_case (`serverless`, `dedicated`, `unknown`); the
/// default variant is `Serverless`.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// Reachable through the provider's normal API-key path with no extra
    /// setup. The default for cataloged hosted/local models: by cataloging a
    /// row we are claiming the route works out of the box.
    #[default]
    Serverless,
    /// Requires the caller to provision a dedicated endpoint before requests
    /// will succeed. The catalog row exists for selection/pricing UI, but
    /// hosts must not auto-route to it.
    Dedicated,
    /// Availability is not known ahead of time. Used for routes that were
    /// surfaced dynamically (e.g. through `/v1/models`) without a static
    /// claim from Harn or the user.
    Unknown,
}
1092
1093impl ModelAvailability {
1094    pub fn as_str(self) -> &'static str {
1095        match self {
1096            Self::Serverless => "serverless",
1097            Self::Dedicated => "dedicated",
1098            Self::Unknown => "unknown",
1099        }
1100    }
1101
1102    pub fn parse(value: &str) -> Option<Self> {
1103        match value {
1104            "serverless" => Some(Self::Serverless),
1105            "dedicated" => Some(Self::Dedicated),
1106            "unknown" => Some(Self::Unknown),
1107            _ => None,
1108        }
1109    }
1110}
1111
/// Catalog identity produced by `resolve_model_info` for an alias or a raw
/// model selector.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Model id: the alias target, or the selector with any provider
    /// selector prefix stripped.
    pub id: String,
    /// Provider taken from the alias entry, or inferred from the selector.
    pub provider: String,
    /// The alias name when resolution went through an alias; `None` otherwise.
    pub alias: Option<String>,
    /// Tool format to use, after the requested format has passed through
    /// `guard_tool_format`.
    pub tool_format: String,
    /// Tier label resolved for the model id.
    pub tier: String,
    /// Model family resolved for the provider/model pair.
    pub family: String,
    /// Model lineage resolved for the provider/model pair.
    pub lineage: String,
}
1122
/// Inputs describing the author model and the constraints for picking a
/// complementary reviewer.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    /// Model that authored the work under review.
    pub author_model: String,
    /// Provider of the author model, when the caller knows it.
    pub author_provider: Option<String>,
    /// Kind of review being requested.
    pub intent: ComplementaryReviewerIntent,
    /// Optional price ceiling expressed as a multiplier.
    /// NOTE(review): presumably relative to the author model's pricing —
    /// confirm against the selection code.
    pub max_price_multiplier: Option<f64>,
}
1130
/// Kind of complementary-review request, with a stable snake_case label for
/// each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    Review,
    Critique,
    PlanReview,
}

impl ComplementaryReviewerIntent {
    /// Returns the snake_case label for this intent.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::PlanReview => "plan_review",
            Self::Critique => "critique",
            Self::Review => "review",
        }
    }

    /// Parses a label produced by [`Self::as_str`]. Matching is exact and
    /// case-sensitive; any other input yields `None`.
    pub fn parse(value: &str) -> Option<Self> {
        [Self::Review, Self::Critique, Self::PlanReview]
            .iter()
            .copied()
            .find(|intent| intent.as_str() == value)
    }
}
1156
/// Outcome of a complementary-reviewer selection: who authored, who reviews,
/// and whether the selection had to fall back.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    /// Review intent, as a string label.
    pub intent: String,
    /// Identity of the author model.
    pub author: ComplementaryModelIdentity,
    /// Identity of the chosen reviewer model.
    pub reviewer: ComplementaryModelIdentity,
    /// True when the selection fell back instead of finding an independent
    /// reviewer.
    pub fallback: bool,
    /// Human-readable explanation of the fallback, when there was one.
    pub fallback_reason: Option<String>,
    /// Machine-readable reason a caller can branch on when `fallback` is
    /// `true`, distinct from the human-readable `fallback_reason`/`reason`
    /// prose. `None` on the success path. Lets a caller hard-fail an
    /// independent-review step rather than silently degrade to self-review.
    /// See [`ReviewerFallbackCode`] for the stable set of values.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_code: Option<String>,
    /// Human-readable explanation of the selection.
    pub reason: String,
    /// Cost estimate for the review, when one is available.
    /// NOTE(review): how the estimate is derived is not visible here.
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
1174
/// Stable, machine-readable reasons `pick_complementary_reviewer` falls back
/// to the author model. Serialized as the `fallback_code` string so harn
/// pipelines and Rust callers can branch deterministically instead of parsing
/// prose. New variants are additive; existing codes are an append-only
/// contract.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReviewerFallbackCode {
    /// The author model's family could not be resolved, so no independent
    /// family comparison is possible.
    UnknownAuthorFamily,
    /// Different-family candidates exist but none satisfy `max_price_multiplier`.
    NoDiffFamilyWithinPrice,
    /// No active, serverless, different-family reviewer is cataloged at all.
    NoDiffFamilyServerless,
    /// Different-family candidates exist but were all excluded (e.g. every
    /// one declares `avoid_as_reviewer_for` the author).
    AllDiffFamilyExcluded,
}

impl ReviewerFallbackCode {
    /// Returns the snake_case code string for this variant.
    pub fn as_code(self) -> &'static str {
        // The match stays exhaustive so a new variant cannot ship without a
        // code string.
        let code: &'static str = match self {
            ReviewerFallbackCode::AllDiffFamilyExcluded => "all_diff_family_excluded",
            ReviewerFallbackCode::NoDiffFamilyServerless => "no_diff_family_serverless",
            ReviewerFallbackCode::NoDiffFamilyWithinPrice => "no_diff_family_within_price",
            ReviewerFallbackCode::UnknownAuthorFamily => "unknown_author_family",
        };
        code
    }
}
1203
/// Identity of one model (author or reviewer) inside a
/// `ComplementaryReviewerSelection`.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    /// Model id.
    pub id: String,
    /// Provider name.
    pub provider: String,
    /// Model family label.
    pub family: String,
    /// Model lineage label.
    pub lineage: String,
    /// Tier label.
    pub tier: String,
    /// Pricing for the model; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,
}
1214
/// Cost figures attached to a complementary-reviewer selection, expressed as
/// per-MTok rates.
/// NOTE(review): how `total_per_mtok` is combined from the input and output
/// rates is not visible here — confirm against the code that builds it.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    /// Input rate, per MTok.
    pub input_per_mtok: f64,
    /// Output rate, per MTok.
    pub output_per_mtok: f64,
    /// Combined rate, per MTok.
    pub total_per_mtok: f64,
    /// Cost relative to the author model, when it can be computed; left out
    /// of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multiplier_vs_author: Option<f64>,
}
1223
/// One entry of `ProvidersConfig::inference_rules`: optional matchers plus
/// the provider they point to.
///
/// NOTE(review): the matching code is not visible here, so the syntax of
/// `pattern` and how the three matchers combine should be confirmed against
/// it.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Optional pattern matcher; `None` when omitted.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Optional substring matcher; `None` when omitted.
    #[serde(default)]
    pub contains: Option<String>,
    /// Optional exact-match matcher; `None` when omitted.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider named by this rule (required).
    pub provider: String,
}
1234
/// One entry of `ProvidersConfig::tier_rules`: optional matchers plus the
/// tier they point to.
///
/// NOTE(review): the matching code is not visible here, so the syntax of
/// `pattern` and how the three matchers combine should be confirmed against
/// it.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Optional pattern matcher; `None` when omitted.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Optional substring matcher; `None` when omitted.
    #[serde(default)]
    pub contains: Option<String>,
    /// Optional exact-match matcher; `None` when omitted.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier named by this rule (required).
    pub tier: String,
}
1245
/// Fallback tier settings (the `tier_defaults` section of the config).
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Fallback tier label; `default_mid()` supplies the value when the key
    /// is absent from the input.
    #[serde(default = "default_mid")]
    pub default: String,
}
1251
1252impl Default for TierDefaults {
1253    fn default() -> Self {
1254        Self {
1255            default: default_mid(),
1256        }
1257    }
1258}
1259
/// Fallback tier label used when a config does not name one.
fn default_mid() -> String {
    String::from("mid")
}
1263
1264/// Load and cache the providers config. Called once at VM startup.
1265pub fn load_config() -> &'static ProvidersConfig {
1266    CONFIG.get_or_init(|| {
1267        let mut config = default_config();
1268        let verbose_config_logging = matches!(
1269            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
1270            Some("1" | "true" | "TRUE" | "yes" | "YES")
1271        ) || matches!(
1272            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
1273            Some("1" | "true" | "TRUE" | "yes" | "YES")
1274        );
1275        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
1276            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
1277                config.merge_from(&overlay);
1278                let _ = CONFIG_PATH.set(path);
1279                return config;
1280            }
1281        }
1282        if should_load_home_config() {
1283            if let Some(home) = dirs_or_home() {
1284                let path = format!("{home}/.config/harn/providers.toml");
1285                if let Some(overlay) = read_external_config(&path, false) {
1286                    config.merge_from(&overlay);
1287                    let _ = CONFIG_PATH.set(path);
1288                    return config;
1289                }
1290            }
1291        }
1292        config
1293    })
1294}
1295
1296fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1297    match std::fs::read_to_string(path) {
1298        // Single parse entry point (`parse_config_toml`) so every overlay
1299        // layer — `HARN_PROVIDERS_CONFIG`, the home file, `[llm]` manifest
1300        // sections — honors the same schema, including `[patch.models]`.
1301        Ok(content) => match parse_config_toml(&content) {
1302            Ok(config) => {
1303                if verbose {
1304                    eprintln!(
1305                        "[llm_config] Loaded {} providers, {} aliases from {}",
1306                        config.providers.len(),
1307                        config.aliases.len(),
1308                        path
1309                    );
1310                }
1311                Some(config)
1312            }
1313            Err(error) => {
1314                eprintln!("[llm_config] TOML parse error in {path}: {error}");
1315                None
1316            }
1317        },
1318        Err(error) => {
1319            if verbose {
1320                eprintln!("[llm_config] Cannot read {path}: {error}");
1321            }
1322            None
1323        }
1324    }
1325}
1326
/// Whether the per-user home config file may be consulted.
fn should_load_home_config() -> bool {
    // Test builds must see only the embedded defaults plus explicit overlays,
    // never a provider file that happens to sit on the developer's machine.
    cfg!(not(test))
}
1332
1333/// Parse a provider/model catalog overlay in the same shape as
1334/// `providers.toml` or `[llm]` package-manifest sections.
1335pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1336    toml::from_str::<ProvidersConfig>(src)
1337}
1338
1339/// Returns the filesystem path of the currently-loaded providers config, if
1340/// any. Returns `None` when built-in defaults are active.
1341pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1342    // Force lazy init so CONFIG_PATH is populated if a file was loaded.
1343    let _ = load_config();
1344    CONFIG_PATH.get().map(std::path::PathBuf::from)
1345}
1346
1347/// Install per-run provider config overlays. The overlay uses the same shape as
1348/// `providers.toml`, but lives under `[llm]` in `harn.toml` and package
1349/// manifests. Passing `None` clears the overlay.
1350pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1351    USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1352}
1353
/// Clear per-run provider config overlays.
///
/// Equivalent to calling `set_user_overrides(None)`.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
1358
1359/// Install the process-wide runtime catalog overlay used by
1360/// `provider_catalog::refresh_runtime_catalog`. Per-run user overlays still
1361/// merge last so project-local provider config can override hosted catalog
1362/// updates.
1363pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1364    *runtime_catalog_overlay()
1365        .write()
1366        .expect("runtime catalog overlay poisoned") = config;
1367}
1368
/// Removes the process-wide runtime catalog overlay.
///
/// Equivalent to calling `set_runtime_catalog_overlay(None)`.
pub fn clear_runtime_catalog_overlay() {
    set_runtime_catalog_overlay(None);
}
1372
1373pub(crate) fn effective_config() -> ProvidersConfig {
1374    let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1375    effective_config_with_user_overrides(user_overrides.as_ref())
1376}
1377
1378/// Provider config built purely from the compiled-in `EMBEDDED_PROVIDERS_TOML`
1379/// snapshot, ignoring every ambient layer: the developer's
1380/// `~/.config/harn/providers.toml`, `HARN_PROVIDERS_CONFIG`, the process
1381/// runtime-catalog overlay, and thread-local user overrides.
1382///
1383/// This is the hermetic source of truth for *generating* the checked-in
1384/// `spec/provider-catalog/*` artifacts. Artifact generation must be a pure
1385/// function of the source tree so a developer's personal aliases/providers
1386/// never leak into shipped artifacts (which then makes clean CI flag drift).
1387/// Runtime catalog presentation must keep using [`effective_config`] /
1388/// [`effective_config_with_user_overrides`], which legitimately reflect the
1389/// host's live configuration.
1390///
1391/// An optional explicit overlay (e.g. a `--overlay` file named on the command
1392/// line) is merged on top of the embedded base. Unlike the home file and env
1393/// layers, that overlay is a declared, reproducible input rather than ambient
1394/// machine state, so it is safe to honor while staying hermetic.
1395pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1396    let mut config = default_config();
1397    if let Some(overlay) = explicit_overlay {
1398        config.merge_from(overlay);
1399    }
1400    config
1401}
1402
1403pub(crate) fn effective_config_with_user_overrides(
1404    user_overrides: Option<&ProvidersConfig>,
1405) -> ProvidersConfig {
1406    let mut merged = load_config().clone();
1407    if let Some(overlay) = runtime_catalog_overlay()
1408        .read()
1409        .expect("runtime catalog overlay poisoned")
1410        .as_ref()
1411    {
1412        merged.merge_from(overlay);
1413    }
1414    if let Some(overlay) = user_overrides {
1415        merged.merge_from(overlay);
1416    }
1417    merged
1418}
1419
1420fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
1421    RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
1422}
1423
1424/// Resolve a model alias to (model_id, provider_name).
1425pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1426    let config = effective_config();
1427    if let Some(a) = config.aliases.get(alias) {
1428        return (a.id.clone(), Some(a.provider.clone()));
1429    }
1430    (normalize_model_id(alias), None)
1431}
1432
/// Selector prefixes that name a transport rather than part of the model id.
/// Checked in order; only the first match is removed.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];

/// Removes a leading host/provider selector prefix, which identifies the
/// transport rather than the provider-native model id. This mirrors the
/// host's existing normalization so `ollama:qwen3:30b` reaches Ollama as
/// `qwen3:30b` instead of an invalid model named `ollama`. Cerebras follows
/// the same convention but uses a slash separator (`cerebras/gpt-oss-120b`)
/// because its own /v1/models endpoint returns bare names that overlap
/// OpenAI's families.
///
/// At most one prefix is stripped; input without a known prefix is returned
/// unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}
1450
1451/// Resolve an alias or selector into the complete catalog identity hosts need:
1452/// provider inference, prefix-normalized model id, default tool format, and tier.
1453pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1454    let config = effective_config();
1455    if let Some(alias) = config.aliases.get(selector) {
1456        let id = alias.id.clone();
1457        let provider = alias.provider.clone();
1458        let requested = alias
1459            .tool_format
1460            .clone()
1461            .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1462        let tool_format = guard_tool_format(&provider, &id, &requested, Some(selector));
1463        return ResolvedModel {
1464            tier: model_tier_with_config(&config, &id),
1465            family: model_family_with_config(&config, &provider, &id),
1466            lineage: model_lineage_with_config(&config, &provider, &id),
1467            id,
1468            provider,
1469            alias: Some(selector.to_string()),
1470            tool_format,
1471        };
1472    }
1473
1474    let id = normalize_model_id(selector);
1475    let inference = infer_provider_with_config(&config, selector);
1476    let source = inference.source;
1477    let provider = inference.provider;
1478    let requested = default_tool_format_with_config(&config, &id, &provider);
1479    let tool_format = guard_tool_format(&provider, &id, &requested, None);
1480    let tier = model_tier_with_config(&config, &id);
1481    let family = model_family_with_inference_source(&config, &provider, &id, source);
1482    let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1483    ResolvedModel {
1484        id,
1485        provider,
1486        alias: None,
1487        tool_format,
1488        tier,
1489        family,
1490        lineage,
1491    }
1492}
1493
1494/// Run the requested `tool_format` through the capability registry's
1495/// dialect-validity gate, returning the safe format to actually use. When the
1496/// registry auto-corrects a known-broken combo (e.g. a `native` pin on a
1497/// `native_unreliable` route that silently drops to unparsed DSML text), the
1498/// correction is logged once at resolution time so a harness developer sees
1499/// *why* their pinned format was not honored — never a silent vanishing.
1500fn guard_tool_format(provider: &str, model: &str, requested: &str, alias: Option<&str>) -> String {
1501    let decision = crate::llm::capabilities::validate_tool_format(provider, model, requested);
1502    if let Some(reason) = &decision.correction {
1503        tracing::warn!(
1504            target: "harn::llm::tool_format",
1505            alias = alias.unwrap_or(""),
1506            "{reason}"
1507        );
1508    }
1509    decision.effective
1510}
1511
1512/// Infer provider from a model ID using inference rules.
1513pub fn infer_provider(model_id: &str) -> String {
1514    infer_provider_detail(model_id).provider
1515}
1516
1517/// Infer provider from a model ID and retain whether the configured default was used.
1518pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1519    let config = effective_config();
1520    infer_provider_with_config(&config, model_id)
1521}
1522
1523fn infer_provider_with_config(
1524    config: &ProvidersConfig,
1525    model_id: &str,
1526) -> crate::llm::provider::ProviderInference {
1527    if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1528        return crate::llm::provider::ProviderInference::builtin("ollama");
1529    }
1530    if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1531        return crate::llm::provider::ProviderInference::builtin("huggingface");
1532    }
1533    // Exact catalog rows are the most authoritative declaration of where
1534    // a model is hosted: any pattern-based inference rule is necessarily
1535    // less specific than `[models."<id>"].provider = "<name>"`. Catalogs
1536    // include user overlays, so users can still re-home a model by
1537    // setting a catalog entry in their own providers.toml.
1538    let normalized_id = normalize_model_id(model_id);
1539    if let Some(model) = config
1540        .models
1541        .get(model_id)
1542        .or_else(|| config.models.get(&normalized_id))
1543    {
1544        return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1545    }
1546    for rule in &config.inference_rules {
1547        if let Some(exact) = &rule.exact {
1548            if model_id == exact {
1549                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1550            }
1551        }
1552        if let Some(pattern) = &rule.pattern {
1553            if glob_match(pattern, model_id) {
1554                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1555            }
1556        }
1557        if let Some(substr) = &rule.contains {
1558            if model_id.contains(substr.as_str()) {
1559                return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1560            }
1561        }
1562    }
1563    crate::llm::provider::infer_provider_from_model_id(
1564        model_id,
1565        &default_provider_with_config(config),
1566    )
1567}
1568
1569pub fn default_provider() -> String {
1570    let config = effective_config();
1571    default_provider_with_config(&config)
1572}
1573
1574fn default_provider_with_config(config: &ProvidersConfig) -> String {
1575    std::env::var("HARN_DEFAULT_PROVIDER")
1576        .ok()
1577        .map(|value| value.trim().to_string())
1578        .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1579        .or_else(|| {
1580            config
1581                .default_provider
1582                .as_deref()
1583                .map(str::trim)
1584                .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1585                .map(str::to_string)
1586        })
1587        .unwrap_or_else(|| auto_select_provider(config))
1588}
1589
/// Provider assumed when nothing is configured and no credentials are found.
///
/// Anthropic is Harn's documented default. [`auto_select_provider`] returns
/// this value only after probing for a credentialed provider and then a local
/// one, and it emits a one-time warning when it does, so adopters without
/// Anthropic credentials get a clear nudge instead of a raw auth failure.
const FALLBACK_PROVIDER: &str = "anthropic";
1596
/// Set to `true` by [`warn_auto_provider_once`] the first time it logs, so the
/// provider auto-selection notice is emitted at most once per process.
static AUTO_PROVIDER_WARNED: AtomicBool = AtomicBool::new(false);
1598
1599/// True when any of the provider's auth env vars holds a non-empty value.
1600fn provider_has_credentials(def: &ProviderDef) -> bool {
1601    auth_env_names(&def.auth_env)
1602        .iter()
1603        .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty()))
1604}
1605
1606/// True when the provider can serve without cloud credentials — a managed
1607/// local runtime (`harn local`) or an auth-free endpoint such as Ollama.
1608fn provider_is_local(def: &ProviderDef) -> bool {
1609    def.local_runtime.is_some() || matches!(def.auth_env, AuthEnv::None)
1610}
1611
1612/// Emit a provider auto-selection notice at most once per process.
1613fn warn_auto_provider_once(message: &str) {
1614    if !AUTO_PROVIDER_WARNED.swap(true, Ordering::Relaxed) {
1615        crate::events::log_warn("llm_config", message);
1616    }
1617}
1618
1619/// Choose a provider when neither `HARN_DEFAULT_PROVIDER` nor
1620/// `config.default_provider` is set. Prefers a credentialed cloud provider,
1621/// then a locally-available one, and only then falls back to the documented
1622/// default. Detection is portable: it reads provider `auth_env` variables and
1623/// `local_runtime` metadata from the catalog — never hardcoded paths or ports.
1624fn auto_select_provider(config: &ProvidersConfig) -> String {
1625    // Well-known providers first for a stable, predictable choice; then any
1626    // other configured provider (BTreeMap iteration is sorted/deterministic).
1627    const PREFERRED: &[&str] = &[
1628        "anthropic",
1629        "openai",
1630        "google",
1631        "azure-openai",
1632        "groq",
1633        "mistral",
1634        "deepseek",
1635        "xai",
1636        "openrouter",
1637    ];
1638    for name in PREFERRED {
1639        if config
1640            .providers
1641            .get(*name)
1642            .is_some_and(provider_has_credentials)
1643        {
1644            if *name != FALLBACK_PROVIDER {
1645                warn_auto_provider_once(&format!(
1646                    "no default provider configured; using '{name}' (its API key is set). \
1647                     Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1648                ));
1649            }
1650            return (*name).to_string();
1651        }
1652    }
1653    for (name, def) in &config.providers {
1654        if provider_has_credentials(def) {
1655            warn_auto_provider_once(&format!(
1656                "no default provider configured; using '{name}' (its API key is set). \
1657                 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1658            ));
1659            return name.clone();
1660        }
1661    }
1662    // No cloud credentials: prefer something that runs locally with no key.
1663    for (name, def) in &config.providers {
1664        if provider_is_local(def) {
1665            warn_auto_provider_once(&format!(
1666                "no provider API keys found; using local provider '{name}'. \
1667                 Set an API key + HARN_DEFAULT_PROVIDER to use a cloud provider."
1668            ));
1669            return name.clone();
1670        }
1671    }
1672    // Nothing detected. Fall back to the documented default and say how to fix.
1673    warn_auto_provider_once(&format!(
1674        "no LLM provider configured and no API keys detected; defaulting to \
1675         '{FALLBACK_PROVIDER}'. Set ANTHROPIC_API_KEY (or another provider's key plus \
1676         HARN_DEFAULT_PROVIDER), or run a local model with `harn local launch`."
1677    ));
1678    FALLBACK_PROVIDER.to_string()
1679}
1680
1681/// Get model tier ("small", "mid", "frontier").
1682pub fn model_tier(model_id: &str) -> String {
1683    let config = effective_config();
1684    model_tier_with_config(&config, model_id)
1685}
1686
1687pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1688    // Per-model self-declared tier wins. This is the only path.
1689    if let Some(model) = config.models.get(model_id) {
1690        if let Some(tier) = model.tier.as_deref() {
1691            let trimmed = tier.trim();
1692            if !trimmed.is_empty() {
1693                return trimmed.to_string();
1694            }
1695        }
1696    }
1697    // Legacy pattern-rules: still consulted while we finish migrating the
1698    // long tail of models to per-row `tier = "..."`. Newly added rows
1699    // should set `tier` directly; the rule table is a fallback only.
1700    for rule in &config.tier_rules {
1701        if let Some(exact) = &rule.exact {
1702            if model_id == exact {
1703                return rule.tier.clone();
1704            }
1705        }
1706        if let Some(pattern) = &rule.pattern {
1707            if glob_match(pattern, model_id) {
1708                return rule.tier.clone();
1709            }
1710        }
1711        if let Some(substr) = &rule.contains {
1712            if model_id.contains(substr.as_str()) {
1713                return rule.tier.clone();
1714            }
1715        }
1716    }
1717    config.tier_defaults.default.clone()
1718}
1719
1720/// Return the normalized model-family token used for cross-family review.
1721pub fn model_family(provider: &str, model_id: &str) -> String {
1722    let config = effective_config();
1723    model_family_with_config(&config, provider, model_id)
1724}
1725
1726pub(crate) fn model_family_with_config(
1727    config: &ProvidersConfig,
1728    provider: &str,
1729    model_id: &str,
1730) -> String {
1731    catalog_family_token(config, model_id)
1732        .unwrap_or_else(|| derive_model_family(provider, model_id))
1733}
1734
1735fn model_family_with_inference_source(
1736    config: &ProvidersConfig,
1737    provider: &str,
1738    model_id: &str,
1739    source: crate::llm::provider::ProviderInferenceSource,
1740) -> String {
1741    if let Some(family) = catalog_family_token(config, model_id) {
1742        return family;
1743    }
1744    let id_family = derive_model_family("", model_id);
1745    if id_family != "unknown" {
1746        return id_family;
1747    }
1748    if matches!(
1749        source,
1750        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1751    ) {
1752        return "unknown".to_string();
1753    }
1754    derive_model_family(provider, model_id)
1755}
1756
1757/// Return the narrower lineage token used for model-aware option packs.
1758pub fn model_lineage(provider: &str, model_id: &str) -> String {
1759    let config = effective_config();
1760    model_lineage_with_config(&config, provider, model_id)
1761}
1762
1763pub(crate) fn model_lineage_with_config(
1764    config: &ProvidersConfig,
1765    provider: &str,
1766    model_id: &str,
1767) -> String {
1768    catalog_lineage_token(config, model_id)
1769        .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1770}
1771
1772fn model_lineage_with_inference_source(
1773    config: &ProvidersConfig,
1774    provider: &str,
1775    model_id: &str,
1776    source: crate::llm::provider::ProviderInferenceSource,
1777) -> String {
1778    if let Some(lineage) = catalog_lineage_token(config, model_id) {
1779        return lineage;
1780    }
1781    let id_lineage = derive_model_lineage("", model_id);
1782    if id_lineage != "unknown" {
1783        return id_lineage;
1784    }
1785    if matches!(
1786        source,
1787        crate::llm::provider::ProviderInferenceSource::DefaultFallback
1788    ) {
1789        return "unknown".to_string();
1790    }
1791    derive_model_lineage(provider, model_id)
1792}
1793
1794fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1795    config
1796        .models
1797        .get(model_id)
1798        .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1799}
1800
1801fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1802    config
1803        .models
1804        .get(model_id)
1805        .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1806}
1807
/// Normalize an optional catalog token: trim it, lowercase ASCII letters and
/// turn underscores into hyphens. `None` and blank input both yield `None`.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let trimmed = value?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.to_ascii_lowercase().replace('_', "-"))
}
1814
1815fn derive_model_family(provider: &str, model_id: &str) -> String {
1816    let id = model_id.to_ascii_lowercase();
1817    if contains_any(&id, &["claude", "anthropic.claude"]) {
1818        return "anthropic-claude".to_string();
1819    }
1820    if contains_any(&id, &["gemini", "google/gemini"]) {
1821        return "google-gemini".to_string();
1822    }
1823    if contains_any(&id, &["deepseek"]) {
1824        return "deepseek".to_string();
1825    }
1826    if contains_any(&id, &["qwen"]) {
1827        return "qwen".to_string();
1828    }
1829    if contains_any(&id, &["kimi", "moonshot"]) {
1830        return "kimi".to_string();
1831    }
1832    if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1833        return "glm".to_string();
1834    }
1835    if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1836        return "mistral".to_string();
1837    }
1838    if contains_any(&id, &["minimax"]) {
1839        return "minimax".to_string();
1840    }
1841    if contains_any(&id, &["llama"]) {
1842        return "llama".to_string();
1843    }
1844    if contains_any(&id, &["gemma"]) {
1845        return "gemma".to_string();
1846    }
1847    if is_openai_reasoning_model(&id) {
1848        return "openai-reasoning".to_string();
1849    }
1850    if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1851        return "openai-gpt".to_string();
1852    }
1853    match provider {
1854        "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1855        "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1856        "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1857        "deepseek" => "deepseek".to_string(),
1858        "zai" => "glm".to_string(),
1859        "minimax" => "minimax".to_string(),
1860        other if !other.is_empty() => normalize_identifier_token(other),
1861        _ => "unknown".to_string(),
1862    }
1863}
1864
1865fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1866    let id = model_id.to_ascii_lowercase();
1867    if contains_any(&id, &["haiku"]) {
1868        return "claude-haiku".to_string();
1869    }
1870    if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1871        return "claude-opus-adaptive".to_string();
1872    }
1873    if contains_any(&id, &["claude"]) {
1874        return "claude-sonnet-opus".to_string();
1875    }
1876    if contains_any(&id, &["gpt-5"]) {
1877        return "openai-gpt5".to_string();
1878    }
1879    if is_openai_reasoning_model(&id) {
1880        return "openai-reasoning".to_string();
1881    }
1882    if contains_any(&id, &["gpt-", "gpt_"]) {
1883        return "openai-legacy".to_string();
1884    }
1885    if contains_any(&id, &["gemini"]) {
1886        if contains_any(&id, &["flash"]) {
1887            return "gemini-flash".to_string();
1888        }
1889        return "gemini-pro".to_string();
1890    }
1891    if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1892        return "qwen3".to_string();
1893    }
1894    if contains_any(&id, &["gemma4", "gemma-4"]) {
1895        return "gemma4".to_string();
1896    }
1897    let family = derive_model_family(provider, model_id);
1898    if family == "unknown" {
1899        "unknown".to_string()
1900    } else {
1901        family
1902    }
1903}
1904
/// Report whether `haystack` contains at least one of `needles` as a
/// substring. An empty needle list never matches.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1908
/// Report whether `haystack` begins with at least one of `prefixes`. An empty
/// prefix list never matches.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1912
1913fn is_openai_reasoning_model(id: &str) -> bool {
1914    starts_with_any(id, &["o1", "o3", "o4"])
1915        || contains_any(
1916            id,
1917            &[
1918                "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1919            ],
1920        )
1921}
1922
/// Turn an arbitrary identifier into a lowercase, hyphen-separated token.
///
/// ASCII letters are lowercased and every run of characters that are not
/// ASCII alphanumerics acts as a single separator, so leading, trailing and
/// repeated separators disappear. Input with no alphanumerics gives `""`.
fn normalize_identifier_token(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let mut token = String::with_capacity(lowered.len());
    for segment in lowered.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if segment.is_empty() {
            continue;
        }
        if !token.is_empty() {
            token.push('-');
        }
        token.push_str(segment);
    }
    token
}
1941
1942/// Get provider config for resolving base_url, auth, etc.
1943pub fn provider_config(name: &str) -> Option<ProviderDef> {
1944    effective_config().providers.get(name).cloned()
1945}
1946
1947pub fn provider_protocol(name: &str) -> Option<String> {
1948    provider_config(name).and_then(|def| def.protocol)
1949}
1950
1951pub fn provider_uses_acp(name: &str) -> bool {
1952    provider_protocol(name)
1953        .as_deref()
1954        .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1955}
1956
1957/// Get model-specific default parameters (temperature, etc.).
1958/// Matches glob patterns in model_defaults keys.
1959pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1960    let config = effective_config();
1961    let mut params = BTreeMap::new();
1962    for (pattern, defaults) in &config.model_defaults {
1963        if glob_match(pattern, model_id) {
1964            for (k, v) in defaults {
1965                params.insert(k.clone(), v.clone());
1966            }
1967        }
1968    }
1969    params
1970}
1971
1972/// Get per-role LLM defaults, e.g. `[model_roles.merge]`.
1973///
1974/// Role defaults are intentionally shaped like ordinary `llm_call` options:
1975/// callers can pin `provider`/`model`, install `route_policy` or `prefer`,
1976/// and tune budget/latency knobs without creating a parallel routing stack.
1977/// Environment variables provide a lightweight operational override for
1978/// merge/fast-apply workers:
1979///
1980/// - `HARN_LLM_MERGE_PROVIDER`, `HARN_LLM_MERGE_MODEL`,
1981///   `HARN_LLM_MERGE_ROUTE_POLICY`
1982/// - `HARN_LLM_FAST_APPLY_PROVIDER`, `HARN_LLM_FAST_APPLY_MODEL`,
1983///   `HARN_LLM_FAST_APPLY_ROUTE_POLICY`
1984/// - `HARN_LLM_ROLE_<ROLE>_PROVIDER`, `_MODEL`, `_ROUTE_POLICY`
1985pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1986    let normalized = normalize_model_role_name(role);
1987    if normalized.is_empty() {
1988        return BTreeMap::new();
1989    }
1990    let config = effective_config();
1991    let mut params = BTreeMap::new();
1992    for key in role_lookup_keys(&normalized) {
1993        extend_model_role_defaults(&config, &key, &mut params);
1994    }
1995    apply_model_role_env_overrides(&normalized, &mut params);
1996    params
1997}
1998
1999fn extend_model_role_defaults(
2000    config: &ProvidersConfig,
2001    role: &str,
2002    params: &mut BTreeMap<String, toml::Value>,
2003) {
2004    for (configured_role, defaults) in &config.model_roles {
2005        if normalize_model_role_name(configured_role) == role {
2006            params.extend(defaults.clone());
2007        }
2008    }
2009    if let Some(defaults) = config.model_roles.get(role) {
2010        params.extend(defaults.clone());
2011    }
2012}
2013
/// Canonical form of a role name: trimmed, ASCII-lowercased, with hyphens
/// replaced by underscores.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch.to_ascii_lowercase() })
        .collect()
}
2017
/// Config keys to consult for a role, lowest precedence first.
///
/// `merge` and `fast_apply` are paired: each also reads the other's table,
/// with its own key listed last. Every other role maps to just itself.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let paired_role = match role {
        "merge" => Some("fast_apply"),
        "fast_apply" => Some("merge"),
        _ => None,
    };
    paired_role
        .into_iter()
        .chain(std::iter::once(role))
        .map(str::to_string)
        .collect()
}
2027
/// Build the environment-variable token for a role: ASCII alphanumerics are
/// uppercased and every run of other characters becomes a single `_`, with no
/// leading or trailing underscore.
fn role_env_token(role: &str) -> String {
    let words: Vec<String> = role
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_uppercase)
        .collect();
    words.join("_")
}
2043
2044fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
2045    for alias in role_env_aliases(role) {
2046        apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
2047        apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
2048        apply_model_role_env_var(
2049            &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
2050            "route_policy",
2051            params,
2052        );
2053        apply_model_role_env_var(
2054            &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
2055            "provider",
2056            params,
2057        );
2058        apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
2059        apply_model_role_env_var(
2060            &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
2061            "route_policy",
2062            params,
2063        );
2064    }
2065}
2066
2067fn role_env_aliases(role: &str) -> Vec<String> {
2068    let token = role_env_token(role);
2069    if token.is_empty() {
2070        return Vec::new();
2071    }
2072    if token == "MERGE" {
2073        vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
2074    } else if token == "FAST_APPLY" {
2075        vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
2076    } else {
2077        vec![token]
2078    }
2079}
2080
2081fn apply_model_role_env_var(
2082    env_name: &str,
2083    option_name: &str,
2084    params: &mut BTreeMap<String, toml::Value>,
2085) {
2086    let Ok(value) = std::env::var(env_name) else {
2087        return;
2088    };
2089    let trimmed = value.trim();
2090    if trimmed.is_empty() {
2091        return;
2092    }
2093    params.insert(
2094        option_name.to_string(),
2095        toml::Value::String(trimmed.to_string()),
2096    );
2097}
2098
2099/// Get list of configured provider names.
2100pub fn provider_names() -> Vec<String> {
2101    effective_config().providers.keys().cloned().collect()
2102}
2103
2104/// Return every configured alias name, sorted deterministically.
2105pub fn known_model_names() -> Vec<String> {
2106    effective_config().aliases.keys().cloned().collect()
2107}
2108
2109pub fn alias_entries() -> Vec<(String, AliasDef)> {
2110    effective_config().aliases.into_iter().collect()
2111}
2112
2113pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
2114    effective_config().alias_tool_calling.get(alias).cloned()
2115}
2116
2117/// Return every configured model-catalog entry, sorted by provider then id.
2118pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
2119    let config = effective_config();
2120    model_catalog_entries_with_config(&config)
2121}
2122
2123pub(crate) fn model_catalog_entries_with_config(
2124    config: &ProvidersConfig,
2125) -> Vec<(String, ModelDef)> {
2126    sorted_model_entries_with_config(config)
2127        .into_iter()
2128        .map(|(id, model)| {
2129            let provider = model.provider.clone();
2130            (
2131                id.clone(),
2132                with_effective_capability_tags(id, provider, model),
2133            )
2134        })
2135        .collect()
2136}
2137
2138pub(crate) fn sorted_model_entries_with_config(
2139    config: &ProvidersConfig,
2140) -> Vec<(String, ModelDef)> {
2141    let mut entries: Vec<_> = config
2142        .models
2143        .iter()
2144        .map(|(id, model)| (id.clone(), model.clone()))
2145        .collect();
2146    entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
2147        model_a
2148            .provider
2149            .cmp(&model_b.provider)
2150            .then_with(|| id_a.cmp(id_b))
2151    });
2152    entries
2153}
2154
2155pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
2156    effective_config()
2157        .models
2158        .get(model_id)
2159        .cloned()
2160        .map(|model| {
2161            let provider = model.provider.clone();
2162            with_effective_capability_tags(model_id.to_string(), provider, model)
2163        })
2164}
2165
2166pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
2167    model_catalog_entry(model_id).and_then(|model| model.rate_limits)
2168}
2169
2170pub fn wire_model_id(model_id: &str) -> String {
2171    model_catalog_entry(model_id)
2172        .and_then(|model| model.wire_model)
2173        .unwrap_or_else(|| model_id.to_string())
2174}
2175
2176pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
2177    provider_config(provider).and_then(|provider| {
2178        provider
2179            .rate_limits
2180            .unwrap_or_default()
2181            .with_rpm_fallback(provider.rpm)
2182    })
2183}
2184
2185pub fn model_equivalence_group(model_id: &str) -> Option<String> {
2186    model_catalog_entry(model_id).and_then(|model| {
2187        model
2188            .equivalence_group
2189            .or(model.logical_model)
2190            .filter(|group| !group.trim().is_empty())
2191    })
2192}
2193
2194/// Return same-logical-model routes that can be considered for explicit
2195/// failover or cross-provider experiments. Equivalence is a catalog assertion
2196/// about compatible model weights/family, not wire-level identity.
2197pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
2198    let resolved = resolve_model_info(selector);
2199    let Some(group) = model_equivalence_group(&resolved.id) else {
2200        return Vec::new();
2201    };
2202    let config = effective_config();
2203    let Some(source) = config.models.get(&resolved.id) else {
2204        return Vec::new();
2205    };
2206    let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
2207    let source_context = source
2208        .runtime_context_window
2209        .unwrap_or(source.context_window);
2210
2211    sorted_model_entries_with_config(&config)
2212        .into_iter()
2213        .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
2214        .filter(|(_, model)| !model.deprecated)
2215        .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
2216        .filter(|(_, model)| {
2217            model.equivalence_group.as_deref() == Some(group.as_str())
2218                || model.logical_model.as_deref() == Some(group.as_str())
2219        })
2220        .filter(|(id, model)| {
2221            let caps = crate::llm::capabilities::lookup(&model.provider, id);
2222            let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
2223            candidate_context >= source_context
2224                && (!source_caps.native_tools || caps.native_tools)
2225                && (!source_caps.text_tool_wire_format_supported
2226                    || caps.text_tool_wire_format_supported)
2227                && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
2228                && source_caps.structured_output_mode == caps.structured_output_mode
2229        })
2230        .map(|(id, model)| {
2231            let provider = model.provider.clone();
2232            (
2233                id.clone(),
2234                with_effective_capability_tags(id, provider, model),
2235            )
2236        })
2237        .collect()
2238}
2239
2240pub fn qc_default_model(provider: &str) -> Option<String> {
2241    std::env::var("BURIN_QC_MODEL")
2242        .ok()
2243        .filter(|value| !value.trim().is_empty())
2244        .or_else(|| {
2245            effective_config()
2246                .qc_defaults
2247                .get(&provider.to_lowercase())
2248                .cloned()
2249        })
2250}
2251
2252pub fn default_model_for_provider(provider: &str) -> String {
2253    if provider_uses_acp(provider) {
2254        return "default".to_string();
2255    }
2256    match provider {
2257        "local" => std::env::var("LOCAL_LLM_MODEL")
2258            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
2259            .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
2260        "mlx" => std::env::var("MLX_MODEL_ID")
2261            .unwrap_or_else(|_| "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit".to_string()),
2262        "openai" => "gpt-4o-mini".to_string(),
2263        "ollama" => "llama3.2".to_string(),
2264        "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
2265        _ => "claude-sonnet-4-6".to_string(),
2266    }
2267}
2268
2269pub fn qc_defaults() -> BTreeMap<String, String> {
2270    effective_config().qc_defaults
2271}
2272
2273pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2274    effective_config()
2275        .models
2276        .get(model_id)
2277        .and_then(|model| model.pricing.clone())
2278}
2279
2280/// Premium per-MTok pricing for a model's accelerated-serving ("fast mode")
2281/// tier, when the catalog declares one. Returns `None` for models with no
2282/// fast tier or a tier that omits explicit pricing — callers fall back to
2283/// standard pricing in that case.
2284pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2285    effective_config()
2286        .models
2287        .get(model_id)
2288        .and_then(|model| model.fast_mode.as_ref())
2289        .and_then(|fast_mode| fast_mode.pricing.clone())
2290}
2291
2292pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
2293    model_pricing_per_mtok(model_id)
2294        .map(|pricing| {
2295            (
2296                pricing.input_per_mtok / 1000.0,
2297                pricing.output_per_mtok / 1000.0,
2298            )
2299        })
2300        .or_else(|| {
2301            let (input, output, _) = provider_economics(provider);
2302            match (input, output) {
2303                (Some(input), Some(output)) => Some((input, output)),
2304                _ => None,
2305            }
2306        })
2307}
2308
2309pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
2310    match auth_env {
2311        AuthEnv::None => Vec::new(),
2312        AuthEnv::Single(name) => vec![name.clone()],
2313        AuthEnv::Multiple(names) => names.clone(),
2314    }
2315}
2316
2317pub fn provider_key_available(provider: &str) -> bool {
2318    let Some(pdef) = provider_config(provider) else {
2319        return provider == "ollama";
2320    };
2321    if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
2322        return true;
2323    }
2324    auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
2325        std::env::var(env_name)
2326            .ok()
2327            .is_some_and(|value| !value.trim().is_empty())
2328    })
2329}
2330
2331pub fn available_provider_names() -> Vec<String> {
2332    provider_names()
2333        .into_iter()
2334        .filter(|provider| provider_key_available(provider))
2335        .collect()
2336}
2337
2338/// Check if a provider advertises a legacy provider-level feature.
2339pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
2340    provider_config(provider)
2341        .map(|p| p.features.iter().any(|f| f == feature))
2342        .unwrap_or(false)
2343}
2344
2345/// Provider-level catalog pricing/latency. Model-specific catalog pricing
2346/// wins when available; this is the adapter-level fallback used by routing
2347/// and portal summaries when a model has no explicit catalog entry.
2348pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
2349    provider_config(provider)
2350        .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
2351        .unwrap_or((None, None, None))
2352}
2353
/// The tool-call channel a `tool_format` string addresses.
///
/// `native` is the provider JSON tool-calling channel; `text` (the canonical
/// tagged/heredoc grammar) and `json` (fenced-JSON) are both TEXT-channel
/// formats — they ride in the assistant's visible content and parse with a
/// text parser. This is the single source of truth for "is this format a
/// text-channel format?" so the parity gates, native-tools resolution, and
/// tool-result message role all agree.
///
/// Values are produced by `tool_format_channel`, which maps the three known
/// format strings onto these two variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatChannel {
    /// Provider native JSON tool calling (the `native` format).
    Native,
    /// A text-channel grammar carried in assistant content (`text` or `json`).
    Text,
}
2369
2370/// Classify a `tool_format` string into its channel, or `None` for an unknown
2371/// value (a typo, or a not-yet-wired format). Callers use this to reject
2372/// unknown formats loudly instead of silently defaulting.
2373///
2374/// EXHAUSTIVE-MATCH GUARD: this `match` is the canonical place tool_format is
2375/// switched. Adding a new format requires a branch here, so a half-wired
2376/// format fails to compile rather than silently reading as text.
2377pub fn tool_format_channel(format: &str) -> Option<ToolFormatChannel> {
2378    match format {
2379        "native" => Some(ToolFormatChannel::Native),
2380        "text" | "json" => Some(ToolFormatChannel::Text),
2381        _ => None,
2382    }
2383}
2384
2385/// True when `format` is a tool_format Harn understands (`native`, `text`, or
2386/// `json`). Used to gate the capability-matrix `preferred_tool_format` so a
2387/// pinned format is honored, while an unknown value falls through to the
2388/// native/text heuristic.
2389pub fn is_known_tool_format(format: &str) -> bool {
2390    tool_format_channel(format).is_some()
2391}
2392
2393/// Resolve the default tool format for a model+provider combination.
2394/// Priority: alias `tool_format` (matched by model ID) > provider/model
2395/// capability matrix > legacy provider feature > "json" (the global
2396/// text-channel default; heredoc "text" is opt-in via a pin or explicit
2397/// request).
2398pub fn default_tool_format(model: &str, provider: &str) -> String {
2399    let config = effective_config();
2400    default_tool_format_with_config(&config, model, provider)
2401}
2402
2403fn default_tool_format_with_config(
2404    config: &ProvidersConfig,
2405    model: &str,
2406    provider: &str,
2407) -> String {
2408    // Aliases match by model ID + provider, or by alias name.
2409    for (name, alias) in &config.aliases {
2410        let matches = (alias.id == model && alias.provider == provider) || name == model;
2411        if matches {
2412            if let Some(ref fmt) = alias.tool_format {
2413                return fmt.clone();
2414            }
2415        }
2416    }
2417    let capabilities = crate::llm::capabilities::lookup(provider, model);
2418    if let Some(format) = capabilities.preferred_tool_format.as_deref() {
2419        // A capability row may pin any known tool_format, including `text`
2420        // (heredoc) — the reverse safety valve a regressing model uses to pin
2421        // OFF the global json default. `json` is also honored when a row sets
2422        // it. The exhaustive match below is the EXHAUSTIVE-MATCH GUARD: a new
2423        // tool_format that isn't classified here fails loudly rather than
2424        // silently falling through to the native/json heuristic.
2425        if is_known_tool_format(format) {
2426            return format.to_string();
2427        }
2428    }
2429    let capability_matrix_native = capabilities.native_tools;
2430    let legacy_provider_native = config
2431        .providers
2432        .get(provider)
2433        .map(|p| p.features.iter().any(|f| f == "native_tools"))
2434        .unwrap_or(false);
2435    if capability_matrix_native || legacy_provider_native {
2436        "native".to_string()
2437    } else {
2438        // GLOBAL DEFAULT: a text-channel model with no pinned format resolves
2439        // to fenced-json (`json`), not heredoc (`text`). The win is STRUCTURAL
2440        // — a JSON string can't carry a raw newline, so a `<<EOF` content
2441        // delimiter never collides with the call wrapper (heredoc's known
2442        // production defect: models leak `<<EOF` into file content → the
2443        // `line 0: <<` thrash). Fenced-json swept a clean 1.0/1.0/1.0
2444        // (compliance/parse-determinism/expressiveness) across every model
2445        // measured, and the structural guarantee generalizes to unmeasured
2446        // models. Heredoc (`text`) stays selectable explicitly and via a
2447        // per-model `preferred_tool_format = "text"` pin (the reverse valve).
2448        "json".to_string()
2449    }
2450}
2451
2452fn with_effective_capability_tags(
2453    model_id: String,
2454    provider: String,
2455    mut model: ModelDef,
2456) -> ModelDef {
2457    model.capabilities = effective_model_capability_tags(&provider, &model_id);
2458    model
2459}
2460
2461/// Legacy display tags derived from the canonical provider/model capability
2462/// matrix. The matrix is the source of truth; `models.*.capabilities` in
2463/// providers.toml is accepted only for backwards-compatible parsing.
2464pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2465    let caps = crate::llm::capabilities::lookup(provider, model_id);
2466    capability_tags_from_capabilities(&caps)
2467}
2468
2469pub(crate) fn capability_tags_from_capabilities(
2470    caps: &crate::llm::capabilities::Capabilities,
2471) -> Vec<String> {
2472    let mut tags = Vec::new();
2473    // Today all Harn chat providers expose streaming. Keep this as a
2474    // transport baseline rather than a duplicated per-model declaration.
2475    tags.push("streaming".to_string());
2476    if caps.native_tools || caps.text_tool_wire_format_supported {
2477        tags.push("tools".to_string());
2478    }
2479    if !caps.tool_search.is_empty() {
2480        tags.push("tool_search".to_string());
2481    }
2482    if caps.vision || caps.vision_supported {
2483        tags.push("vision".to_string());
2484    }
2485    if caps.audio {
2486        tags.push("audio".to_string());
2487    }
2488    if caps.pdf {
2489        tags.push("pdf".to_string());
2490    }
2491    if caps.video {
2492        tags.push("video".to_string());
2493    }
2494    if caps.files_api_supported {
2495        tags.push("files".to_string());
2496    }
2497    if caps.prompt_caching {
2498        tags.push("prompt_caching".to_string());
2499    }
2500    if !caps.thinking_modes.is_empty() {
2501        tags.push("thinking".to_string());
2502    }
2503    if caps.interleaved_thinking_supported
2504        || caps
2505            .thinking_modes
2506            .iter()
2507            .any(|mode| mode == "adaptive" || mode == "effort")
2508    {
2509        tags.push("extended_thinking".to_string());
2510    }
2511    if caps.structured_output.is_some() || caps.json_schema.is_some() {
2512        tags.push("structured_output".to_string());
2513    }
2514    tags
2515}
2516
2517/// Resolve a tier or alias into a concrete model/provider pair.
2518pub fn resolve_tier_model(
2519    target: &str,
2520    preferred_provider: Option<&str>,
2521) -> Option<(String, String)> {
2522    let config = effective_config();
2523
2524    let candidate_aliases = if let Some(provider) = preferred_provider {
2525        vec![
2526            format!("{provider}/{target}"),
2527            format!("{provider}:{target}"),
2528            format!("tier/{target}"),
2529            target.to_string(),
2530        ]
2531    } else {
2532        vec![format!("tier/{target}"), target.to_string()]
2533    };
2534
2535    for alias_name in candidate_aliases {
2536        if let Some(alias) = config.aliases.get(&alias_name) {
2537            return Some((alias.id.clone(), alias.provider.clone()));
2538        }
2539    }
2540
2541    None
2542}
2543
2544/// Return all configured alias-backed model/provider pairs whose resolved
2545/// model falls into the requested capability tier. The result is de-duplicated
2546/// and sorted deterministically by provider then model id.
2547pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2548    let config = effective_config();
2549    let mut seen = std::collections::BTreeSet::new();
2550    let mut candidates = Vec::new();
2551
2552    for alias in config.aliases.values() {
2553        let pair = (alias.id.clone(), alias.provider.clone());
2554        if seen.contains(&pair) {
2555            continue;
2556        }
2557        if model_tier(&alias.id) == target {
2558            seen.insert(pair.clone());
2559            candidates.push(pair);
2560        }
2561    }
2562
2563    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2564        provider_a
2565            .cmp(provider_b)
2566            .then_with(|| model_a.cmp(model_b))
2567    });
2568    candidates
2569}
2570
2571/// Return all configured alias-backed model/provider pairs. Used by routing
2572/// policies that need to compare alternatives across tiers.
2573pub fn all_model_candidates() -> Vec<(String, String)> {
2574    let config = effective_config();
2575    let mut seen = std::collections::BTreeSet::new();
2576    let mut candidates = Vec::new();
2577
2578    for alias in config.aliases.values() {
2579        let pair = (alias.id.clone(), alias.provider.clone());
2580        if seen.insert(pair.clone()) {
2581            candidates.push(pair);
2582        }
2583    }
2584
2585    candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2586        provider_a
2587            .cmp(provider_b)
2588            .then_with(|| model_a.cmp(model_b))
2589    });
2590    candidates
2591}
2592
/// Choose a reviewer model from a different family than the author model.
///
/// The author is resolved from `options.author_model`; a non-blank
/// `options.author_provider` overrides the resolved provider, and the family,
/// lineage, and tool format are then re-derived for that provider.
///
/// Every model in the effective catalog is considered as a candidate unless
/// it is the author itself, is deprecated, is not serverless, has an unknown
/// family or the author's family, lists the author in its
/// `avoid_as_reviewer_for` selectors, or exceeds
/// `options.max_price_multiplier`. Survivors are ranked with `reviewer_score`;
/// the highest score wins, with ties broken by provider and then model id.
///
/// When the author's family is `"unknown"` or no candidate survives, the
/// author model is returned as its own reviewer with `fallback: true` plus a
/// fallback code and reason describing why.
pub fn pick_complementary_reviewer(
    options: ComplementaryReviewerOptions,
) -> ComplementaryReviewerSelection {
    let config = effective_config();
    let mut author = resolve_model_info(&options.author_model);
    // An explicit, non-blank author provider replaces the resolved one, so
    // the provider-dependent fields are recomputed for it.
    if let Some(provider) = options
        .author_provider
        .as_deref()
        .map(str::trim)
        .filter(|provider| !provider.is_empty())
    {
        author.provider = provider.to_string();
        author.family = model_family_with_config(&config, &author.provider, &author.id);
        author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
        author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
    }
    // Catalog row for the author, if there is one; it supplies the author's
    // pricing and its `complementary_with` preferences. The lookup is by
    // model id only.
    let author_entry = config.models.get(&author.id);
    let author_identity = complementary_identity(
        author.id.clone(),
        author.provider.clone(),
        author.family.clone(),
        author.lineage.clone(),
        author.tier.clone(),
        author_entry.and_then(|model| model.pricing.clone()),
    );

    // Builds the "review with the author model itself" selection: reviewer
    // and author are the same identity, and the cost estimate compares the
    // author's pricing against itself.
    let fallback =
        |code: ReviewerFallbackCode, fallback_reason: String| ComplementaryReviewerSelection {
            intent: options.intent.as_str().to_string(),
            reviewer: author_identity.clone(),
            estimated_incremental_cost: cost_estimate(
                author_identity.pricing.as_ref(),
                author_identity.pricing.as_ref(),
            ),
            author: author_identity.clone(),
            fallback: true,
            reason: format!(
                "using author model {} because {fallback_reason}",
                author_identity.id
            ),
            fallback_reason: Some(fallback_reason),
            fallback_code: Some(code.as_code().to_string()),
        };

    // Without a known author family there is nothing to be "different" from.
    if author_identity.family == "unknown" {
        return fallback(
            ReviewerFallbackCode::UnknownAuthorFamily,
            "author model family is unknown".to_string(),
        );
    }

    let preferred_families = author_entry
        .map(|model| model.complementary_with.clone())
        .unwrap_or_default();
    let author_refs = reviewer_match_refs(&author_identity);
    // Counters used only to pick the most specific fallback reason below.
    let mut rejected_by_price = 0usize;
    let mut diff_family_seen = 0usize;
    let mut candidates = Vec::new();

    for (id, model) in config.models.iter() {
        // Never propose the author's own catalog row as its reviewer.
        if id == &author_identity.id && model.provider == author_identity.provider {
            continue;
        }
        if model.deprecated || model.availability != ModelAvailability::Serverless {
            continue;
        }
        let family = model_family_with_config(&config, &model.provider, id);
        if family == "unknown" || family == author_identity.family {
            continue;
        }
        // Counted before the avoid-list and price checks, so it records that
        // a different-family model exists even if it is excluded afterwards.
        diff_family_seen += 1;
        let lineage = model_lineage_with_config(&config, &model.provider, id);
        let candidate_identity = complementary_identity(
            id.clone(),
            model.provider.clone(),
            family,
            lineage,
            model_tier_with_config(&config, id),
            model.pricing.clone(),
        );
        // The candidate opts out of reviewing this author when one of its
        // `avoid_as_reviewer_for` selectors matches any author reference.
        if model
            .avoid_as_reviewer_for
            .iter()
            .any(|selector| refs_contain_selector(&author_refs, selector))
        {
            continue;
        }
        if exceeds_price_cap(
            author_identity.pricing.as_ref(),
            candidate_identity.pricing.as_ref(),
            options.max_price_multiplier,
        ) {
            rejected_by_price += 1;
            continue;
        }
        let score = reviewer_score(
            &options,
            &author_identity,
            &candidate_identity,
            model,
            &preferred_families,
        );
        candidates.push(ReviewerCandidate {
            identity: candidate_identity,
            score,
        });
    }

    // Highest score first; incomparable scores are treated as equal, after
    // which provider and then model id give a deterministic order.
    candidates.sort_by(|left, right| {
        right
            .score
            .partial_cmp(&left.score)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| left.identity.provider.cmp(&right.identity.provider))
            .then_with(|| left.identity.id.cmp(&right.identity.id))
    });

    let Some(best) = candidates.into_iter().next() else {
        // No survivor: report the price cap first, then the absence of any
        // different-family model, and otherwise plain exclusion.
        if rejected_by_price > 0 {
            let cap = options.max_price_multiplier.unwrap_or_default();
            return fallback(
                ReviewerFallbackCode::NoDiffFamilyWithinPrice,
                format!("no different-family reviewer satisfied max_price_multiplier {cap}"),
            );
        }
        if diff_family_seen == 0 {
            return fallback(
                ReviewerFallbackCode::NoDiffFamilyServerless,
                "no active serverless different-family reviewer is cataloged".to_string(),
            );
        }
        return fallback(
            ReviewerFallbackCode::AllDiffFamilyExcluded,
            "all different-family reviewer candidates were excluded".to_string(),
        );
    };

    // Reviewer pricing relative to the author's, for the reason text and the
    // returned estimate.
    let estimate = cost_estimate(
        best.identity.pricing.as_ref(),
        author_identity.pricing.as_ref(),
    );
    ComplementaryReviewerSelection {
        intent: options.intent.as_str().to_string(),
        reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
        estimated_incremental_cost: estimate,
        author: author_identity,
        reviewer: best.identity,
        fallback: false,
        fallback_reason: None,
        fallback_code: None,
    }
}
2745
/// A catalog model that passed every reviewer filter in
/// `pick_complementary_reviewer`, paired with its ranking score.
#[derive(Debug, Clone)]
struct ReviewerCandidate {
    /// Identity of the candidate reviewer model.
    identity: ComplementaryModelIdentity,
    /// Score computed by `reviewer_score`; candidates are sorted with the
    /// highest score first.
    score: f64,
}
2751
/// Assemble a `ComplementaryModelIdentity` from its six parts.
///
/// A plain positional constructor: every argument is moved unchanged into the
/// field of the same name. `pricing` is `None` when the model has no catalog
/// pricing.
fn complementary_identity(
    id: String,
    provider: String,
    family: String,
    lineage: String,
    tier: String,
    pricing: Option<ModelPricing>,
) -> ComplementaryModelIdentity {
    ComplementaryModelIdentity {
        id,
        provider,
        family,
        lineage,
        tier,
        pricing,
    }
}
2769
2770fn reviewer_score(
2771    options: &ComplementaryReviewerOptions,
2772    author: &ComplementaryModelIdentity,
2773    candidate: &ComplementaryModelIdentity,
2774    model: &ModelDef,
2775    preferred_families: &[String],
2776) -> f64 {
2777    let candidate_refs = reviewer_match_refs(candidate);
2778    let mut score = 0.0;
2779    if let Some(rank) = preferred_families
2780        .iter()
2781        .position(|selector| refs_contain_selector(&candidate_refs, selector))
2782    {
2783        score += 1_000.0 - rank as f64;
2784    }
2785    if candidate.provider != author.provider {
2786        score += 100.0;
2787    }
2788    score += match tier_distance(&author.tier, &candidate.tier) {
2789        0 => 80.0,
2790        1 => 45.0,
2791        2 => 15.0,
2792        _ => 0.0,
2793    };
2794    for strength in intent_strengths(options.intent) {
2795        if model.strengths.iter().any(|tag| tag == strength) {
2796            score += 8.0;
2797        }
2798    }
2799    if model.capabilities.iter().any(|tag| tag == "tools") {
2800        score += 4.0;
2801    }
2802    if let (Some(author_total), Some(candidate_total)) = (
2803        pricing_total(author.pricing.as_ref()),
2804        pricing_total(candidate.pricing.as_ref()),
2805    ) {
2806        if author_total > 0.0 {
2807            let ratio = candidate_total / author_total;
2808            if ratio <= 1.0 {
2809                score += 20.0;
2810            }
2811            score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2812        }
2813    }
2814    score
2815}
2816
2817fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2818    match intent {
2819        ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2820        ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2821        ComplementaryReviewerIntent::PlanReview => {
2822            &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2823        }
2824    }
2825}
2826
2827fn tier_distance(left: &str, right: &str) -> u8 {
2828    let left = tier_rank(left);
2829    let right = tier_rank(right);
2830    left.abs_diff(right)
2831}
2832
/// Numeric rank of a tier label: `small` is 0, `frontier` and `reasoning`
/// are 2, and `mid` as well as any unrecognized label is 1.
fn tier_rank(tier: &str) -> u8 {
    if tier == "small" {
        0
    } else if matches!(tier, "frontier" | "reasoning") {
        2
    } else {
        // "mid" and every unknown label share the middle rank.
        1
    }
}
2841
2842fn exceeds_price_cap(
2843    author_pricing: Option<&ModelPricing>,
2844    candidate_pricing: Option<&ModelPricing>,
2845    max_price_multiplier: Option<f64>,
2846) -> bool {
2847    let Some(max_price_multiplier) = max_price_multiplier else {
2848        return false;
2849    };
2850    let Some(author_total) = pricing_total(author_pricing) else {
2851        return false;
2852    };
2853    let Some(candidate_total) = pricing_total(candidate_pricing) else {
2854        return true;
2855    };
2856    author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2857}
2858
2859fn cost_estimate(
2860    reviewer_pricing: Option<&ModelPricing>,
2861    author_pricing: Option<&ModelPricing>,
2862) -> Option<ComplementaryCostEstimate> {
2863    let reviewer_pricing = reviewer_pricing?;
2864    let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2865    let multiplier_vs_author = pricing_total(author_pricing)
2866        .filter(|author_total| *author_total > 0.0)
2867        .map(|author_total| total_per_mtok / author_total);
2868    Some(ComplementaryCostEstimate {
2869        input_per_mtok: reviewer_pricing.input_per_mtok,
2870        output_per_mtok: reviewer_pricing.output_per_mtok,
2871        total_per_mtok,
2872        multiplier_vs_author,
2873    })
2874}
2875
2876fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2877    pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2878}
2879
2880fn reviewer_reason(
2881    author: &ComplementaryModelIdentity,
2882    reviewer: &ComplementaryModelIdentity,
2883    estimate: Option<&ComplementaryCostEstimate>,
2884) -> String {
2885    let cost = estimate
2886        .and_then(|estimate| estimate.multiplier_vs_author)
2887        .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2888        .unwrap_or_else(|| "price ratio unavailable".to_string());
2889    format!(
2890        "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2891        reviewer.id,
2892        reviewer.provider,
2893        reviewer.family,
2894        author.family,
2895        reviewer.tier,
2896        author.tier,
2897        cost
2898    )
2899}
2900
2901fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2902    BTreeSet::from([
2903        identity.id.to_ascii_lowercase(),
2904        identity.provider.to_ascii_lowercase(),
2905        format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2906        format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2907        identity.family.to_ascii_lowercase(),
2908        identity.lineage.to_ascii_lowercase(),
2909    ])
2910}
2911
2912fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2913    normalized_catalog_token(Some(selector))
2914        .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2915        .is_some_and(|selector| refs.contains(&selector))
2916}
2917
2918// Model-pattern matching for forms like "claude-*", "qwen/*", "ollama:*".
2919// Shared workspace semantics live in `harn-glob`.
2920use harn_glob::match_name as glob_match;
2921
2922fn dirs_or_home() -> Option<String> {
2923    crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2924}
2925
2926/// Resolve the effective base URL for a provider, checking the `base_url_env`
2927/// override first, then falling back to the configured `base_url`.
2928pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2929    if let Some(env_name) = &pdef.base_url_env {
2930        if let Ok(val) = std::env::var(env_name) {
2931            // Strip surrounding quotes that some .env parsers leave intact.
2932            let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2933            if !trimmed.is_empty() {
2934                return trimmed.to_string();
2935            }
2936        }
2937    }
2938    pdef.base_url.clone()
2939}
2940
/// Embedded copy of generated `llm/providers.toml`, built from
/// `llm/catalog_sources/**/*.toml` by `harn provider catalog build-config`.
/// Edit the fragments, not this generated snapshot or this string.
///
/// The file is pulled in with `include_str!`, so its text is baked into the
/// binary at compile time and the path is resolved relative to this source
/// file.
const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2945
2946/// Parse the embedded generated `providers.toml` into the runtime
2947/// `ProvidersConfig`.
2948///
2949/// Hosts overlay this base via `HARN_PROVIDERS_CONFIG`,
2950/// `~/.config/harn/providers.toml`, `harn.toml`, package-manifest
2951/// `[llm]` sections, and per-run `set_user_overrides(...)`. The same
2952/// Serde shape applies at every layer, so there is exactly one schema to
2953/// keep coherent — no parallel Rust-literal catalog.
2954///
2955/// We `expect` on parse failure because the file is bundled into the
2956/// binary at compile time; a malformed embedded catalog is a build-time
2957/// invariant violation that should fail every test, not silently
2958/// degrade in production.
2959fn default_config() -> ProvidersConfig {
2960    parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2961        .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2962}
2963
/// Test-only helper: the embedded default config with `overlay` merged on
/// top of it.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2970
2971#[cfg(test)]
2972mod tests {
2973    use super::*;
2974
2975    fn reset_overrides() {
2976        clear_user_overrides();
2977    }
2978
2979    #[test]
2980    fn resolve_model_info_guards_bad_native_pin_on_unreliable_route() {
2981        reset_overrides();
2982        // An alias that pins tool_format = "native" for DeepSeek V3.2 on
2983        // OpenRouter — a route the capability registry knows is
2984        // native_unreliable (drops to unparsed DSML text). Before the
2985        // footgun-removal gate this bad pin survived resolution verbatim and
2986        // produced vanishing tool calls; now it is steered to the route's safe
2987        // text-channel format.
2988        let overlay = parse_config_toml(
2989            "[aliases.guard-ds]\nid = \"deepseek/deepseek-v3.2\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2990        )
2991        .expect("overlay parses");
2992        set_user_overrides(Some(overlay));
2993        let resolved = resolve_model_info("guard-ds");
2994        assert_eq!(
2995            resolved.tool_format, "text",
2996            "a native pin on a native_unreliable route must be auto-corrected to text"
2997        );
2998        clear_user_overrides();
2999
3000        // A safe native pin (a route with no adverse parity) is untouched.
3001        let overlay_ok = parse_config_toml(
3002            "[aliases.guard-ds-ok]\nid = \"deepseek/deepseek-v3-base\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
3003        )
3004        .expect("overlay parses");
3005        set_user_overrides(Some(overlay_ok));
3006        let resolved_ok = resolve_model_info("guard-ds-ok");
3007        assert_eq!(resolved_ok.tool_format, "native");
3008        clear_user_overrides();
3009    }
3010
3011    #[test]
3012    fn auto_select_prefers_local_provider_without_cloud_credentials() {
3013        // A catalog whose only provider is local and auth-free resolves to it
3014        // regardless of ambient cloud API keys: no preferred/credentialed cloud
3015        // provider is present, so the local fallback wins deterministically.
3016        let config = parse_config_toml(
3017            "[providers.ollama]\nbase_url = \"http://localhost:11434\"\nchat_endpoint = \"/v1/chat/completions\"\n",
3018        )
3019        .expect("config parses");
3020        assert!(provider_is_local(config.providers.get("ollama").unwrap()));
3021        assert_eq!(auto_select_provider(&config), "ollama");
3022    }
3023
3024    #[test]
3025    fn auto_select_falls_back_to_documented_default_when_empty() {
3026        let config = parse_config_toml("").expect("config parses");
3027        assert_eq!(auto_select_provider(&config), FALLBACK_PROVIDER);
3028    }
3029
3030    #[test]
3031    fn suppress_routes_parse_and_merge_dedupe() {
3032        let mut base =
3033            parse_config_toml("[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\"]\n")
3034                .expect("base parses");
3035        assert!(!base.is_empty(), "a suppress-only overlay is not empty");
3036        let overlay = parse_config_toml(
3037            "[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\", \"ollama:img:tag\"]\n",
3038        )
3039        .expect("overlay parses");
3040        base.merge_from(&overlay);
3041        assert_eq!(
3042            base.suppress.routes,
3043            vec![
3044                "together:Qwen/Qwen3-Coder-Next-FP8".to_string(),
3045                "ollama:img:tag".to_string(),
3046            ],
3047            "merge appends new selectors without duplicating existing ones"
3048        );
3049    }
3050
3051    /// Base config for the `[patch.models]` tests: one fully-populated row.
3052    const PATCH_BASE_TOML: &str = r#"
3053[models."demo/patch-target"]
3054name = "Patch Target"
3055provider = "demo"
3056context_window = 128000
3057stream_timeout = 300.0
3058capabilities = ["tools", "vision"]
3059strengths = ["coding"]
3060
3061[models."demo/patch-target".pricing]
3062input_per_mtok = 1.0
3063output_per_mtok = 5.0
3064"#;
3065
3066    fn patch_base() -> ProvidersConfig {
3067        parse_config_toml(PATCH_BASE_TOML).expect("patch base parses")
3068    }
3069
3070    fn patched_row(config: &ProvidersConfig) -> &ModelDef {
3071        config
3072            .models
3073            .get("demo/patch-target")
3074            .expect("patch target row present")
3075    }
3076
3077    #[test]
3078    fn patch_models_scalar_and_nested_field_preserve_siblings() {
3079        let mut base = patch_base();
3080        let overlay = parse_config_toml(
3081            "[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n\
3082             [patch.models.\"demo/patch-target\".pricing]\noutput_per_mtok = 2.5\n",
3083        )
3084        .expect("patch overlay parses");
3085        assert!(!overlay.is_empty(), "a patch-only overlay is not empty");
3086        base.merge_from(&overlay);
3087        let row = patched_row(&base);
3088        assert_eq!(row.stream_timeout, Some(1200.0), "patched scalar applies");
3089        assert_eq!(row.name, "Patch Target", "unpatched scalar is intact");
3090        assert_eq!(row.context_window, 128000, "unpatched scalar is intact");
3091        assert_eq!(
3092            row.capabilities,
3093            vec!["tools".to_string(), "vision".to_string()],
3094            "unpatched array is intact"
3095        );
3096        let pricing = row.pricing.as_ref().expect("pricing survives the patch");
3097        assert_eq!(pricing.output_per_mtok, 2.5, "patched nested field applies");
3098        assert_eq!(
3099            pricing.input_per_mtok, 1.0,
3100            "sibling nested field is preserved by the deep merge"
3101        );
3102        assert!(base.dangling_model_patches().is_empty());
3103    }
3104
3105    #[test]
3106    fn patch_models_array_replaces_wholesale() {
3107        let mut base = patch_base();
3108        let overlay =
3109            parse_config_toml("[patch.models.\"demo/patch-target\"]\ncapabilities = [\"tools\"]\n")
3110                .expect("patch overlay parses");
3111        base.merge_from(&overlay);
3112        let row = patched_row(&base);
3113        assert_eq!(
3114            row.capabilities,
3115            vec!["tools".to_string()],
3116            "arrays replace wholesale — no element-wise merge"
3117        );
3118        assert_eq!(
3119            row.strengths,
3120            vec!["coding".to_string()],
3121            "arrays the patch does not name are intact"
3122        );
3123    }
3124
3125    #[test]
3126    fn patch_models_wins_over_whole_row_in_same_overlay() {
3127        let mut base = patch_base();
3128        let overlay = parse_config_toml(
3129            "[models.\"demo/patch-target\"]\n\
3130             name = \"Replaced Row\"\nprovider = \"demo\"\ncontext_window = 64000\n\
3131             stream_timeout = 600.0\n\
3132             [patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n",
3133        )
3134        .expect("overlay parses");
3135        base.merge_from(&overlay);
3136        let row = patched_row(&base);
3137        assert_eq!(
3138            row.name, "Replaced Row",
3139            "the whole-row replacement lands first"
3140        );
3141        assert_eq!(row.context_window, 64000);
3142        assert_eq!(
3143            row.stream_timeout,
3144            Some(1200.0),
3145            "the same overlay's patch fields win over its whole-row fields"
3146        );
3147    }
3148
3149    #[test]
3150    fn patch_models_chained_layers_accumulate_and_later_wins() {
3151        let mut base = patch_base();
3152        let layer1 =
3153            parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 900.0\n")
3154                .expect("layer1 parses");
3155        let layer2 = parse_config_toml(
3156            "[patch.models.\"demo/patch-target\".pricing]\noutput_per_mtok = 2.5\n",
3157        )
3158        .expect("layer2 parses");
3159        base.merge_from(&layer1);
3160        base.merge_from(&layer2);
3161        let row = patched_row(&base);
3162        assert_eq!(
3163            row.stream_timeout,
3164            Some(900.0),
3165            "layer1's field patch survives layer2 patching a different field"
3166        );
3167        assert_eq!(
3168            row.pricing
3169                .as_ref()
3170                .expect("pricing present")
3171                .output_per_mtok,
3172            2.5,
3173            "layer2's field patch applies"
3174        );
3175
3176        let layer3 =
3177            parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n")
3178                .expect("layer3 parses");
3179        base.merge_from(&layer3);
3180        assert_eq!(
3181            patched_row(&base).stream_timeout,
3182            Some(1200.0),
3183            "for the same field, the later layer's patch wins"
3184        );
3185    }
3186
3187    #[test]
3188    fn patch_models_sticky_across_later_whole_row_replacement() {
3189        let mut base = patch_base();
3190        let patch_layer =
3191            parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n")
3192                .expect("patch layer parses");
3193        base.merge_from(&patch_layer);
3194        // A later layer replaces the whole row (e.g. a hosted runtime-catalog
3195        // refresh re-ships the baseline). The accumulated patch re-applies:
3196        // patches mean "always tweak this field", not "tweak it once".
3197        let replacement_layer = parse_config_toml(
3198            "[models.\"demo/patch-target\"]\n\
3199             name = \"Refreshed Row\"\nprovider = \"demo\"\ncontext_window = 256000\n\
3200             stream_timeout = 300.0\n",
3201        )
3202        .expect("replacement layer parses");
3203        base.merge_from(&replacement_layer);
3204        let row = patched_row(&base);
3205        assert_eq!(row.name, "Refreshed Row", "the whole-row refresh lands");
3206        assert_eq!(row.context_window, 256000);
3207        assert_eq!(
3208            row.stream_timeout,
3209            Some(1200.0),
3210            "the sticky patch re-applies on top of the refreshed row"
3211        );
3212    }
3213
3214    #[test]
3215    fn patch_models_dangling_patch_reports_and_applies_when_row_arrives() {
3216        let mut base = patch_base();
3217        let dangling =
3218            parse_config_toml("[patch.models.\"demo/not-yet-cataloged\"]\nstream_timeout = 42.0\n")
3219                .expect("dangling patch parses");
3220        base.merge_from(&dangling);
3221        assert_eq!(
3222            base.dangling_model_patches(),
3223            vec!["demo/not-yet-cataloged"],
3224            "a patch with no matching row is reported, not dropped"
3225        );
3226        assert_eq!(
3227            patched_row(&base).stream_timeout,
3228            Some(300.0),
3229            "existing rows are untouched by a dangling patch"
3230        );
3231
3232        // The row arrives from a LATER layer; the accumulated patch applies.
3233        let late_row = parse_config_toml(
3234            "[models.\"demo/not-yet-cataloged\"]\n\
3235             name = \"Late Arrival\"\nprovider = \"demo\"\ncontext_window = 8192\n",
3236        )
3237        .expect("late row parses");
3238        base.merge_from(&late_row);
3239        assert!(base.dangling_model_patches().is_empty());
3240        let row = base
3241            .models
3242            .get("demo/not-yet-cataloged")
3243            .expect("late row present");
3244        assert_eq!(row.stream_timeout, Some(42.0), "the held patch applied");
3245        assert_eq!(row.name, "Late Arrival");
3246    }
3247
3248    #[test]
3249    fn patch_models_type_error_keeps_unpatched_row() {
3250        let mut base = patch_base();
3251        let bad =
3252            parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = \"soon\"\n")
3253                .expect("the patch overlay itself is valid TOML");
3254        base.merge_from(&bad);
3255        let row = patched_row(&base);
3256        assert_eq!(
3257            row.stream_timeout,
3258            Some(300.0),
3259            "a type-invalid patch keeps the unpatched row"
3260        );
3261        assert_eq!(row.name, "Patch Target", "the rest of the row is intact");
3262    }
3263
3264    #[test]
3265    fn model_rows_roundtrip_through_toml_value_for_patching() {
3266        // Patch application is `ModelDef -> toml::Value -> deep merge ->
3267        // ModelDef`. This property test guards the serialization leg: every
3268        // embedded catalog row must survive the round trip unchanged (a
3269        // missing `Serialize` derive or asymmetric serde attribute on a
3270        // nested def would corrupt rows the first time they are patched).
3271        let config = default_config();
3272        assert!(!config.models.is_empty());
3273        for (id, row) in &config.models {
3274            let value = toml::Value::try_from(row)
3275                .unwrap_or_else(|error| panic!("serialize model row {id}: {error}"));
3276            let roundtripped = ModelDef::deserialize(value)
3277                .unwrap_or_else(|error| panic!("deserialize model row {id}: {error}"));
3278            assert_eq!(&roundtripped, row, "model row {id} must round-trip");
3279        }
3280    }
3281
3282    #[test]
3283    fn test_glob_match_prefix() {
3284        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
3285        assert!(glob_match("gpt-*", "gpt-4o"));
3286        assert!(!glob_match("claude-*", "gpt-4o"));
3287    }
3288
3289    #[test]
3290    fn test_glob_match_suffix() {
3291        assert!(glob_match("*-latest", "llama3.2-latest"));
3292        assert!(!glob_match("*-latest", "llama3.2"));
3293    }
3294
3295    #[test]
3296    fn test_glob_match_middle() {
3297        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
3298        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
3299    }
3300
3301    #[test]
3302    fn test_glob_match_exact() {
3303        assert!(glob_match("gpt-4o", "gpt-4o"));
3304        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
3305    }
3306
3307    #[test]
3308    fn test_infer_provider_from_defaults() {
3309        let _guard = crate::llm::env_guard();
3310        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3311        unsafe {
3312            std::env::remove_var("HARN_DEFAULT_PROVIDER");
3313        }
3314
3315        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
3316        assert_eq!(infer_provider("gpt-4o"), "openai");
3317        assert_eq!(infer_provider("o1-preview"), "openai");
3318        assert_eq!(infer_provider("o3-mini"), "openai");
3319        assert_eq!(infer_provider("o4-mini"), "openai");
3320        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
3321        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
3322        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
3323        assert_eq!(infer_provider("unknown-model"), "anthropic");
3324
3325        unsafe {
3326            match prev_default_provider {
3327                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3328                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3329            }
3330        }
3331    }
3332
3333    #[test]
3334    fn test_infer_provider_prefix_rules() {
3335        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
3336        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
3337        // Even when the id also contains `/`, the local transport prefix wins.
3338        assert_eq!(infer_provider("local:owner/model"), "ollama");
3339        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
3340    }
3341
3342    #[test]
3343    fn test_openrouter_inference_requires_one_slash() {
3344        let _guard = crate::llm::env_guard();
3345        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3346        unsafe {
3347            std::env::remove_var("HARN_DEFAULT_PROVIDER");
3348        }
3349
3350        assert_eq!(infer_provider("org/model"), "openrouter");
3351        assert_eq!(infer_provider("org/team/model"), "anthropic");
3352
3353        unsafe {
3354            match prev_default_provider {
3355                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3356                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3357            }
3358        }
3359    }
3360
3361    #[test]
3362    fn test_cerebras_inference_beats_openrouter_slash_fallback() {
3363        let _guard = crate::llm::env_guard();
3364        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3365        unsafe {
3366            std::env::remove_var("HARN_DEFAULT_PROVIDER");
3367        }
3368
3369        assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
3370        assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
3371        assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
3372
3373        unsafe {
3374            match prev_default_provider {
3375                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3376                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3377            }
3378        }
3379    }
3380
3381    #[test]
3382    fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
3383        // Bare model IDs that the embedded catalog hosts on Cerebras must
3384        // not be misrouted by the generic `gpt-*` / single-slash inference
3385        // fallbacks. Regression for harn#2142 (model-info routed
3386        // `gpt-oss-120b` to openai, breaking host TUI credential checks).
3387        let _guard = crate::llm::env_guard();
3388        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3389        unsafe {
3390            std::env::remove_var("HARN_DEFAULT_PROVIDER");
3391        }
3392
3393        for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
3394            assert_eq!(
3395                infer_provider(model),
3396                "cerebras",
3397                "{model} should route to its catalog provider"
3398            );
3399            let resolved = resolve_model_info(model);
3400            assert_eq!(resolved.id, model);
3401            assert_eq!(resolved.provider, "cerebras");
3402        }
3403
3404        unsafe {
3405            match prev_default_provider {
3406                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3407                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3408            }
3409        }
3410    }
3411
3412    #[test]
3413    fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
3414        reset_overrides();
3415
3416        assert_eq!(
3417            wire_model_id("groq/openai/gpt-oss-120b"),
3418            "openai/gpt-oss-120b"
3419        );
3420        assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");
3421
3422        let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
3423        let ids = equivalents
3424            .iter()
3425            .map(|(id, _)| id.as_str())
3426            .collect::<Vec<_>>();
3427
3428        assert!(
3429            ids.contains(&"groq/openai/gpt-oss-120b"),
3430            "Cerebras GPT-OSS should surface the Groq serving variant"
3431        );
3432        assert!(
3433            !ids.contains(&"gpt-oss-120b"),
3434            "equivalence results should not include the source row"
3435        );
3436        assert!(equivalents.iter().all(|(_, model)| {
3437            model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
3438        }));
3439    }
3440
3441    #[test]
3442    fn fireworks_gpt_oss_route_has_real_context_window() {
3443        // Regression: the Fireworks-served `accounts/fireworks/models/gpt-oss-120b`
3444        // wire id had NO catalog row, so its context window resolved to None and
3445        // the agent's auto-compaction budget had nothing to enforce — the prompt
3446        // grew until Fireworks rejected the turn with HTTP 400 [context_overflow]
3447        // (session 019ee303: 197467 tokens > 131071 max). Cataloging the real
3448        // 131072 window lets compaction trigger before the hard limit.
3449        reset_overrides();
3450
3451        let entry = model_catalog_entry("accounts/fireworks/models/gpt-oss-120b")
3452            .expect("Fireworks gpt-oss-120b must be in the model catalog");
3453        assert_eq!(entry.context_window, 131_072);
3454        assert_eq!(entry.provider, "fireworks");
3455        assert_eq!(
3456            entry.equivalence_group.as_deref(),
3457            Some("openai-gpt-oss-120b"),
3458        );
3459    }
3460
3461    #[test]
3462    fn test_user_catalog_overlay_re_homes_model_provider() {
3463        // Users can re-home a built-in model by overlaying a catalog row;
3464        // the exact-match catalog lookup must honor overlays as well as the
3465        // embedded TOML.
3466        reset_overrides();
3467        let mut overlay = ProvidersConfig::default();
3468        overlay.models.insert(
3469            "gpt-4o".to_string(),
3470            ModelDef {
3471                name: "GPT-4o via OpenRouter".to_string(),
3472                provider: "openrouter".to_string(),
3473                context_window: 128_000,
3474                logical_model: None,
3475                equivalence_group: None,
3476                served_variant: None,
3477                wire_model: None,
3478                api_dialect: None,
3479                rate_limits: None,
3480                performance: None,
3481                architecture: None,
3482                local_memory: None,
3483                runtime_context_window: None,
3484                stream_timeout: None,
3485                capabilities: Vec::new(),
3486                pricing: None,
3487                deprecated: false,
3488                deprecation_note: None,
3489                superseded_by: None,
3490                fast_mode: None,
3491                quality_tags: Vec::new(),
3492                availability: ModelAvailability::default(),
3493                tier: None,
3494                open_weight: None,
3495                strengths: Vec::new(),
3496                benchmarks: std::collections::BTreeMap::new(),
3497                family: None,
3498                lineage: None,
3499                complementary_with: Vec::new(),
3500                avoid_as_reviewer_for: Vec::new(),
3501            },
3502        );
3503        set_user_overrides(Some(overlay));
3504
3505        assert_eq!(infer_provider("gpt-4o"), "openrouter");
3506
3507        reset_overrides();
3508    }
3509
3510    #[test]
3511    fn test_resolve_model_info_normalizes_provider_prefixes() {
3512        let local = resolve_model_info("local:gemma-4-e4b-it");
3513        assert_eq!(local.id, "gemma-4-e4b-it");
3514        assert_eq!(local.provider, "ollama");
3515
3516        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
3517        assert_eq!(ollama.id, "qwen3:30b-a3b");
3518        assert_eq!(ollama.provider, "ollama");
3519
3520        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
3521        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
3522        assert_eq!(hf.provider, "huggingface");
3523
3524        let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
3525        assert_eq!(cerebras.id, "gpt-oss-120b");
3526        assert_eq!(cerebras.provider, "cerebras");
3527
3528        let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
3529        assert_eq!(cerebras_glm.id, "zai-glm-4.7");
3530        assert_eq!(cerebras_glm.provider, "cerebras");
3531    }
3532
3533    #[test]
3534    fn test_model_tier_from_defaults() {
3535        // Tier is now self-declared per model row in providers.toml.
3536        // Models that match an entry use the declared value; unknown
3537        // model ids fall through to `tier_defaults.default` ("mid").
3538        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
3539        assert_eq!(model_tier("gpt-4o"), "frontier");
3540        assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
3541        assert_eq!(model_tier("deepseek-v4-flash"), "mid");
3542        assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
3543        assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
3544        assert_eq!(model_tier("glm-5.1"), "frontier");
3545        // Unknown ids resolve to the default.
3546        assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
3547    }
3548
3549    #[test]
3550    fn test_model_family_preserves_underlying_hosted_lineage() {
3551        assert_eq!(
3552            model_family("openrouter", "anthropic/claude-sonnet-4-6"),
3553            "anthropic-claude"
3554        );
3555        assert_eq!(
3556            model_family("openrouter", "google/gemini-2.5-flash"),
3557            "google-gemini"
3558        );
3559        assert_eq!(
3560            model_family("openrouter", "openai/o3-mini"),
3561            "openai-reasoning"
3562        );
3563        assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
3564        assert_eq!(
3565            model_lineage("openrouter", "openai/o3-mini"),
3566            "openai-reasoning"
3567        );
3568        assert_eq!(
3569            model_lineage("anthropic", "claude-opus-4-8"),
3570            "claude-opus-adaptive"
3571        );
3572        assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
3573    }
3574
3575    #[test]
3576    fn test_complementary_reviewer_uses_different_family() {
3577        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3578            author_model: "claude-sonnet-4-6".to_string(),
3579            author_provider: None,
3580            intent: ComplementaryReviewerIntent::PlanReview,
3581            max_price_multiplier: Some(3.0),
3582        });
3583
3584        assert!(!selection.fallback, "{selection:?}");
3585        assert_eq!(selection.author.family, "anthropic-claude");
3586        assert_ne!(selection.reviewer.family, selection.author.family);
3587        assert_eq!(selection.reviewer.tier, "frontier");
3588        assert!(selection.estimated_incremental_cost.is_some());
3589        // Success path carries no machine-readable fallback code, so a caller
3590        // can treat `fallback_code.is_some()` as "must not self-review".
3591        assert_eq!(selection.fallback_code, None, "{selection:?}");
3592    }
3593
3594    #[test]
3595    fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
3596        let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3597            author_model: "gpt-4o-mini".to_string(),
3598            author_provider: Some("openai".to_string()),
3599            intent: ComplementaryReviewerIntent::Review,
3600            max_price_multiplier: Some(0.01),
3601        });
3602
3603        assert!(selection.fallback, "{selection:?}");
3604        assert_eq!(selection.reviewer.id, "gpt-4o-mini");
3605        assert_eq!(selection.reviewer.family, selection.author.family);
3606        assert!(selection
3607            .fallback_reason
3608            .as_deref()
3609            .is_some_and(|reason| reason.contains("max_price_multiplier")));
3610        // The machine-readable code is stable regardless of the prose; a caller
3611        // hard-fails an independent-review step by branching on this, never by
3612        // parsing `fallback_reason`.
3613        assert_eq!(
3614            selection.fallback_code.as_deref(),
3615            Some(ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code()),
3616            "{selection:?}"
3617        );
3618        assert_eq!(
3619            ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3620            "no_diff_family_within_price"
3621        );
3622    }
3623
3624    #[test]
3625    fn test_reviewer_fallback_codes_are_stable_strings() {
3626        // Append-only contract: harn pipelines and Rust callers branch on these
3627        // exact strings, so changing one is a breaking change.
3628        assert_eq!(
3629            ReviewerFallbackCode::UnknownAuthorFamily.as_code(),
3630            "unknown_author_family"
3631        );
3632        assert_eq!(
3633            ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3634            "no_diff_family_within_price"
3635        );
3636        assert_eq!(
3637            ReviewerFallbackCode::NoDiffFamilyServerless.as_code(),
3638            "no_diff_family_serverless"
3639        );
3640        assert_eq!(
3641            ReviewerFallbackCode::AllDiffFamilyExcluded.as_code(),
3642            "all_diff_family_excluded"
3643        );
3644    }
3645
3646    #[test]
3647    fn test_resolve_model_unknown_alias() {
3648        let (id, provider) = resolve_model("gpt-4o");
3649        assert_eq!(id, "gpt-4o");
3650        assert!(provider.is_none());
3651    }
3652
3653    #[test]
3654    fn test_provider_names() {
3655        let names = provider_names();
3656        assert!(names.len() >= 7);
3657        assert!(names.contains(&"anthropic".to_string()));
3658        assert!(names.contains(&"together".to_string()));
3659        assert!(names.contains(&"local".to_string()));
3660        assert!(names.contains(&"mlx".to_string()));
3661        assert!(names.contains(&"openai".to_string()));
3662        assert!(names.contains(&"ollama".to_string()));
3663        assert!(names.contains(&"bedrock".to_string()));
3664        assert!(names.contains(&"azure_openai".to_string()));
3665        assert!(names.contains(&"vertex".to_string()));
3666    }
3667
3668    #[test]
3669    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
3670        let mut overlay = ProvidersConfig {
3671            default_provider: Some("ollama".to_string()),
3672            ..Default::default()
3673        };
3674        overlay.aliases.insert(
3675            "quickstart".to_string(),
3676            AliasDef {
3677                id: "llama3.2".to_string(),
3678                provider: "ollama".to_string(),
3679                tool_format: None,
3680            },
3681        );
3682
3683        let merged = merge_global_config(overlay);
3684
3685        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
3686        assert!(merged.providers.contains_key("anthropic"));
3687        assert!(merged.providers.contains_key("ollama"));
3688        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
3689    }
3690
3691    #[test]
3692    fn partial_provider_overlay_preserves_builtin_provider_metadata() {
3693        let overlay = parse_config_toml(
3694            r#"
3695            [providers.ollama]
3696            base_url = "http://localhost:11435"
3697            extra_headers = { "x-local" = "1" }
3698            "#,
3699        )
3700        .expect("provider overlay parses");
3701
3702        let merged = merge_global_config(overlay);
3703        let ollama = merged
3704            .providers
3705            .get("ollama")
3706            .expect("ollama remains configured");
3707
3708        assert_eq!(ollama.base_url, "http://localhost:11435");
3709        assert_eq!(ollama.auth_style, "none");
3710        assert_eq!(ollama.chat_endpoint, "/api/chat");
3711        assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
3712        assert_eq!(ollama.cost_per_1k_in, Some(0.0));
3713        assert_eq!(ollama.cost_per_1k_out, Some(0.0));
3714        assert_eq!(
3715            ollama
3716                .healthcheck
3717                .as_ref()
3718                .and_then(|healthcheck| healthcheck.path.as_deref()),
3719            Some("/api/tags")
3720        );
3721        assert_eq!(
3722            ollama.extra_headers.get("x-local").map(String::as_str),
3723            Some("1")
3724        );
3725    }
3726
3727    #[test]
3728    fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
3729        let overlay = parse_config_toml(
3730            r#"
3731            [providers.ollama]
3732            auth_style = "bearer"
3733            auth_env = "OLLAMA_API_KEY"
3734            "#,
3735        )
3736        .expect("provider overlay parses");
3737
3738        let merged = merge_global_config(overlay);
3739        let ollama = merged
3740            .providers
3741            .get("ollama")
3742            .expect("ollama remains configured");
3743
3744        assert_eq!(ollama.auth_style, "bearer");
3745        assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
3746        assert_eq!(ollama.chat_endpoint, "/api/chat");
3747    }
3748
3749    #[test]
3750    fn test_resolve_tier_model_default_aliases() {
3751        // Exercise the alias-resolution machinery, not the specific catalog
3752        // value: the model under each tier alias evolves as the embedded
3753        // providers.toml is updated. The invariants worth pinning are the
3754        // provider routing + catalog-registration of the resolved model.
3755        let (model, provider) = resolve_tier_model("frontier", None)
3756            .expect("frontier alias must resolve from the embedded catalog");
3757        assert_eq!(provider, "anthropic");
3758        assert!(
3759            model_catalog_entry(&model)
3760                .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
3761            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
3762        );
3763
3764        let (model, provider) = resolve_tier_model("small", None)
3765            .expect("small alias must resolve from the embedded catalog");
3766        assert!(
3767            [
3768                "openrouter",
3769                "huggingface",
3770                "local",
3771                "llamacpp",
3772                "mlx",
3773                "ollama"
3774            ]
3775            .contains(&provider.as_str()),
3776            "small tier should resolve to an open-weight provider (got {provider} / {model})"
3777        );
3778
3779        let (model, provider) = resolve_tier_model("mid", None)
3780            .expect("mid alias must resolve from the embedded catalog");
3781        assert_eq!(provider, "openrouter");
3782        assert_eq!(model, "qwen/qwen3.6-flash");
3783    }
3784
3785    #[test]
3786    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
3787        // tier/<provider> takes precedence over generic tier when the
3788        // caller scopes by provider. Don't pin the specific model — the
3789        // catalog evolves.
3790        let (model, provider) = resolve_tier_model("mid", Some("openai"))
3791            .expect("mid tier scoped to openai must resolve");
3792        assert_eq!(provider, "openai");
3793        assert_eq!(model, "gpt-5.4-mini");
3794        assert!(
3795            model_catalog_entry(&model).is_some(),
3796            "mid/openai alias must point at a registered model (got {model})"
3797        );
3798    }
3799
3800    #[test]
3801    fn test_provider_config_anthropic() {
3802        let pdef = provider_config("anthropic").unwrap();
3803        assert_eq!(pdef.auth_style, "header");
3804        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
3805    }
3806
3807    #[test]
3808    fn test_provider_config_mlx() {
3809        let pdef = provider_config("mlx").unwrap();
3810        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
3811        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
3812        assert_eq!(
3813            pdef.healthcheck.unwrap().path.as_deref(),
3814            Some("/v1/models")
3815        );
3816
3817        let (model, provider) = resolve_model("mlx-qwen36-27b");
3818        assert_eq!(model, "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit");
3819        assert_eq!(provider.as_deref(), Some("mlx"));
3820    }
3821
3822    #[test]
3823    fn test_enterprise_provider_defaults_and_inference() {
3824        let bedrock = provider_config("bedrock").unwrap();
3825        assert_eq!(bedrock.auth_style, "aws_sigv4");
3826        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
3827        assert_eq!(
3828            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
3829            "bedrock"
3830        );
3831        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
3832
3833        let azure = provider_config("azure_openai").unwrap();
3834        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
3835        assert_eq!(
3836            auth_env_names(&azure.auth_env),
3837            vec![
3838                "AZURE_OPENAI_API_KEY".to_string(),
3839                "AZURE_OPENAI_AD_TOKEN".to_string(),
3840                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
3841            ]
3842        );
3843
3844        let vertex = provider_config("vertex").unwrap();
3845        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
3846        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
3847    }
3848
3849    #[test]
3850    fn test_default_provider_env_override_for_unknown_model() {
3851        let _guard = crate::llm::env_guard();
3852        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3853        unsafe {
3854            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
3855        }
3856
3857        let inference = infer_provider_detail("unknown-model");
3858
3859        unsafe {
3860            match prev_default_provider {
3861                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3862                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3863            }
3864        }
3865
3866        assert_eq!(inference.provider, "openai");
3867        assert_eq!(
3868            inference.source,
3869            crate::llm::provider::ProviderInferenceSource::DefaultFallback
3870        );
3871    }
3872
3873    #[test]
3874    fn test_unknown_model_family_ignores_default_provider_fallback() {
3875        let _guard = crate::llm::env_guard();
3876        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3877        unsafe {
3878            std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
3879        }
3880
3881        let unknown = resolve_model_info("mystery-model-xyz");
3882        let known_family = resolve_model_info("deepseek-mystery-model");
3883
3884        unsafe {
3885            match prev_default_provider {
3886                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3887                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3888            }
3889        }
3890
3891        assert_eq!(unknown.provider, "ollama");
3892        assert_eq!(unknown.family, "unknown");
3893        assert_eq!(unknown.lineage, "unknown");
3894        assert_eq!(known_family.family, "deepseek");
3895        assert_eq!(known_family.lineage, "deepseek");
3896    }
3897
3898    #[test]
3899    fn test_resolve_base_url_no_env() {
3900        let pdef = ProviderDef {
3901            base_url: "https://example.com".to_string(),
3902            ..Default::default()
3903        };
3904        assert_eq!(resolve_base_url(&pdef), "https://example.com");
3905    }
3906
3907    #[test]
3908    fn test_default_config_roundtrip() {
3909        let config = default_config();
3910        assert!(!config.providers.is_empty());
3911        assert!(!config.inference_rules.is_empty());
3912        // Tier is now declared on each model row; tier_rules is allowed
3913        // to be empty (the rule table is a legacy fallback only).
3914        assert_eq!(config.tier_defaults.default, "mid");
3915        // At least the new open-weight frontiers should have explicit tiers.
3916        let frontiers = config
3917            .models
3918            .iter()
3919            .filter(|(_, m)| m.tier.as_deref() == Some("frontier"))
3920            .count();
3921        assert!(
3922            frontiers >= 4,
3923            "expected at least 4 frontier-tagged models, got {frontiers}"
3924        );
3925    }
3926
3927    #[test]
3928    fn test_local_ollama_catalog_metadata() {
3929        reset_overrides();
3930
3931        let devstral =
3932            model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
3933        assert_eq!(devstral.context_window, 262_144);
3934        assert!(!devstral.capabilities.iter().any(|cap| cap == "vision"));
3935
3936        let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
3937        assert_eq!(gemma4.context_window, 262_144);
3938        assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
3939    }
3940
3941    #[test]
3942    fn local_gemma4_source_tags_match_structured_capability_tags() {
3943        reset_overrides();
3944        let config = default_config();
3945        for id in [
3946            "gemma-4-e2b-it",
3947            "gemma-4-e4b-it",
3948            "gemma-4-12b-it",
3949            "gemma-4-26b-a4b-it",
3950            "gemma-4-31b-it",
3951        ] {
3952            let source = config
3953                .models
3954                .get(id)
3955                .unwrap_or_else(|| panic!("{id} should be in the embedded catalog"));
3956            let derived = effective_model_capability_tags(&source.provider, id);
3957            assert_eq!(
3958                source.capabilities, derived,
3959                "{}/{} source capabilities must match derived capability_tags",
3960                source.provider, id
3961            );
3962        }
3963    }
3964
3965    #[test]
3966    fn capability_tags_include_structured_capability_flags() {
3967        let caps = crate::llm::capabilities::Capabilities {
3968            native_tools: true,
3969            tool_search: vec!["web".to_string()],
3970            vision_supported: true,
3971            audio: true,
3972            pdf: true,
3973            video: true,
3974            files_api_supported: true,
3975            prompt_caching: true,
3976            thinking_modes: vec!["enabled".to_string()],
3977            structured_output: Some("native".to_string()),
3978            ..Default::default()
3979        };
3980
3981        assert_eq!(
3982            capability_tags_from_capabilities(&caps),
3983            vec![
3984                "streaming",
3985                "tools",
3986                "tool_search",
3987                "vision",
3988                "audio",
3989                "pdf",
3990                "video",
3991                "files",
3992                "prompt_caching",
3993                "thinking",
3994                "structured_output",
3995            ]
3996        );
3997    }
3998
3999    #[test]
4000    fn test_external_config_overlays_default_catalog() {
4001        let mut config = default_config();
4002        let mut overlay = ProvidersConfig {
4003            default_provider: Some("ollama".to_string()),
4004            ..Default::default()
4005        };
4006        overlay.providers.insert(
4007            "custom".to_string(),
4008            ProviderDef {
4009                base_url: "https://llm.example.test/v1".to_string(),
4010                chat_endpoint: "/chat/completions".to_string(),
4011                ..Default::default()
4012            },
4013        );
4014
4015        config.merge_from(&overlay);
4016
4017        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
4018        assert!(config.providers.contains_key("custom"));
4019        assert!(config.providers.contains_key("anthropic"));
4020        assert!(config.providers.contains_key("ollama"));
4021    }
4022
4023    #[test]
4024    fn test_model_params_empty() {
4025        let params = model_params("claude-sonnet-4-20250514");
4026        assert!(params.is_empty());
4027    }
4028
4029    #[test]
4030    fn test_user_overrides_add_provider_and_alias() {
4031        reset_overrides();
4032        let mut overlay = ProvidersConfig::default();
4033        overlay.providers.insert(
4034            "acme".to_string(),
4035            ProviderDef {
4036                base_url: "https://llm.acme.test/v1".to_string(),
4037                chat_endpoint: "/chat/completions".to_string(),
4038                ..Default::default()
4039            },
4040        );
4041        overlay.aliases.insert(
4042            "acme-fast".to_string(),
4043            AliasDef {
4044                id: "acme/model-fast".to_string(),
4045                provider: "acme".to_string(),
4046                tool_format: Some("native".to_string()),
4047            },
4048        );
4049        set_user_overrides(Some(overlay));
4050
4051        let (model, provider) = resolve_model("acme-fast");
4052        assert_eq!(model, "acme/model-fast");
4053        assert_eq!(provider.as_deref(), Some("acme"));
4054        assert!(provider_names().contains(&"acme".to_string()));
4055        assert_eq!(
4056            provider_config("acme").map(|provider| provider.base_url),
4057            Some("https://llm.acme.test/v1".to_string())
4058        );
4059
4060        reset_overrides();
4061    }
4062
4063    #[test]
4064    fn test_default_tool_format_uses_capability_matrix() {
4065        reset_overrides();
4066
4067        assert_eq!(
4068            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
4069            "native"
4070        );
4071        // devstral dropped its stale heredoc `text` pin (it has no reserved-token
4072        // constraint, so there was no structural reason to stay on heredoc) and
4073        // now inherits the global `json` text-channel default. Heredoc is still
4074        // reachable via an explicit `preferred_tool_format = "text"` pin.
4075        assert_eq!(
4076            default_tool_format("devstral-small-2:24b", "ollama"),
4077            "json"
4078        );
4079        // vLLM/SGLang-served Gemma 4 exposes OpenAI-compatible function calling,
4080        // so the local route declares native tools (matching every hosted gemma-4
4081        // sibling) rather than degrading to a text tool format.
4082        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "native");
4083        // deepseek-v3.2 and qwen3-coder both pin `text` in the capability
4084        // matrix, so they keep heredoc rather than inheriting the json default.
4085        assert_eq!(
4086            default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
4087            "text"
4088        );
4089        assert_eq!(
4090            default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
4091            "text"
4092        );
4093        assert_eq!(
4094            default_tool_format("qwen/qwen3.6-flash", "openrouter"),
4095            "native"
4096        );
4097        assert_eq!(default_tool_format("z-ai/glm-5.2", "openrouter"), "text");
4098        // GPT-OSS tool defaults are provider-specific: aggregate OpenRouter and
4099        // Fireworks use Harn's heredoc text tools, as does DeepInfra — its
4100        // native Harmony channel drops tool calls into the private reasoning
4101        // channel (footgun), so it is pinned to text. Native-reliable hosts
4102        // (Cerebras, Groq) stay on provider-native tool calls.
4103        assert_eq!(
4104            default_tool_format("openai/gpt-oss-120b", "openrouter"),
4105            "text"
4106        );
4107        assert_eq!(
4108            default_tool_format("accounts/fireworks/models/gpt-oss-120b", "fireworks"),
4109            "text"
4110        );
4111        assert_eq!(default_tool_format("gpt-oss-120b", "cerebras"), "native");
4112        assert_eq!(
4113            default_tool_format("openai/gpt-oss-120b", "deepinfra"),
4114            "text"
4115        );
4116        assert_eq!(default_tool_format("openai/gpt-oss-120b", "groq"), "native");
4117    }
4118
4119    #[test]
4120    fn test_default_tool_format_unpinned_text_channel_is_json() {
4121        reset_overrides();
4122
4123        // GLOBAL DEFAULT FLIP: a model with no capability-matrix pin and no
4124        // native tool support resolves to fenced-json (`json`), not heredoc
4125        // (`text`). This is the behavior change — an unknown text-channel model
4126        // gets the delimiter-safe default. (Native-capable unknowns still get
4127        // `native`; pinned models still honor their pin, covered above.)
4128        assert_eq!(default_tool_format("mystery-model-xyz", "ollama"), "json");
4129    }
4130
4131    #[test]
4132    fn test_claude_family_defaults_native_without_host_pin() {
4133        reset_overrides();
4134
4135        // Unpinned claude-family routes on first-class tool-calling providers
4136        // resolve `native` from the capability matrix alone — no host alias
4137        // pin required. The openrouter rows exercise the family-level
4138        // catch-all: a dated slug, an unparseable version segment, and a new
4139        // family name have no versioned rule and previously fell through to
4140        // the global text-channel `json` default.
4141        for (model, provider) in [
4142            ("claude-sonnet-4-6", "anthropic"),
4143            ("claude-sonnet-5", "anthropic"),
4144            ("anthropic/claude-nova-1", "anthropic"),
4145            ("anthropic/claude-sonnet-4.6", "openrouter"),
4146            ("anthropic/claude-sonnet-5", "openrouter"),
4147            ("anthropic/claude-opus-4-5-20251101", "openrouter"),
4148            ("anthropic/claude-sonnet-next", "openrouter"),
4149            ("anthropic/claude-nova-1", "openrouter"),
4150            ("anthropic.claude-sonnet-4-6", "bedrock"),
4151        ] {
4152            assert_eq!(
4153                default_tool_format(model, provider),
4154                "native",
4155                "{provider}:{model} must default native without a host pin"
4156            );
4157        }
4158
4159        // An unpinned host alias resolves native end-to-end through
4160        // `resolve_model_info` (alias -> id -> capability matrix -> dialect
4161        // guard) — the exact seam hosts consume via `llm_resolve_model`.
4162        let overlay = parse_config_toml(
4163            "[aliases.probe-sonnet]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\n",
4164        )
4165        .expect("overlay parses");
4166        set_user_overrides(Some(overlay));
4167        let resolved = resolve_model_info("probe-sonnet");
4168        assert_eq!(resolved.provider, "anthropic");
4169        assert_eq!(
4170            resolved.tool_format, "native",
4171            "an unpinned claude alias must inherit the family-level native default"
4172        );
4173        clear_user_overrides();
4174
4175        // An explicit host pin still wins over the family default: a
4176        // text-channel `json` pin on a native-capable claude route survives
4177        // resolution (the dialect guard only corrects known-broken combos).
4178        let overlay = parse_config_toml(
4179            "[aliases.probe-sonnet-json]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\ntool_format = \"json\"\n",
4180        )
4181        .expect("overlay parses");
4182        set_user_overrides(Some(overlay));
4183        let pinned = resolve_model_info("probe-sonnet-json");
4184        assert_eq!(
4185            pinned.tool_format, "json",
4186            "an explicit host pin must win over the claude family default"
4187        );
4188        clear_user_overrides();
4189
4190        // Non-claude models keep the global text-channel `json` default —
4191        // the catch-all is family-scoped, not a provider-wide flip.
4192        assert_eq!(
4193            default_tool_format("mystery-model-xyz", "openrouter"),
4194            "json"
4195        );
4196    }
4197
4198    #[test]
4199    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
4200        reset_overrides();
4201        let mut overlay = ProvidersConfig::default();
4202        overlay.models.insert(
4203            "acme/model-fast".to_string(),
4204            ModelDef {
4205                name: "Acme Fast".to_string(),
4206                provider: "acme".to_string(),
4207                context_window: 65_536,
4208                logical_model: None,
4209                equivalence_group: None,
4210                served_variant: None,
4211                wire_model: None,
4212                api_dialect: None,
4213                rate_limits: None,
4214                performance: None,
4215                architecture: None,
4216                local_memory: None,
4217                runtime_context_window: None,
4218                stream_timeout: Some(42.0),
4219                capabilities: vec!["tools".to_string(), "streaming".to_string()],
4220                pricing: Some(ModelPricing {
4221                    input_per_mtok: 1.25,
4222                    output_per_mtok: 2.5,
4223                    cache_read_per_mtok: Some(0.25),
4224                    cache_write_per_mtok: None,
4225                }),
4226                deprecated: false,
4227                deprecation_note: None,
4228                superseded_by: None,
4229                fast_mode: None,
4230                quality_tags: Vec::new(),
4231                availability: ModelAvailability::default(),
4232                tier: None,
4233                open_weight: None,
4234                strengths: Vec::new(),
4235                benchmarks: std::collections::BTreeMap::new(),
4236                family: None,
4237                lineage: None,
4238                complementary_with: Vec::new(),
4239                avoid_as_reviewer_for: Vec::new(),
4240            },
4241        );
4242        overlay
4243            .qc_defaults
4244            .insert("acme".to_string(), "acme/model-cheap".to_string());
4245        set_user_overrides(Some(overlay));
4246
4247        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
4248        assert_eq!(entry.context_window, 65_536);
4249        assert_eq!(
4250            entry.capabilities,
4251            vec!["streaming".to_string(), "tools".to_string()]
4252        );
4253        assert_eq!(
4254            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
4255            Some(1.25)
4256        );
4257        assert_eq!(
4258            pricing_per_1k_for("acme", "acme/model-fast"),
4259            Some((0.00125, 0.0025))
4260        );
4261        assert_eq!(
4262            qc_default_model("acme").as_deref(),
4263            Some("acme/model-cheap")
4264        );
4265
4266        reset_overrides();
4267    }
4268
4269    #[test]
4270    fn test_user_overrides_prepend_inference_rules() {
4271        reset_overrides();
4272        let mut overlay = ProvidersConfig::default();
4273        overlay.inference_rules.push(InferenceRule {
4274            pattern: Some("internal-*".to_string()),
4275            contains: None,
4276            exact: None,
4277            provider: "openai".to_string(),
4278        });
4279        set_user_overrides(Some(overlay));
4280
4281        assert_eq!(infer_provider("internal-foo"), "openai");
4282
4283        reset_overrides();
4284    }
4285
4286    // ── Embedded providers.toml invariants ───────────────────────────────────
4287    // These tests pin properties of the *system* — TOML parses, every
4288    // alias resolves, every deprecated model has a note — without
4289    // pinning specific catalog values. They survive future catalog
4290    // churn and surface real schema breakage.
4291
4292    #[test]
4293    fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
4294        let config = default_config();
4295        assert!(
4296            config.providers.len() >= 10,
4297            "expected >=10 providers in embedded catalog, got {}",
4298            config.providers.len()
4299        );
4300        assert!(
4301            config.models.len() >= 20,
4302            "expected >=20 models in embedded catalog, got {}",
4303            config.models.len()
4304        );
4305        assert!(
4306            config.aliases.len() >= 15,
4307            "expected >=15 aliases in embedded catalog, got {}",
4308            config.aliases.len()
4309        );
4310        assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
4311    }
4312
4313    #[test]
4314    fn embedded_catalog_every_deprecated_model_has_a_note() {
4315        let config = default_config();
4316        let offenders: Vec<&str> = config
4317            .models
4318            .iter()
4319            .filter(|(_, model)| {
4320                model.deprecated
4321                    && model
4322                        .deprecation_note
4323                        .as_deref()
4324                        .unwrap_or("")
4325                        .trim()
4326                        .is_empty()
4327            })
4328            .map(|(id, _)| id.as_str())
4329            .collect();
4330        assert!(
4331            offenders.is_empty(),
4332            "deprecated models missing a deprecation_note: {offenders:?}"
4333        );
4334    }
4335
4336    #[test]
4337    fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
4338        let config = default_config();
4339        for id in ["gpt-oss-120b", "zai-glm-4.7"] {
4340            let model = config.models.get(id).expect("current public Cerebras row");
4341            assert_eq!(model.provider, "cerebras");
4342            assert_eq!(model.availability, ModelAvailability::Serverless);
4343            assert!(!model.deprecated);
4344        }
4345
4346        let llama = config
4347            .models
4348            .get("llama-3.3-70b")
4349            .expect("legacy Cerebras row");
4350        assert_eq!(llama.provider, "cerebras");
4351        assert_eq!(llama.availability, ModelAvailability::Dedicated);
4352        assert!(llama.deprecated);
4353    }
4354
4355    #[test]
4356    fn embedded_openrouter_gpt_oss_120b_has_no_fragment_bleed() {
4357        // Regression for the provider-catalog leading-key bleed: the openrouter
4358        // `openai/gpt-oss-120b` row was the last model in its fragment with no
4359        // inline tier/open_weight/strengths, so the next fragment's leading bare
4360        // keys reattached to it after raw-text concatenation — mislabeling it as
4361        // `open_weight = false` with a spurious `vision` strength. It must now be
4362        // self-described: open weight, no vision, and a tier consistent with the
4363        // rest of its equivalence group.
4364        let config = default_config();
4365        let model = config
4366            .models
4367            .get("openai/gpt-oss-120b")
4368            .expect("openrouter gpt-oss-120b row");
4369        assert_eq!(model.provider, "openrouter");
4370        assert_eq!(
4371            model.open_weight,
4372            Some(true),
4373            "gpt-oss-120b is Apache-2.0 open weight, not the bled-in open_weight=false"
4374        );
4375        assert!(
4376            !model.strengths.iter().any(|s| s == "vision"),
4377            "gpt-oss-120b is text-only; the bled-in `vision` strength must be gone: {:?}",
4378            model.strengths
4379        );
4380        assert!(
4381            !model.strengths.is_empty(),
4382            "gpt-oss-120b must carry its own strengths, not None"
4383        );
4384
4385        // tier is a property of the logical model: every active row in the
4386        // openai-gpt-oss-120b equivalence group must agree.
4387        let group_tiers: std::collections::BTreeSet<_> = config
4388            .models
4389            .values()
4390            .filter(|m| {
4391                m.equivalence_group.as_deref() == Some("openai-gpt-oss-120b") && !m.deprecated
4392            })
4393            .map(|m| m.tier.clone())
4394            .collect();
4395        assert_eq!(
4396            group_tiers.len(),
4397            1,
4398            "openai-gpt-oss-120b group must share one tier, got {group_tiers:?}"
4399        );
4400    }
4401
4402    #[test]
4403    fn embedded_catalog_every_model_targets_a_registered_provider() {
4404        let config = default_config();
4405        let known: std::collections::BTreeSet<&str> =
4406            config.providers.keys().map(String::as_str).collect();
4407        let orphans: Vec<(&str, &str)> = config
4408            .models
4409            .iter()
4410            .filter(|(_, model)| !known.contains(model.provider.as_str()))
4411            .map(|(id, model)| (id.as_str(), model.provider.as_str()))
4412            .collect();
4413        assert!(
4414            orphans.is_empty(),
4415            "models reference unknown providers: {orphans:?}"
4416        );
4417    }
4418
4419    #[test]
4420    fn embedded_catalog_every_alias_targets_a_registered_provider() {
4421        let config = default_config();
4422        let known: std::collections::BTreeSet<&str> =
4423            config.providers.keys().map(String::as_str).collect();
4424        let orphans: Vec<(&str, &str)> = config
4425            .aliases
4426            .iter()
4427            .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
4428            .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
4429            .collect();
4430        assert!(
4431            orphans.is_empty(),
4432            "aliases reference unknown providers: {orphans:?}"
4433        );
4434    }
4435
4436    #[test]
4437    fn embedded_catalog_every_qc_default_targets_a_known_model() {
4438        let config = default_config();
4439        let orphans: Vec<(&str, &str)> = config
4440            .qc_defaults
4441            .iter()
4442            .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
4443            .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
4444            .collect();
4445        assert!(
4446            orphans.is_empty(),
4447            "qc_defaults reference unknown models: {orphans:?}"
4448        );
4449    }
4450
4451    #[test]
4452    fn embedded_catalog_pricing_rates_are_non_negative() {
4453        let config = default_config();
4454        for (id, model) in &config.models {
4455            let Some(pricing) = &model.pricing else {
4456                continue;
4457            };
4458            assert!(
4459                pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
4460                "{id}: negative pricing — in={} out={}",
4461                pricing.input_per_mtok,
4462                pricing.output_per_mtok
4463            );
4464            if let Some(rate) = pricing.cache_read_per_mtok {
4465                assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
4466            }
4467            if let Some(rate) = pricing.cache_write_per_mtok {
4468                assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
4469            }
4470        }
4471    }
4472
4473    #[test]
4474    fn model_availability_parses_known_strings() {
4475        assert_eq!(
4476            ModelAvailability::parse("serverless"),
4477            Some(ModelAvailability::Serverless)
4478        );
4479        assert_eq!(
4480            ModelAvailability::parse("dedicated"),
4481            Some(ModelAvailability::Dedicated)
4482        );
4483        assert_eq!(
4484            ModelAvailability::parse("unknown"),
4485            Some(ModelAvailability::Unknown)
4486        );
4487        assert_eq!(ModelAvailability::parse("provisioned"), None);
4488        for value in [
4489            ModelAvailability::Serverless,
4490            ModelAvailability::Dedicated,
4491            ModelAvailability::Unknown,
4492        ] {
4493            assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
4494        }
4495    }
4496
4497    #[test]
4498    fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
4499        let config = default_config();
4500        let model = config
4501            .models
4502            .get("Qwen/Qwen3-Coder-Next-FP8")
4503            .expect("Together Qwen3 Coder Next FP8 is cataloged");
4504        assert_eq!(model.provider, "together");
4505        assert_eq!(model.availability, ModelAvailability::Dedicated);
4506    }
4507
4508    #[test]
4509    fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
4510        // A dedicated-only model behind a tier alias would silently fail
4511        // every serverless caller; the catalog must keep those routes
4512        // separated.
4513        let config = default_config();
4514        let dedicated: std::collections::BTreeSet<(&str, &str)> = config
4515            .models
4516            .iter()
4517            .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
4518            .map(|(id, model)| (model.provider.as_str(), id.as_str()))
4519            .collect();
4520        for (name, alias) in &config.aliases {
4521            if matches!(
4522                name.as_str(),
4523                "frontier"
4524                    | "mid"
4525                    | "small"
4526                    | "tier/frontier"
4527                    | "tier/mid"
4528                    | "tier/small"
4529                    | "sonnet"
4530                    | "opus"
4531                    | "haiku"
4532            ) {
4533                assert!(
4534                    !dedicated.contains(&(alias.provider.as_str(), alias.id.as_str())),
4535                    "tier alias `{name}` targets dedicated-only route `{}/{}`",
4536                    alias.provider,
4537                    alias.id,
4538                );
4539            }
4540        }
4541    }
4542
4543    #[test]
4544    fn embedded_catalog_tier_aliases_resolve_to_active_models() {
4545        // The three canonical tier aliases (frontier / mid / small) MUST
4546        // resolve to non-deprecated catalog entries; a default that
4547        // routes the loop into a sunsetted model is a release blocker.
4548        for alias in ["frontier", "mid", "small"] {
4549            let (model, _provider) = resolve_tier_model(alias, None)
4550                .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
4551            let entry = model_catalog_entry(&model).unwrap_or_else(|| {
4552                panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
4553            });
4554            assert!(
4555                !entry.deprecated,
4556                "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
4557                entry.deprecation_note
4558            );
4559        }
4560    }
4561
4562    #[test]
4563    fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
4564        // The `opus` alias must follow the newest Opus release, and that
4565        // release advertises its (off-by-default) fast-mode tier.
4566        let (model, provider) = resolve_model("opus");
4567        assert_eq!(model, "claude-opus-4-8");
4568        assert_eq!(provider.as_deref(), Some("anthropic"));
4569
4570        let opus48 = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
4571        assert!(!opus48.deprecated, "newest Opus must not be deprecated");
4572        let fast = opus48.fast_mode.expect("opus 4.8 advertises fast mode");
4573        assert_eq!(fast.param, "speed");
4574        assert_eq!(fast.value, "fast");
4575        assert_eq!(fast.status.as_deref(), Some("research_preview"));
4576        let fast_pricing = fast.pricing.expect("fast mode carries premium pricing");
4577        let standard = opus48.pricing.expect("opus 4.8 standard pricing");
4578        assert!(
4579            fast_pricing.input_per_mtok > standard.input_per_mtok,
4580            "fast mode must be premium-priced relative to standard"
4581        );
4582    }
4583
4584    #[test]
4585    fn superseded_opus_models_point_at_claude_opus_4_8() {
4586        // Earlier Opus rows are deprecated and carry a structured
4587        // `superseded_by` pointer to the current flagship.
4588        for model in ["claude-opus-4-7", "claude-opus-4-6"] {
4589            let entry =
4590                model_catalog_entry(model).unwrap_or_else(|| panic!("{model} catalog entry"));
4591            assert!(entry.deprecated, "{model} should be deprecated");
4592            assert_eq!(
4593                entry.superseded_by.as_deref(),
4594                Some("claude-opus-4-8"),
4595                "{model} should be superseded by claude-opus-4-8"
4596            );
4597        }
4598    }
4599
4600    #[test]
4601    fn opus_46_no_longer_advertises_fast_mode() {
4602        let opus46 = model_catalog_entry("claude-opus-4-6").expect("opus 4.6 catalog entry");
4603        assert!(
4604            opus46.fast_mode.is_none(),
4605            "Anthropic removed Opus 4.6 fast mode on 2026-06-29; Harn should not advertise it"
4606        );
4607
4608        let opus47 = model_catalog_entry("claude-opus-4-7").expect("opus 4.7 catalog entry");
4609        assert!(
4610            opus47.fast_mode.is_some(),
4611            "Opus 4.7 still advertises its own fast-mode tier"
4612        );
4613    }
4614
4615    #[test]
4616    fn gpt_5_5_fast_mode_rides_service_tier() {
4617        // Fast mode is provider-agnostic: OpenAI exposes it through the
4618        // `service_tier` knob rather than Anthropic's `speed`.
4619        let entry = model_catalog_entry("gpt-5.5").expect("gpt-5.5 catalog entry");
4620        let fast = entry.fast_mode.expect("gpt-5.5 advertises a fast tier");
4621        assert_eq!(fast.param, "service_tier");
4622        assert_eq!(fast.status.as_deref(), Some("ga"));
4623    }
4624}
harn_vm/llm_config.rs

harn_vm/
llm_config.rs