1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::{OnceLock, RwLock};
6
/// Process-wide merged provider configuration, initialized at most once by
/// `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file that `load_config` merged into `CONFIG`;
/// stays unset when no external file was merged.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
/// Lock-guarded optional runtime catalog overlay.
// NOTE(review): presumably reached through `runtime_catalog_overlay()`, which is
// not visible in this part of the file — confirm.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    /// Per-thread user overrides installed by `set_user_overrides`; starts as `None`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
18
/// Root of the providers configuration document.
///
/// Every section is optional when deserializing (`#[serde(default)]`). Layers
/// are combined with [`ProvidersConfig::merge_from`]; the per-field notes below
/// describe how an overlay affects each section.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Replaced by the overlay's value whenever the overlay sets one.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions by name; same-name overlay entries are merged
    /// field by field, new names are inserted.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias definitions by name; overlay entries replace same-key entries.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Per-alias tool-calling records; overlay entries replace same-key entries.
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model rows by id; overlay rows replace same-id rows before patches are applied.
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// String-to-string defaults; overlay entries replace same-key entries.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Provider inference rules; overlay rules are placed ahead of existing ones.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Tier rules; overlay rules are placed ahead of existing ones.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier defaults; only replaced when the overlay's default differs from `default_mid()`.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Per-pattern default tables; overlay keys are merged into the matching inner table.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Per-role default tables; overlay keys are merged into the matching inner table.
    #[serde(default)]
    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Suppressed routes; overlay routes are appended when not already present.
    #[serde(default)]
    pub suppress: SuppressDef,
    /// Partial model-row patches, deep-merged per id and applied onto `models`.
    #[serde(default)]
    pub patch: PatchDef,
}
48
/// The `patch` section: partial overrides for existing model rows.
#[derive(Debug, Clone, Deserialize, Default, PartialEq)]
pub struct PatchDef {
    /// Patch value per model id; deep-merged onto the serialized model row of
    /// the same id. Ids with no matching row are skipped when patches are applied.
    #[serde(default)]
    pub models: BTreeMap<String, toml::Value>,
}
88
/// The `suppress` section of the configuration.
#[derive(Debug, Clone, Deserialize, Default, PartialEq, Eq)]
pub struct SuppressDef {
    /// Route strings; merging appends overlay routes that are not already listed.
    #[serde(default)]
    pub routes: Vec<String>,
}
112
113impl ProvidersConfig {
114 pub fn is_empty(&self) -> bool {
115 self.default_provider.is_none()
116 && self.providers.is_empty()
117 && self.aliases.is_empty()
118 && self.alias_tool_calling.is_empty()
119 && self.models.is_empty()
120 && self.qc_defaults.is_empty()
121 && self.inference_rules.is_empty()
122 && self.tier_rules.is_empty()
123 && self.model_defaults.is_empty()
124 && self.model_roles.is_empty()
125 && self.suppress.routes.is_empty()
126 && self.patch.models.is_empty()
127 && self.tier_defaults.default == default_mid()
128 }
129
130 pub fn dangling_model_patches(&self) -> Vec<&str> {
136 self.patch
137 .models
138 .keys()
139 .filter(|id| !self.models.contains_key(*id))
140 .map(String::as_str)
141 .collect()
142 }
143
144 pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
145 for (name, provider) in &overlay.providers {
146 match self.providers.get_mut(name) {
147 Some(existing) => existing.merge_from(provider),
148 None => {
149 self.providers.insert(name.clone(), provider.clone());
150 }
151 }
152 }
153 self.aliases.extend(overlay.aliases.clone());
154 self.alias_tool_calling
155 .extend(overlay.alias_tool_calling.clone());
156 self.models.extend(overlay.models.clone());
157 self.qc_defaults.extend(overlay.qc_defaults.clone());
158
159 if !overlay.patch.models.is_empty() || !self.patch.models.is_empty() {
174 for (id, patch) in &overlay.patch.models {
175 match self.patch.models.get_mut(id) {
176 Some(existing) => deep_merge_toml(existing, patch),
177 None => {
178 self.patch.models.insert(id.clone(), patch.clone());
179 }
180 }
181 }
182 apply_model_patches(&mut self.models, &self.patch.models);
183 }
184
185 if overlay.default_provider.is_some() {
186 self.default_provider = overlay.default_provider.clone();
187 }
188
189 if !overlay.inference_rules.is_empty() {
190 let mut merged = overlay.inference_rules.clone();
191 merged.extend(self.inference_rules.clone());
192 self.inference_rules = merged;
193 }
194
195 if !overlay.tier_rules.is_empty() {
196 let mut merged = overlay.tier_rules.clone();
197 merged.extend(self.tier_rules.clone());
198 self.tier_rules = merged;
199 }
200
201 if overlay.tier_defaults.default != default_mid() {
202 self.tier_defaults = overlay.tier_defaults.clone();
203 }
204
205 for (pattern, defaults) in &overlay.model_defaults {
206 self.model_defaults
207 .entry(pattern.clone())
208 .or_default()
209 .extend(defaults.clone());
210 }
211
212 for (role, defaults) in &overlay.model_roles {
213 self.model_roles
214 .entry(role.clone())
215 .or_default()
216 .extend(defaults.clone());
217 }
218
219 for route in &overlay.suppress.routes {
220 if !self.suppress.routes.contains(route) {
221 self.suppress.routes.push(route.clone());
222 }
223 }
224 }
225}
226
227fn deep_merge_toml(base: &mut toml::Value, overlay: &toml::Value) {
233 match (base, overlay) {
234 (toml::Value::Table(base_table), toml::Value::Table(overlay_table)) => {
235 for (key, overlay_value) in overlay_table {
236 match base_table.get_mut(key) {
237 Some(base_value) => deep_merge_toml(base_value, overlay_value),
238 None => {
239 base_table.insert(key.clone(), overlay_value.clone());
240 }
241 }
242 }
243 }
244 (base_slot, overlay_value) => *base_slot = overlay_value.clone(),
245 }
246}
247
/// Set by `apply_model_patches` the first time a model patch fails to apply, so
/// the diagnostic is printed only once per process.
static MODEL_PATCH_TYPE_ERROR_WARNED: AtomicBool = AtomicBool::new(false);
252
253fn apply_model_patches(
263 models: &mut BTreeMap<String, ModelDef>,
264 patches: &BTreeMap<String, toml::Value>,
265) {
266 for (id, patch) in patches {
267 let Some(base) = models.get(id) else {
268 continue;
269 };
270 match patched_model_row(base, patch) {
271 Ok(patched) => {
272 models.insert(id.clone(), patched);
273 }
274 Err(error) => {
275 if !MODEL_PATCH_TYPE_ERROR_WARNED.swap(true, Ordering::Relaxed) {
276 eprintln!(
277 "[llm_config] invalid [patch.models.\"{id}\"] overlay \
278 (keeping the unpatched row): {error}"
279 );
280 }
281 }
282 }
283 }
284}
285
286fn patched_model_row(base: &ModelDef, patch: &toml::Value) -> Result<ModelDef, String> {
289 let mut value = toml::Value::try_from(base)
290 .map_err(|error| format!("serialize base row for patching: {error}"))?;
291 deep_merge_toml(&mut value, patch);
292 ModelDef::deserialize(value).map_err(|error| error.to_string())
293}
294
/// A provider definition.
///
/// Deserialized through `ProviderDefWire` (see the manual `Deserialize` impl)
/// so that an omitted `auth_style` can be told apart from an explicit one.
/// Overlays are combined field by field in `ProviderDef::merge_from`.
#[derive(Debug, Clone)]
pub struct ProviderDef {
    pub display_name: Option<String>,
    pub icon: Option<String>,
    pub protocol: Option<String>,
    /// Empty when not configured; an empty overlay value does not override on merge.
    pub base_url: String,
    pub base_url_env: Option<String>,
    /// Falls back to `default_bearer()` when the input omits it.
    pub auth_style: String,
    pub auth_header: Option<String>,
    pub auth_env: AuthEnv,
    /// Merged key by key; overlay entries win.
    pub extra_headers: BTreeMap<String, String>,
    /// Empty when not configured; an empty overlay value does not override on merge.
    pub chat_endpoint: String,
    pub completion_endpoint: Option<String>,
    pub command: Option<String>,
    /// Replaced wholesale by a non-empty overlay list.
    pub args: Vec<String>,
    /// Merged key by key; overlay entries win.
    pub env: BTreeMap<String, String>,
    pub cwd: Option<String>,
    /// Replaced wholesale by a non-empty overlay list.
    pub mcp_servers: Vec<serde_json::Value>,
    pub healthcheck: Option<HealthcheckDef>,
    pub local_runtime: Option<LocalRuntimeDef>,
    /// Replaced wholesale by a non-empty overlay list.
    pub features: Vec<String>,
    pub fallback: Option<String>,
    pub retry_count: Option<u32>,
    pub retry_delay_ms: Option<u64>,
    pub rpm: Option<u32>,
    pub rate_limits: Option<RateLimitsDef>,
    pub cost_per_1k_in: Option<f64>,
    pub cost_per_1k_out: Option<f64>,
    pub latency_p50_ms: Option<u64>,
    pub performance: Option<ServingPerformanceDef>,
    /// `true` when `auth_style` was present in the deserialized input, or became
    /// explicit through a merge; `false` for `Default::default()`.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
345
/// Deserialization shape for `ProviderDef`.
///
/// Mirrors `ProviderDef` field for field, except that `auth_style` is an
/// `Option` (so an omitted value is observable) and there is no
/// `auth_style_explicit` flag. Every field is optional in the input.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    // `None` means the input did not mention `auth_style` at all.
    #[serde(default)]
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    local_runtime: Option<LocalRuntimeDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
    #[serde(default)]
    performance: Option<ServingPerformanceDef>,
}
405
406impl<'de> Deserialize<'de> for ProviderDef {
407 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
408 where
409 D: serde::Deserializer<'de>,
410 {
411 let wire = ProviderDefWire::deserialize(deserializer)?;
412 let auth_style_explicit = wire.auth_style.is_some();
413 Ok(Self {
414 display_name: wire.display_name,
415 icon: wire.icon,
416 protocol: wire.protocol,
417 base_url: wire.base_url,
418 base_url_env: wire.base_url_env,
419 auth_style: wire.auth_style.unwrap_or_else(default_bearer),
420 auth_header: wire.auth_header,
421 auth_env: wire.auth_env,
422 extra_headers: wire.extra_headers,
423 chat_endpoint: wire.chat_endpoint,
424 completion_endpoint: wire.completion_endpoint,
425 command: wire.command,
426 args: wire.args,
427 env: wire.env,
428 cwd: wire.cwd,
429 mcp_servers: wire.mcp_servers,
430 healthcheck: wire.healthcheck,
431 local_runtime: wire.local_runtime,
432 features: wire.features,
433 fallback: wire.fallback,
434 retry_count: wire.retry_count,
435 retry_delay_ms: wire.retry_delay_ms,
436 rpm: wire.rpm,
437 rate_limits: wire.rate_limits,
438 cost_per_1k_in: wire.cost_per_1k_in,
439 cost_per_1k_out: wire.cost_per_1k_out,
440 latency_p50_ms: wire.latency_p50_ms,
441 performance: wire.performance,
442 auth_style_explicit,
443 })
444 }
445}
446
447impl Default for ProviderDef {
448 fn default() -> Self {
449 Self {
450 display_name: None,
451 icon: None,
452 protocol: None,
453 base_url: String::new(),
454 base_url_env: None,
455 auth_style: default_bearer(),
456 auth_header: None,
457 auth_env: AuthEnv::None,
458 extra_headers: BTreeMap::new(),
459 chat_endpoint: String::new(),
460 completion_endpoint: None,
461 command: None,
462 args: Vec::new(),
463 env: BTreeMap::new(),
464 cwd: None,
465 mcp_servers: Vec::new(),
466 healthcheck: None,
467 local_runtime: None,
468 features: Vec::new(),
469 fallback: None,
470 retry_count: None,
471 retry_delay_ms: None,
472 rpm: None,
473 rate_limits: None,
474 cost_per_1k_in: None,
475 cost_per_1k_out: None,
476 latency_p50_ms: None,
477 performance: None,
478 auth_style_explicit: false,
479 }
480 }
481}
482
483impl ProviderDef {
484 fn merge_from(&mut self, overlay: &ProviderDef) {
485 merge_option(&mut self.display_name, &overlay.display_name);
486 merge_option(&mut self.icon, &overlay.icon);
487 merge_option(&mut self.protocol, &overlay.protocol);
488 merge_string(&mut self.base_url, &overlay.base_url);
489 merge_option(&mut self.base_url_env, &overlay.base_url_env);
490 let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
491 if overlay.auth_style_explicit
492 || !overlay_uses_default_auth_style
493 || self.auth_style == default_bearer()
494 {
495 self.auth_style = overlay.auth_style.clone();
496 self.auth_style_explicit |=
497 overlay.auth_style_explicit || !overlay_uses_default_auth_style;
498 }
499 merge_option(&mut self.auth_header, &overlay.auth_header);
500 if !overlay.auth_env.is_none() {
501 self.auth_env = overlay.auth_env.clone();
502 }
503 self.extra_headers.extend(overlay.extra_headers.clone());
504 merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
505 merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
506 merge_option(&mut self.command, &overlay.command);
507 merge_vec(&mut self.args, &overlay.args);
508 self.env.extend(overlay.env.clone());
509 merge_option(&mut self.cwd, &overlay.cwd);
510 merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
511 merge_option(&mut self.healthcheck, &overlay.healthcheck);
512 merge_option(&mut self.local_runtime, &overlay.local_runtime);
513 merge_vec(&mut self.features, &overlay.features);
514 merge_option(&mut self.fallback, &overlay.fallback);
515 merge_option(&mut self.retry_count, &overlay.retry_count);
516 merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
517 merge_option(&mut self.rpm, &overlay.rpm);
518 merge_option(&mut self.rate_limits, &overlay.rate_limits);
519 merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
520 merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
521 merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
522 merge_option(&mut self.performance, &overlay.performance);
523 }
524}
525
/// Replaces `base` with a clone of `overlay` when `overlay` holds a value;
/// a `None` overlay leaves `base` untouched.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
531
/// Replaces `base` with `overlay` unless `overlay` is empty.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    *base = String::from(overlay);
}
537
/// Replaces the whole of `base` with a copy of `overlay` unless `overlay` is
/// empty. Lists are swapped wholesale, never concatenated.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    if overlay.is_empty() {
        return;
    }
    *base = overlay.iter().cloned().collect();
}
543
/// The auth style used when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
547
/// Untagged `auth_env` value: absent, a single string, or a list of strings.
// NOTE(review): presumably the strings are environment-variable names — confirm
// against the code that consumes this value.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// Nothing configured; this is the `Default` value.
    #[default]
    None,
    /// A single string.
    Single(String),
    /// A list of strings.
    Multiple(Vec<String>),
}
558
559impl AuthEnv {
560 fn is_none(&self) -> bool {
561 matches!(self, AuthEnv::None)
562 }
563}
564
/// Healthcheck settings attached to a provider.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Required; deserialization fails when it is missing.
    pub method: String,
    /// Optional.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional.
    #[serde(default)]
    pub body: Option<String>,
}
575
/// Local-runtime settings attached to a provider (`ProviderDef::local_runtime`).
///
/// Every field is optional on input; `None` values and empty lists are omitted
/// when serializing.
// NOTE(review): the `*_arg` fields look like command-line flag names for the
// runtime binary — confirm against the code that builds the launch command.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalRuntimeDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prefix_args: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source_env: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_port: Option<u16>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub served_model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ctx_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu_layers_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_k_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_v_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_ram_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub enable_lora_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_value_format: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_lora_rank_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub default_args: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
648
/// Local memory figures attached to a model row (`ModelDef::local_memory`).
///
/// Every field is optional on input; `None` values and an empty multiplier map
/// are omitted when serializing. `is_empty` reports whether nothing is set.
// NOTE(review): the `_gib` suffix presumably means gibibytes — confirm units.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct LocalMemoryDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_context_window: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_cache_gib_per_1k_ctx: Option<f64>,
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub cache_type_multipliers: BTreeMap<String, f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety_margin_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_recommended_context: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
689
690impl LocalMemoryDef {
691 pub fn is_empty(&self) -> bool {
692 self.measured_resident_gib.is_none()
693 && self.measured_context_window.is_none()
694 && self.measured_cache_type.is_none()
695 && self.base_resident_gib.is_none()
696 && self.kv_cache_gib_per_1k_ctx.is_none()
697 && self.cache_type_multipliers.is_empty()
698 && self.default_cache_type.is_none()
699 && self.safety_margin_gib.is_none()
700 && self.max_recommended_context.is_none()
701 && self.source_url.is_none()
702 && self.last_verified.is_none()
703 && self.notes.is_none()
704 }
705}
706
/// An alias entry; `id` and `provider` are required, `tool_format` is optional.
// NOTE(review): `id` presumably names the target model — confirm against the
// alias resolution code.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    pub id: String,
    pub provider: String,
    #[serde(default)]
    pub tool_format: Option<String>,
}
718
/// Per-alias tool-calling record (`ProvidersConfig::alias_tool_calling`).
///
/// Every field is optional on input and omitted from serialized output when `None`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasToolCallingDef {
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub native: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub streaming_native: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_mode: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub last_probe_at: Option<String>,
}
740
/// Pricing figures for a model; input and output prices are required, the
/// cache prices are optional.
// NOTE(review): `_per_mtok` presumably means "per million tokens" — confirm units.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
750
/// Rate-limit figures, used on both providers and model rows.
///
/// Every field is optional on input and omitted from serialized output when
/// `None`. `is_empty` reports whether nothing is set.
// NOTE(review): the short names presumably abbreviate requests/tokens per
// minute/hour/day — confirm before relying on the units.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RateLimitsDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpm: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rph: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpd: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tph: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpd: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tier: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
795
796impl RateLimitsDef {
797 pub fn is_empty(&self) -> bool {
798 self.rpm.is_none()
799 && self.rph.is_none()
800 && self.rpd.is_none()
801 && self.tpm.is_none()
802 && self.tph.is_none()
803 && self.tpd.is_none()
804 && self.input_tpm.is_none()
805 && self.output_tpm.is_none()
806 && self.concurrency.is_none()
807 && self.tier.is_none()
808 && self.source_url.is_none()
809 && self.last_verified.is_none()
810 && self.notes.is_none()
811 }
812
813 pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
814 if self.rpm.is_none() {
815 self.rpm = rpm;
816 }
817 (!self.is_empty()).then_some(self)
818 }
819}
820
/// Serving-performance figures, used on both providers and model rows.
///
/// Every field is optional on input and omitted from serialized output when
/// `None`. `is_empty` reports whether nothing is set.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ServingPerformanceDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub observed_ttft_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens_per_sec: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_answer_s: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sample_size: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
853
854impl ServingPerformanceDef {
855 pub fn is_empty(&self) -> bool {
856 self.observed_ttft_ms.is_none()
857 && self.output_tokens_per_sec.is_none()
858 && self.time_to_answer_s.is_none()
859 && self.source.is_none()
860 && self.source_url.is_none()
861 && self.last_verified.is_none()
862 && self.sample_size.is_none()
863 && self.notes.is_none()
864 }
865}
866
/// Architecture metadata attached to a model row (`ModelDef::architecture`).
///
/// Every field is optional on input and omitted from serialized output when
/// `None`. `is_empty` reports whether nothing is set.
// NOTE(review): the `_b` suffix presumably means billions of parameters — confirm.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ModelArchitectureDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moe: Option<bool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quantization: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precision: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokenizer: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub knowledge_cutoff: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
}
903
904impl ModelArchitectureDef {
905 pub fn is_empty(&self) -> bool {
906 self.parameter_count_b.is_none()
907 && self.active_parameter_count_b.is_none()
908 && self.moe.is_none()
909 && self.quantization.is_none()
910 && self.precision.is_none()
911 && self.license.is_none()
912 && self.tokenizer.is_none()
913 && self.knowledge_cutoff.is_none()
914 && self.source_url.is_none()
915 && self.last_verified.is_none()
916 }
917}
918
/// Fast-mode description attached to a model row (`ModelDef::fast_mode`).
///
/// `param` and `value` are required; everything else is optional.
// NOTE(review): `param`/`value` presumably name the request parameter that
// switches fast mode on — confirm against the request-building code.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    pub param: String,
    pub value: String,
    #[serde(default)]
    pub beta_header: Option<String>,
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    #[serde(default)]
    pub status: Option<String>,
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    pub note: Option<String>,
}
955
/// One model row of the catalog (`ProvidersConfig::models`).
///
/// `name`, `provider` and `context_window` are required; every other field
/// falls back to its type's default when omitted. Rows are round-tripped
/// through `toml::Value` by `patched_model_row` when patches are applied, so
/// the `Serialize` and `Deserialize` shapes must stay compatible.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Required.
    pub name: String,
    /// Required.
    pub provider: String,
    /// Required.
    pub context_window: u64,
    #[serde(default)]
    pub logical_model: Option<String>,
    #[serde(default)]
    pub equivalence_group: Option<String>,
    #[serde(default)]
    pub served_variant: Option<String>,
    #[serde(default)]
    pub wire_model: Option<String>,
    #[serde(default)]
    pub api_dialect: Option<String>,
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    pub performance: Option<ServingPerformanceDef>,
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    #[serde(default)]
    pub capabilities: Vec<String>,
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Defaults to `false` when omitted.
    #[serde(default)]
    pub deprecated: bool,
    #[serde(default)]
    pub deprecation_note: Option<String>,
    #[serde(default)]
    pub superseded_by: Option<String>,
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Defaults to `ModelAvailability::Serverless` when omitted.
    #[serde(default)]
    pub availability: ModelAvailability,
    #[serde(default)]
    pub tier: Option<String>,
    #[serde(default)]
    pub open_weight: Option<bool>,
    #[serde(default)]
    pub strengths: Vec<String>,
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    #[serde(default)]
    pub family: Option<String>,
    #[serde(default)]
    pub lineage: Option<String>,
    #[serde(default)]
    pub complementary_with: Vec<String>,
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
1074
/// Availability classification of a model row, serialized in `snake_case`.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// The `Default` value; used when a row omits `availability`.
    #[default]
    Serverless,
    Dedicated,
    Unknown,
}
1092
1093impl ModelAvailability {
1094 pub fn as_str(self) -> &'static str {
1095 match self {
1096 Self::Serverless => "serverless",
1097 Self::Dedicated => "dedicated",
1098 Self::Unknown => "unknown",
1099 }
1100 }
1101
1102 pub fn parse(value: &str) -> Option<Self> {
1103 match value {
1104 "serverless" => Some(Self::Serverless),
1105 "dedicated" => Some(Self::Dedicated),
1106 "unknown" => Some(Self::Unknown),
1107 _ => None,
1108 }
1109 }
1110}
1111
/// Serializable summary of a resolved model: its id and provider, the alias
/// (if any), and the tool format, tier, family and lineage as plain strings.
// NOTE(review): `alias` is presumably the alias the model was resolved from —
// confirm against the resolver, which is not visible in this part of the file.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    pub id: String,
    pub provider: String,
    pub alias: Option<String>,
    pub tool_format: String,
    pub tier: String,
    pub family: String,
    pub lineage: String,
}
1122
/// Inputs for choosing a complementary reviewer model.
// NOTE(review): `max_price_multiplier` presumably caps the reviewer's price
// relative to the author's — confirm against the selection code.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    pub author_model: String,
    pub author_provider: Option<String>,
    pub intent: ComplementaryReviewerIntent,
    pub max_price_multiplier: Option<f64>,
}
1130
/// The kind of review a complementary reviewer is being selected for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    Review,
    Critique,
    PlanReview,
}

impl ComplementaryReviewerIntent {
    /// Parses the exact snake_case name produced by [`Self::as_str`]; any other
    /// input yields `None`.
    pub fn parse(value: &str) -> Option<Self> {
        // Driving the lookup from `as_str` keeps the two directions in sync.
        [Self::Review, Self::Critique, Self::PlanReview]
            .iter()
            .copied()
            .find(|intent| intent.as_str() == value)
    }

    /// Returns the snake_case name of the variant.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::PlanReview => "plan_review",
            Self::Critique => "critique",
            Self::Review => "review",
        }
    }
}
1156
/// Serializable result of a complementary-reviewer selection: the author and
/// reviewer identities plus fallback details and an optional cost estimate.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    pub intent: String,
    pub author: ComplementaryModelIdentity,
    pub reviewer: ComplementaryModelIdentity,
    pub fallback: bool,
    pub fallback_reason: Option<String>,
    /// Omitted from serialized output when `None`.
    // NOTE(review): presumably holds a `ReviewerFallbackCode::as_code` string — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_code: Option<String>,
    pub reason: String,
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
1174
/// Reasons a reviewer selection can fall back, each with a snake_case code
/// string available through [`ReviewerFallbackCode::as_code`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReviewerFallbackCode {
    UnknownAuthorFamily,
    NoDiffFamilyWithinPrice,
    NoDiffFamilyServerless,
    AllDiffFamilyExcluded,
}

impl ReviewerFallbackCode {
    /// Returns the snake_case code string for this variant.
    pub fn as_code(self) -> &'static str {
        use ReviewerFallbackCode::{
            AllDiffFamilyExcluded, NoDiffFamilyServerless, NoDiffFamilyWithinPrice,
            UnknownAuthorFamily,
        };
        match self {
            AllDiffFamilyExcluded => "all_diff_family_excluded",
            NoDiffFamilyServerless => "no_diff_family_serverless",
            NoDiffFamilyWithinPrice => "no_diff_family_within_price",
            UnknownAuthorFamily => "unknown_author_family",
        }
    }
}
1203
/// Serializable identity of one side (author or reviewer) of a
/// `ComplementaryReviewerSelection`.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    pub id: String,
    pub provider: String,
    pub family: String,
    pub lineage: String,
    pub tier: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,
}
1214
/// Serializable cost estimate carried in
/// `ComplementaryReviewerSelection::estimated_incremental_cost`.
// NOTE(review): `_per_mtok` presumably means "per million tokens" — confirm units.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    pub total_per_mtok: f64,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multiplier_vs_author: Option<f64>,
}
1223
/// A rule that assigns a `provider`; the three matcher fields are optional,
/// `provider` is required.
// NOTE(review): how `pattern`, `contains` and `exact` are matched is decided by
// code outside this part of the file — confirm before documenting precedence.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub provider: String,
}
1234
/// A rule that assigns a `tier`; the three matcher fields are optional,
/// `tier` is required.
// NOTE(review): how `pattern`, `contains` and `exact` are matched is decided by
// code outside this part of the file — confirm before documenting precedence.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub tier: String,
}
1245
/// The `tier_defaults` section of the configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Default tier name; `default_mid()` when omitted.
    #[serde(default = "default_mid")]
    pub default: String,
}
1251
1252impl Default for TierDefaults {
1253 fn default() -> Self {
1254 Self {
1255 default: default_mid(),
1256 }
1257 }
1258}
1259
/// The tier name used when none is configured.
fn default_mid() -> String {
    String::from("mid")
}
1263
1264pub fn load_config() -> &'static ProvidersConfig {
1266 CONFIG.get_or_init(|| {
1267 let mut config = default_config();
1268 let verbose_config_logging = matches!(
1269 std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
1270 Some("1" | "true" | "TRUE" | "yes" | "YES")
1271 ) || matches!(
1272 std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
1273 Some("1" | "true" | "TRUE" | "yes" | "YES")
1274 );
1275 if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
1276 if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
1277 config.merge_from(&overlay);
1278 let _ = CONFIG_PATH.set(path);
1279 return config;
1280 }
1281 }
1282 if should_load_home_config() {
1283 if let Some(home) = dirs_or_home() {
1284 let path = format!("{home}/.config/harn/providers.toml");
1285 if let Some(overlay) = read_external_config(&path, false) {
1286 config.merge_from(&overlay);
1287 let _ = CONFIG_PATH.set(path);
1288 return config;
1289 }
1290 }
1291 }
1292 config
1293 })
1294}
1295
1296fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1297 match std::fs::read_to_string(path) {
1298 Ok(content) => match parse_config_toml(&content) {
1302 Ok(config) => {
1303 if verbose {
1304 eprintln!(
1305 "[llm_config] Loaded {} providers, {} aliases from {}",
1306 config.providers.len(),
1307 config.aliases.len(),
1308 path
1309 );
1310 }
1311 Some(config)
1312 }
1313 Err(error) => {
1314 eprintln!("[llm_config] TOML parse error in {path}: {error}");
1315 None
1316 }
1317 },
1318 Err(error) => {
1319 if verbose {
1320 eprintln!("[llm_config] Cannot read {path}: {error}");
1321 }
1322 None
1323 }
1324 }
1325}
1326
/// Whether the per-user home config file may be consulted; disabled in test builds.
fn should_load_home_config() -> bool {
    cfg!(not(test))
}
1332
1333pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1336 toml::from_str::<ProvidersConfig>(src)
1337}
1338
1339pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1342 let _ = load_config();
1344 CONFIG_PATH.get().map(std::path::PathBuf::from)
1345}
1346
1347pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1351 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1352}
1353
/// Removes the calling thread's user overrides; shorthand for `set_user_overrides(None)`.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
1358
1359pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1364 *runtime_catalog_overlay()
1365 .write()
1366 .expect("runtime catalog overlay poisoned") = config;
1367}
1368
/// Removes the runtime catalog overlay; shorthand for `set_runtime_catalog_overlay(None)`.
pub fn clear_runtime_catalog_overlay() {
    set_runtime_catalog_overlay(None);
}
1372
1373pub(crate) fn effective_config() -> ProvidersConfig {
1374 let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1375 effective_config_with_user_overrides(user_overrides.as_ref())
1376}
1377
1378pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1396 let mut config = default_config();
1397 if let Some(overlay) = explicit_overlay {
1398 config.merge_from(overlay);
1399 }
1400 config
1401}
1402
1403pub(crate) fn effective_config_with_user_overrides(
1404 user_overrides: Option<&ProvidersConfig>,
1405) -> ProvidersConfig {
1406 let mut merged = load_config().clone();
1407 if let Some(overlay) = runtime_catalog_overlay()
1408 .read()
1409 .expect("runtime catalog overlay poisoned")
1410 .as_ref()
1411 {
1412 merged.merge_from(overlay);
1413 }
1414 if let Some(overlay) = user_overrides {
1415 merged.merge_from(overlay);
1416 }
1417 merged
1418}
1419
1420fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
1421 RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
1422}
1423
1424pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1426 let config = effective_config();
1427 if let Some(a) = config.aliases.get(alias) {
1428 return (a.id.clone(), Some(a.provider.clone()));
1429 }
1430 (normalize_model_id(alias), None)
1431}
1432
/// Strips one leading provider-selector prefix (the first that matches, in table order)
/// from `raw`; ids without such a prefix are returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes that name a provider rather than being part of the model id.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
1450
1451pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1454 let config = effective_config();
1455 if let Some(alias) = config.aliases.get(selector) {
1456 let id = alias.id.clone();
1457 let provider = alias.provider.clone();
1458 let requested = alias
1459 .tool_format
1460 .clone()
1461 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1462 let tool_format = guard_tool_format(&provider, &id, &requested, Some(selector));
1463 return ResolvedModel {
1464 tier: model_tier_with_config(&config, &id),
1465 family: model_family_with_config(&config, &provider, &id),
1466 lineage: model_lineage_with_config(&config, &provider, &id),
1467 id,
1468 provider,
1469 alias: Some(selector.to_string()),
1470 tool_format,
1471 };
1472 }
1473
1474 let id = normalize_model_id(selector);
1475 let inference = infer_provider_with_config(&config, selector);
1476 let source = inference.source;
1477 let provider = inference.provider;
1478 let requested = default_tool_format_with_config(&config, &id, &provider);
1479 let tool_format = guard_tool_format(&provider, &id, &requested, None);
1480 let tier = model_tier_with_config(&config, &id);
1481 let family = model_family_with_inference_source(&config, &provider, &id, source);
1482 let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1483 ResolvedModel {
1484 id,
1485 provider,
1486 alias: None,
1487 tool_format,
1488 tier,
1489 family,
1490 lineage,
1491 }
1492}
1493
1494fn guard_tool_format(provider: &str, model: &str, requested: &str, alias: Option<&str>) -> String {
1501 let decision = crate::llm::capabilities::validate_tool_format(provider, model, requested);
1502 if let Some(reason) = &decision.correction {
1503 tracing::warn!(
1504 target: "harn::llm::tool_format",
1505 alias = alias.unwrap_or(""),
1506 "{reason}"
1507 );
1508 }
1509 decision.effective
1510}
1511
1512pub fn infer_provider(model_id: &str) -> String {
1514 infer_provider_detail(model_id).provider
1515}
1516
1517pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1519 let config = effective_config();
1520 infer_provider_with_config(&config, model_id)
1521}
1522
1523fn infer_provider_with_config(
1524 config: &ProvidersConfig,
1525 model_id: &str,
1526) -> crate::llm::provider::ProviderInference {
1527 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1528 return crate::llm::provider::ProviderInference::builtin("ollama");
1529 }
1530 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1531 return crate::llm::provider::ProviderInference::builtin("huggingface");
1532 }
1533 let normalized_id = normalize_model_id(model_id);
1539 if let Some(model) = config
1540 .models
1541 .get(model_id)
1542 .or_else(|| config.models.get(&normalized_id))
1543 {
1544 return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1545 }
1546 for rule in &config.inference_rules {
1547 if let Some(exact) = &rule.exact {
1548 if model_id == exact {
1549 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1550 }
1551 }
1552 if let Some(pattern) = &rule.pattern {
1553 if glob_match(pattern, model_id) {
1554 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1555 }
1556 }
1557 if let Some(substr) = &rule.contains {
1558 if model_id.contains(substr.as_str()) {
1559 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1560 }
1561 }
1562 }
1563 crate::llm::provider::infer_provider_from_model_id(
1564 model_id,
1565 &default_provider_with_config(config),
1566 )
1567}
1568
1569pub fn default_provider() -> String {
1570 let config = effective_config();
1571 default_provider_with_config(&config)
1572}
1573
1574fn default_provider_with_config(config: &ProvidersConfig) -> String {
1575 std::env::var("HARN_DEFAULT_PROVIDER")
1576 .ok()
1577 .map(|value| value.trim().to_string())
1578 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1579 .or_else(|| {
1580 config
1581 .default_provider
1582 .as_deref()
1583 .map(str::trim)
1584 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1585 .map(str::to_string)
1586 })
1587 .unwrap_or_else(|| auto_select_provider(config))
1588}
1589
/// Provider name `auto_select_provider` returns when no configured provider qualifies;
/// selecting it from the preferred list does not trigger a warning.
const FALLBACK_PROVIDER: &str = "anthropic";

/// Flipped to `true` by the first auto-selection warning so later ones are suppressed.
static AUTO_PROVIDER_WARNED: AtomicBool = AtomicBool::new(false);
1598
1599fn provider_has_credentials(def: &ProviderDef) -> bool {
1601 auth_env_names(&def.auth_env)
1602 .iter()
1603 .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty()))
1604}
1605
1606fn provider_is_local(def: &ProviderDef) -> bool {
1609 def.local_runtime.is_some() || matches!(def.auth_env, AuthEnv::None)
1610}
1611
1612fn warn_auto_provider_once(message: &str) {
1614 if !AUTO_PROVIDER_WARNED.swap(true, Ordering::Relaxed) {
1615 crate::events::log_warn("llm_config", message);
1616 }
1617}
1618
1619fn auto_select_provider(config: &ProvidersConfig) -> String {
1625 const PREFERRED: &[&str] = &[
1628 "anthropic",
1629 "openai",
1630 "google",
1631 "azure-openai",
1632 "groq",
1633 "mistral",
1634 "deepseek",
1635 "xai",
1636 "openrouter",
1637 ];
1638 for name in PREFERRED {
1639 if config
1640 .providers
1641 .get(*name)
1642 .is_some_and(provider_has_credentials)
1643 {
1644 if *name != FALLBACK_PROVIDER {
1645 warn_auto_provider_once(&format!(
1646 "no default provider configured; using '{name}' (its API key is set). \
1647 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1648 ));
1649 }
1650 return (*name).to_string();
1651 }
1652 }
1653 for (name, def) in &config.providers {
1654 if provider_has_credentials(def) {
1655 warn_auto_provider_once(&format!(
1656 "no default provider configured; using '{name}' (its API key is set). \
1657 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1658 ));
1659 return name.clone();
1660 }
1661 }
1662 for (name, def) in &config.providers {
1664 if provider_is_local(def) {
1665 warn_auto_provider_once(&format!(
1666 "no provider API keys found; using local provider '{name}'. \
1667 Set an API key + HARN_DEFAULT_PROVIDER to use a cloud provider."
1668 ));
1669 return name.clone();
1670 }
1671 }
1672 warn_auto_provider_once(&format!(
1674 "no LLM provider configured and no API keys detected; defaulting to \
1675 '{FALLBACK_PROVIDER}'. Set ANTHROPIC_API_KEY (or another provider's key plus \
1676 HARN_DEFAULT_PROVIDER), or run a local model with `harn local launch`."
1677 ));
1678 FALLBACK_PROVIDER.to_string()
1679}
1680
1681pub fn model_tier(model_id: &str) -> String {
1683 let config = effective_config();
1684 model_tier_with_config(&config, model_id)
1685}
1686
1687pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1688 if let Some(model) = config.models.get(model_id) {
1690 if let Some(tier) = model.tier.as_deref() {
1691 let trimmed = tier.trim();
1692 if !trimmed.is_empty() {
1693 return trimmed.to_string();
1694 }
1695 }
1696 }
1697 for rule in &config.tier_rules {
1701 if let Some(exact) = &rule.exact {
1702 if model_id == exact {
1703 return rule.tier.clone();
1704 }
1705 }
1706 if let Some(pattern) = &rule.pattern {
1707 if glob_match(pattern, model_id) {
1708 return rule.tier.clone();
1709 }
1710 }
1711 if let Some(substr) = &rule.contains {
1712 if model_id.contains(substr.as_str()) {
1713 return rule.tier.clone();
1714 }
1715 }
1716 }
1717 config.tier_defaults.default.clone()
1718}
1719
1720pub fn model_family(provider: &str, model_id: &str) -> String {
1722 let config = effective_config();
1723 model_family_with_config(&config, provider, model_id)
1724}
1725
1726pub(crate) fn model_family_with_config(
1727 config: &ProvidersConfig,
1728 provider: &str,
1729 model_id: &str,
1730) -> String {
1731 catalog_family_token(config, model_id)
1732 .unwrap_or_else(|| derive_model_family(provider, model_id))
1733}
1734
1735fn model_family_with_inference_source(
1736 config: &ProvidersConfig,
1737 provider: &str,
1738 model_id: &str,
1739 source: crate::llm::provider::ProviderInferenceSource,
1740) -> String {
1741 if let Some(family) = catalog_family_token(config, model_id) {
1742 return family;
1743 }
1744 let id_family = derive_model_family("", model_id);
1745 if id_family != "unknown" {
1746 return id_family;
1747 }
1748 if matches!(
1749 source,
1750 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1751 ) {
1752 return "unknown".to_string();
1753 }
1754 derive_model_family(provider, model_id)
1755}
1756
1757pub fn model_lineage(provider: &str, model_id: &str) -> String {
1759 let config = effective_config();
1760 model_lineage_with_config(&config, provider, model_id)
1761}
1762
1763pub(crate) fn model_lineage_with_config(
1764 config: &ProvidersConfig,
1765 provider: &str,
1766 model_id: &str,
1767) -> String {
1768 catalog_lineage_token(config, model_id)
1769 .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1770}
1771
1772fn model_lineage_with_inference_source(
1773 config: &ProvidersConfig,
1774 provider: &str,
1775 model_id: &str,
1776 source: crate::llm::provider::ProviderInferenceSource,
1777) -> String {
1778 if let Some(lineage) = catalog_lineage_token(config, model_id) {
1779 return lineage;
1780 }
1781 let id_lineage = derive_model_lineage("", model_id);
1782 if id_lineage != "unknown" {
1783 return id_lineage;
1784 }
1785 if matches!(
1786 source,
1787 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1788 ) {
1789 return "unknown".to_string();
1790 }
1791 derive_model_lineage(provider, model_id)
1792}
1793
1794fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1795 config
1796 .models
1797 .get(model_id)
1798 .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1799}
1800
1801fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1802 config
1803 .models
1804 .get(model_id)
1805 .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1806}
1807
/// Canonicalizes a catalog token: trimmed, ASCII-lowercased, underscores turned into
/// hyphens. Missing or blank input yields `None`.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let trimmed = value?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.to_ascii_lowercase().replace('_', "-"))
}
1814
1815fn derive_model_family(provider: &str, model_id: &str) -> String {
1816 let id = model_id.to_ascii_lowercase();
1817 if contains_any(&id, &["claude", "anthropic.claude"]) {
1818 return "anthropic-claude".to_string();
1819 }
1820 if contains_any(&id, &["gemini", "google/gemini"]) {
1821 return "google-gemini".to_string();
1822 }
1823 if contains_any(&id, &["deepseek"]) {
1824 return "deepseek".to_string();
1825 }
1826 if contains_any(&id, &["qwen"]) {
1827 return "qwen".to_string();
1828 }
1829 if contains_any(&id, &["kimi", "moonshot"]) {
1830 return "kimi".to_string();
1831 }
1832 if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1833 return "glm".to_string();
1834 }
1835 if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1836 return "mistral".to_string();
1837 }
1838 if contains_any(&id, &["minimax"]) {
1839 return "minimax".to_string();
1840 }
1841 if contains_any(&id, &["llama"]) {
1842 return "llama".to_string();
1843 }
1844 if contains_any(&id, &["gemma"]) {
1845 return "gemma".to_string();
1846 }
1847 if is_openai_reasoning_model(&id) {
1848 return "openai-reasoning".to_string();
1849 }
1850 if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1851 return "openai-gpt".to_string();
1852 }
1853 match provider {
1854 "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1855 "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1856 "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1857 "deepseek" => "deepseek".to_string(),
1858 "zai" => "glm".to_string(),
1859 "minimax" => "minimax".to_string(),
1860 other if !other.is_empty() => normalize_identifier_token(other),
1861 _ => "unknown".to_string(),
1862 }
1863}
1864
1865fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1866 let id = model_id.to_ascii_lowercase();
1867 if contains_any(&id, &["haiku"]) {
1868 return "claude-haiku".to_string();
1869 }
1870 if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1871 return "claude-opus-adaptive".to_string();
1872 }
1873 if contains_any(&id, &["claude"]) {
1874 return "claude-sonnet-opus".to_string();
1875 }
1876 if contains_any(&id, &["gpt-5"]) {
1877 return "openai-gpt5".to_string();
1878 }
1879 if is_openai_reasoning_model(&id) {
1880 return "openai-reasoning".to_string();
1881 }
1882 if contains_any(&id, &["gpt-", "gpt_"]) {
1883 return "openai-legacy".to_string();
1884 }
1885 if contains_any(&id, &["gemini"]) {
1886 if contains_any(&id, &["flash"]) {
1887 return "gemini-flash".to_string();
1888 }
1889 return "gemini-pro".to_string();
1890 }
1891 if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1892 return "qwen3".to_string();
1893 }
1894 if contains_any(&id, &["gemma4", "gemma-4"]) {
1895 return "gemma4".to_string();
1896 }
1897 let family = derive_model_family(provider, model_id);
1898 if family == "unknown" {
1899 "unknown".to_string()
1900 } else {
1901 family
1902 }
1903}
1904
/// True when `haystack` contains at least one of `needles` as a substring.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1908
/// True when `haystack` begins with at least one of `prefixes`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1912
1913fn is_openai_reasoning_model(id: &str) -> bool {
1914 starts_with_any(id, &["o1", "o3", "o4"])
1915 || contains_any(
1916 id,
1917 &[
1918 "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1919 ],
1920 )
1921}
1922
/// Turns free-form text into a lowercase, hyphen-separated token.
///
/// Runs of ASCII letters/digits are kept (lowercased); every other character, hyphens
/// included, acts as a separator. Separators collapse into a single `-` and never
/// appear at either end.
fn normalize_identifier_token(value: &str) -> String {
    let mut token = String::with_capacity(value.len());
    let mut separator_pending = false;
    for ch in value.trim().chars() {
        let lowered = ch.to_ascii_lowercase();
        if lowered.is_ascii_alphanumeric() {
            // Emit a separator only between two kept runs.
            if separator_pending && !token.is_empty() {
                token.push('-');
            }
            separator_pending = false;
            token.push(lowered);
        } else {
            separator_pending = true;
        }
    }
    token
}
1941
1942pub fn provider_config(name: &str) -> Option<ProviderDef> {
1944 effective_config().providers.get(name).cloned()
1945}
1946
1947pub fn provider_protocol(name: &str) -> Option<String> {
1948 provider_config(name).and_then(|def| def.protocol)
1949}
1950
1951pub fn provider_uses_acp(name: &str) -> bool {
1952 provider_protocol(name)
1953 .as_deref()
1954 .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1955}
1956
1957pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1960 let config = effective_config();
1961 let mut params = BTreeMap::new();
1962 for (pattern, defaults) in &config.model_defaults {
1963 if glob_match(pattern, model_id) {
1964 for (k, v) in defaults {
1965 params.insert(k.clone(), v.clone());
1966 }
1967 }
1968 }
1969 params
1970}
1971
1972pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1986 let normalized = normalize_model_role_name(role);
1987 if normalized.is_empty() {
1988 return BTreeMap::new();
1989 }
1990 let config = effective_config();
1991 let mut params = BTreeMap::new();
1992 for key in role_lookup_keys(&normalized) {
1993 extend_model_role_defaults(&config, &key, &mut params);
1994 }
1995 apply_model_role_env_overrides(&normalized, &mut params);
1996 params
1997}
1998
1999fn extend_model_role_defaults(
2000 config: &ProvidersConfig,
2001 role: &str,
2002 params: &mut BTreeMap<String, toml::Value>,
2003) {
2004 for (configured_role, defaults) in &config.model_roles {
2005 if normalize_model_role_name(configured_role) == role {
2006 params.extend(defaults.clone());
2007 }
2008 }
2009 if let Some(defaults) = config.model_roles.get(role) {
2010 params.extend(defaults.clone());
2011 }
2012}
2013
/// Canonical role key: trimmed, ASCII-lowercased, hyphens turned into underscores.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| match ch {
            '-' => '_',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
2017
/// Config keys to consult for a normalized role, least specific first.
///
/// `merge` and `fast_apply` are aliases of each other; the requested name comes last so
/// its values take precedence when applied in order.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let keys: &[&str] = match role {
        "merge" => &["fast_apply", "merge"],
        "fast_apply" => &["merge", "fast_apply"],
        other => return vec![other.to_string()],
    };
    keys.iter().map(|key| key.to_string()).collect()
}
2027
/// Converts a role name into the upper-case token used inside environment variable
/// names.
///
/// ASCII letters/digits are upper-cased; every other character is a separator.
/// Separators collapse into a single `_` and never appear at either end.
fn role_env_token(role: &str) -> String {
    let mut token = String::with_capacity(role.len());
    let mut separator_pending = false;
    for ch in role.chars() {
        if ch.is_ascii_alphanumeric() {
            // Emit a separator only between two kept runs.
            if separator_pending && !token.is_empty() {
                token.push('_');
            }
            separator_pending = false;
            token.push(ch.to_ascii_uppercase());
        } else {
            separator_pending = true;
        }
    }
    token
}
2043
2044fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
2045 for alias in role_env_aliases(role) {
2046 apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
2047 apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
2048 apply_model_role_env_var(
2049 &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
2050 "route_policy",
2051 params,
2052 );
2053 apply_model_role_env_var(
2054 &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
2055 "provider",
2056 params,
2057 );
2058 apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
2059 apply_model_role_env_var(
2060 &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
2061 "route_policy",
2062 params,
2063 );
2064 }
2065}
2066
2067fn role_env_aliases(role: &str) -> Vec<String> {
2068 let token = role_env_token(role);
2069 if token.is_empty() {
2070 return Vec::new();
2071 }
2072 if token == "MERGE" {
2073 vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
2074 } else if token == "FAST_APPLY" {
2075 vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
2076 } else {
2077 vec![token]
2078 }
2079}
2080
2081fn apply_model_role_env_var(
2082 env_name: &str,
2083 option_name: &str,
2084 params: &mut BTreeMap<String, toml::Value>,
2085) {
2086 let Ok(value) = std::env::var(env_name) else {
2087 return;
2088 };
2089 let trimmed = value.trim();
2090 if trimmed.is_empty() {
2091 return;
2092 }
2093 params.insert(
2094 option_name.to_string(),
2095 toml::Value::String(trimmed.to_string()),
2096 );
2097}
2098
2099pub fn provider_names() -> Vec<String> {
2101 effective_config().providers.keys().cloned().collect()
2102}
2103
2104pub fn known_model_names() -> Vec<String> {
2106 effective_config().aliases.keys().cloned().collect()
2107}
2108
2109pub fn alias_entries() -> Vec<(String, AliasDef)> {
2110 effective_config().aliases.into_iter().collect()
2111}
2112
2113pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
2114 effective_config().alias_tool_calling.get(alias).cloned()
2115}
2116
2117pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
2119 let config = effective_config();
2120 model_catalog_entries_with_config(&config)
2121}
2122
2123pub(crate) fn model_catalog_entries_with_config(
2124 config: &ProvidersConfig,
2125) -> Vec<(String, ModelDef)> {
2126 sorted_model_entries_with_config(config)
2127 .into_iter()
2128 .map(|(id, model)| {
2129 let provider = model.provider.clone();
2130 (
2131 id.clone(),
2132 with_effective_capability_tags(id, provider, model),
2133 )
2134 })
2135 .collect()
2136}
2137
2138pub(crate) fn sorted_model_entries_with_config(
2139 config: &ProvidersConfig,
2140) -> Vec<(String, ModelDef)> {
2141 let mut entries: Vec<_> = config
2142 .models
2143 .iter()
2144 .map(|(id, model)| (id.clone(), model.clone()))
2145 .collect();
2146 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
2147 model_a
2148 .provider
2149 .cmp(&model_b.provider)
2150 .then_with(|| id_a.cmp(id_b))
2151 });
2152 entries
2153}
2154
2155pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
2156 effective_config()
2157 .models
2158 .get(model_id)
2159 .cloned()
2160 .map(|model| {
2161 let provider = model.provider.clone();
2162 with_effective_capability_tags(model_id.to_string(), provider, model)
2163 })
2164}
2165
2166pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
2167 model_catalog_entry(model_id).and_then(|model| model.rate_limits)
2168}
2169
2170pub fn wire_model_id(model_id: &str) -> String {
2171 model_catalog_entry(model_id)
2172 .and_then(|model| model.wire_model)
2173 .unwrap_or_else(|| model_id.to_string())
2174}
2175
2176pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
2177 provider_config(provider).and_then(|provider| {
2178 provider
2179 .rate_limits
2180 .unwrap_or_default()
2181 .with_rpm_fallback(provider.rpm)
2182 })
2183}
2184
2185pub fn model_equivalence_group(model_id: &str) -> Option<String> {
2186 model_catalog_entry(model_id).and_then(|model| {
2187 model
2188 .equivalence_group
2189 .or(model.logical_model)
2190 .filter(|group| !group.trim().is_empty())
2191 })
2192}
2193
/// Lists catalog models that can stand in for the model named by `selector`.
///
/// The selector is resolved first. If the resolved model has no equivalence group or
/// is not in the catalog, the result is empty. Candidates come back in sorted catalog
/// order with effective capability tags applied.
pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
    let resolved = resolve_model_info(selector);
    let Some(group) = model_equivalence_group(&resolved.id) else {
        return Vec::new();
    };
    let config = effective_config();
    let Some(source) = config.models.get(&resolved.id) else {
        return Vec::new();
    };
    let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
    // The runtime context window, when declared, takes the place of the nominal one.
    let source_context = source
        .runtime_context_window
        .unwrap_or(source.context_window);

    sorted_model_entries_with_config(&config)
        .into_iter()
        // Drop the source model itself (same id and same provider).
        .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
        .filter(|(_, model)| !model.deprecated)
        .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
        // Keep only members of the same group, named by either field.
        .filter(|(_, model)| {
            model.equivalence_group.as_deref() == Some(group.as_str())
                || model.logical_model.as_deref() == Some(group.as_str())
        })
        // A candidate needs at least the source's context size, every tool/reasoning
        // capability the source has, and the same structured-output mode.
        .filter(|(id, model)| {
            let caps = crate::llm::capabilities::lookup(&model.provider, id);
            let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
            candidate_context >= source_context
                && (!source_caps.native_tools || caps.native_tools)
                && (!source_caps.text_tool_wire_format_supported
                    || caps.text_tool_wire_format_supported)
                && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
                && source_caps.structured_output_mode == caps.structured_output_mode
        })
        .map(|(id, model)| {
            let provider = model.provider.clone();
            (
                id.clone(),
                with_effective_capability_tags(id, provider, model),
            )
        })
        .collect()
}
2239
2240pub fn qc_default_model(provider: &str) -> Option<String> {
2241 std::env::var("BURIN_QC_MODEL")
2242 .ok()
2243 .filter(|value| !value.trim().is_empty())
2244 .or_else(|| {
2245 effective_config()
2246 .qc_defaults
2247 .get(&provider.to_lowercase())
2248 .cloned()
2249 })
2250}
2251
2252pub fn default_model_for_provider(provider: &str) -> String {
2253 if provider_uses_acp(provider) {
2254 return "default".to_string();
2255 }
2256 match provider {
2257 "local" => std::env::var("LOCAL_LLM_MODEL")
2258 .or_else(|_| std::env::var("HARN_LLM_MODEL"))
2259 .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
2260 "mlx" => std::env::var("MLX_MODEL_ID")
2261 .unwrap_or_else(|_| "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit".to_string()),
2262 "openai" => "gpt-4o-mini".to_string(),
2263 "ollama" => "llama3.2".to_string(),
2264 "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
2265 _ => "claude-sonnet-4-6".to_string(),
2266 }
2267}
2268
2269pub fn qc_defaults() -> BTreeMap<String, String> {
2270 effective_config().qc_defaults
2271}
2272
2273pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2274 effective_config()
2275 .models
2276 .get(model_id)
2277 .and_then(|model| model.pricing.clone())
2278}
2279
2280pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2285 effective_config()
2286 .models
2287 .get(model_id)
2288 .and_then(|model| model.fast_mode.as_ref())
2289 .and_then(|fast_mode| fast_mode.pricing.clone())
2290}
2291
2292pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
2293 model_pricing_per_mtok(model_id)
2294 .map(|pricing| {
2295 (
2296 pricing.input_per_mtok / 1000.0,
2297 pricing.output_per_mtok / 1000.0,
2298 )
2299 })
2300 .or_else(|| {
2301 let (input, output, _) = provider_economics(provider);
2302 match (input, output) {
2303 (Some(input), Some(output)) => Some((input, output)),
2304 _ => None,
2305 }
2306 })
2307}
2308
2309pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
2310 match auth_env {
2311 AuthEnv::None => Vec::new(),
2312 AuthEnv::Single(name) => vec![name.clone()],
2313 AuthEnv::Multiple(names) => names.clone(),
2314 }
2315}
2316
2317pub fn provider_key_available(provider: &str) -> bool {
2318 let Some(pdef) = provider_config(provider) else {
2319 return provider == "ollama";
2320 };
2321 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
2322 return true;
2323 }
2324 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
2325 std::env::var(env_name)
2326 .ok()
2327 .is_some_and(|value| !value.trim().is_empty())
2328 })
2329}
2330
2331pub fn available_provider_names() -> Vec<String> {
2332 provider_names()
2333 .into_iter()
2334 .filter(|provider| provider_key_available(provider))
2335 .collect()
2336}
2337
2338pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
2340 provider_config(provider)
2341 .map(|p| p.features.iter().any(|f| f == feature))
2342 .unwrap_or(false)
2343}
2344
2345pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
2349 provider_config(provider)
2350 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
2351 .unwrap_or((None, None, None))
2352}
2353
/// Channel a tool format maps to; see `tool_format_channel` for the mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatChannel {
    /// Selected by the `"native"` format.
    Native,
    /// Selected by the `"text"` and `"json"` formats.
    Text,
}
2369
2370pub fn tool_format_channel(format: &str) -> Option<ToolFormatChannel> {
2378 match format {
2379 "native" => Some(ToolFormatChannel::Native),
2380 "text" | "json" => Some(ToolFormatChannel::Text),
2381 _ => None,
2382 }
2383}
2384
/// True when `format` is one of the names `tool_format_channel` recognizes.
pub fn is_known_tool_format(format: &str) -> bool {
    tool_format_channel(format).is_some()
}
2392
2393pub fn default_tool_format(model: &str, provider: &str) -> String {
2399 let config = effective_config();
2400 default_tool_format_with_config(&config, model, provider)
2401}
2402
2403fn default_tool_format_with_config(
2404 config: &ProvidersConfig,
2405 model: &str,
2406 provider: &str,
2407) -> String {
2408 for (name, alias) in &config.aliases {
2410 let matches = (alias.id == model && alias.provider == provider) || name == model;
2411 if matches {
2412 if let Some(ref fmt) = alias.tool_format {
2413 return fmt.clone();
2414 }
2415 }
2416 }
2417 let capabilities = crate::llm::capabilities::lookup(provider, model);
2418 if let Some(format) = capabilities.preferred_tool_format.as_deref() {
2419 if is_known_tool_format(format) {
2426 return format.to_string();
2427 }
2428 }
2429 let capability_matrix_native = capabilities.native_tools;
2430 let legacy_provider_native = config
2431 .providers
2432 .get(provider)
2433 .map(|p| p.features.iter().any(|f| f == "native_tools"))
2434 .unwrap_or(false);
2435 if capability_matrix_native || legacy_provider_native {
2436 "native".to_string()
2437 } else {
2438 "json".to_string()
2449 }
2450}
2451
2452fn with_effective_capability_tags(
2453 model_id: String,
2454 provider: String,
2455 mut model: ModelDef,
2456) -> ModelDef {
2457 model.capabilities = effective_model_capability_tags(&provider, &model_id);
2458 model
2459}
2460
2461pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2465 let caps = crate::llm::capabilities::lookup(provider, model_id);
2466 capability_tags_from_capabilities(&caps)
2467}
2468
2469pub(crate) fn capability_tags_from_capabilities(
2470 caps: &crate::llm::capabilities::Capabilities,
2471) -> Vec<String> {
2472 let mut tags = Vec::new();
2473 tags.push("streaming".to_string());
2476 if caps.native_tools || caps.text_tool_wire_format_supported {
2477 tags.push("tools".to_string());
2478 }
2479 if !caps.tool_search.is_empty() {
2480 tags.push("tool_search".to_string());
2481 }
2482 if caps.vision || caps.vision_supported {
2483 tags.push("vision".to_string());
2484 }
2485 if caps.audio {
2486 tags.push("audio".to_string());
2487 }
2488 if caps.pdf {
2489 tags.push("pdf".to_string());
2490 }
2491 if caps.video {
2492 tags.push("video".to_string());
2493 }
2494 if caps.files_api_supported {
2495 tags.push("files".to_string());
2496 }
2497 if caps.prompt_caching {
2498 tags.push("prompt_caching".to_string());
2499 }
2500 if !caps.thinking_modes.is_empty() {
2501 tags.push("thinking".to_string());
2502 }
2503 if caps.interleaved_thinking_supported
2504 || caps
2505 .thinking_modes
2506 .iter()
2507 .any(|mode| mode == "adaptive" || mode == "effort")
2508 {
2509 tags.push("extended_thinking".to_string());
2510 }
2511 if caps.structured_output.is_some() || caps.json_schema.is_some() {
2512 tags.push("structured_output".to_string());
2513 }
2514 tags
2515}
2516
2517pub fn resolve_tier_model(
2519 target: &str,
2520 preferred_provider: Option<&str>,
2521) -> Option<(String, String)> {
2522 let config = effective_config();
2523
2524 let candidate_aliases = if let Some(provider) = preferred_provider {
2525 vec![
2526 format!("{provider}/{target}"),
2527 format!("{provider}:{target}"),
2528 format!("tier/{target}"),
2529 target.to_string(),
2530 ]
2531 } else {
2532 vec![format!("tier/{target}"), target.to_string()]
2533 };
2534
2535 for alias_name in candidate_aliases {
2536 if let Some(alias) = config.aliases.get(&alias_name) {
2537 return Some((alias.id.clone(), alias.provider.clone()));
2538 }
2539 }
2540
2541 None
2542}
2543
2544pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2548 let config = effective_config();
2549 let mut seen = std::collections::BTreeSet::new();
2550 let mut candidates = Vec::new();
2551
2552 for alias in config.aliases.values() {
2553 let pair = (alias.id.clone(), alias.provider.clone());
2554 if seen.contains(&pair) {
2555 continue;
2556 }
2557 if model_tier(&alias.id) == target {
2558 seen.insert(pair.clone());
2559 candidates.push(pair);
2560 }
2561 }
2562
2563 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2564 provider_a
2565 .cmp(provider_b)
2566 .then_with(|| model_a.cmp(model_b))
2567 });
2568 candidates
2569}
2570
2571pub fn all_model_candidates() -> Vec<(String, String)> {
2574 let config = effective_config();
2575 let mut seen = std::collections::BTreeSet::new();
2576 let mut candidates = Vec::new();
2577
2578 for alias in config.aliases.values() {
2579 let pair = (alias.id.clone(), alias.provider.clone());
2580 if seen.insert(pair.clone()) {
2581 candidates.push(pair);
2582 }
2583 }
2584
2585 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2586 provider_a
2587 .cmp(provider_b)
2588 .then_with(|| model_a.cmp(model_b))
2589 });
2590 candidates
2591}
2592
2593pub fn pick_complementary_reviewer(
2594 options: ComplementaryReviewerOptions,
2595) -> ComplementaryReviewerSelection {
2596 let config = effective_config();
2597 let mut author = resolve_model_info(&options.author_model);
2598 if let Some(provider) = options
2599 .author_provider
2600 .as_deref()
2601 .map(str::trim)
2602 .filter(|provider| !provider.is_empty())
2603 {
2604 author.provider = provider.to_string();
2605 author.family = model_family_with_config(&config, &author.provider, &author.id);
2606 author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
2607 author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
2608 }
2609 let author_entry = config.models.get(&author.id);
2610 let author_identity = complementary_identity(
2611 author.id.clone(),
2612 author.provider.clone(),
2613 author.family.clone(),
2614 author.lineage.clone(),
2615 author.tier.clone(),
2616 author_entry.and_then(|model| model.pricing.clone()),
2617 );
2618
2619 let fallback =
2620 |code: ReviewerFallbackCode, fallback_reason: String| ComplementaryReviewerSelection {
2621 intent: options.intent.as_str().to_string(),
2622 reviewer: author_identity.clone(),
2623 estimated_incremental_cost: cost_estimate(
2624 author_identity.pricing.as_ref(),
2625 author_identity.pricing.as_ref(),
2626 ),
2627 author: author_identity.clone(),
2628 fallback: true,
2629 reason: format!(
2630 "using author model {} because {fallback_reason}",
2631 author_identity.id
2632 ),
2633 fallback_reason: Some(fallback_reason),
2634 fallback_code: Some(code.as_code().to_string()),
2635 };
2636
2637 if author_identity.family == "unknown" {
2638 return fallback(
2639 ReviewerFallbackCode::UnknownAuthorFamily,
2640 "author model family is unknown".to_string(),
2641 );
2642 }
2643
2644 let preferred_families = author_entry
2645 .map(|model| model.complementary_with.clone())
2646 .unwrap_or_default();
2647 let author_refs = reviewer_match_refs(&author_identity);
2648 let mut rejected_by_price = 0usize;
2649 let mut diff_family_seen = 0usize;
2650 let mut candidates = Vec::new();
2651
2652 for (id, model) in config.models.iter() {
2653 if id == &author_identity.id && model.provider == author_identity.provider {
2654 continue;
2655 }
2656 if model.deprecated || model.availability != ModelAvailability::Serverless {
2657 continue;
2658 }
2659 let family = model_family_with_config(&config, &model.provider, id);
2660 if family == "unknown" || family == author_identity.family {
2661 continue;
2662 }
2663 diff_family_seen += 1;
2664 let lineage = model_lineage_with_config(&config, &model.provider, id);
2665 let candidate_identity = complementary_identity(
2666 id.clone(),
2667 model.provider.clone(),
2668 family,
2669 lineage,
2670 model_tier_with_config(&config, id),
2671 model.pricing.clone(),
2672 );
2673 if model
2674 .avoid_as_reviewer_for
2675 .iter()
2676 .any(|selector| refs_contain_selector(&author_refs, selector))
2677 {
2678 continue;
2679 }
2680 if exceeds_price_cap(
2681 author_identity.pricing.as_ref(),
2682 candidate_identity.pricing.as_ref(),
2683 options.max_price_multiplier,
2684 ) {
2685 rejected_by_price += 1;
2686 continue;
2687 }
2688 let score = reviewer_score(
2689 &options,
2690 &author_identity,
2691 &candidate_identity,
2692 model,
2693 &preferred_families,
2694 );
2695 candidates.push(ReviewerCandidate {
2696 identity: candidate_identity,
2697 score,
2698 });
2699 }
2700
2701 candidates.sort_by(|left, right| {
2702 right
2703 .score
2704 .partial_cmp(&left.score)
2705 .unwrap_or(std::cmp::Ordering::Equal)
2706 .then_with(|| left.identity.provider.cmp(&right.identity.provider))
2707 .then_with(|| left.identity.id.cmp(&right.identity.id))
2708 });
2709
2710 let Some(best) = candidates.into_iter().next() else {
2711 if rejected_by_price > 0 {
2712 let cap = options.max_price_multiplier.unwrap_or_default();
2713 return fallback(
2714 ReviewerFallbackCode::NoDiffFamilyWithinPrice,
2715 format!("no different-family reviewer satisfied max_price_multiplier {cap}"),
2716 );
2717 }
2718 if diff_family_seen == 0 {
2719 return fallback(
2720 ReviewerFallbackCode::NoDiffFamilyServerless,
2721 "no active serverless different-family reviewer is cataloged".to_string(),
2722 );
2723 }
2724 return fallback(
2725 ReviewerFallbackCode::AllDiffFamilyExcluded,
2726 "all different-family reviewer candidates were excluded".to_string(),
2727 );
2728 };
2729
2730 let estimate = cost_estimate(
2731 best.identity.pricing.as_ref(),
2732 author_identity.pricing.as_ref(),
2733 );
2734 ComplementaryReviewerSelection {
2735 intent: options.intent.as_str().to_string(),
2736 reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
2737 estimated_incremental_cost: estimate,
2738 author: author_identity,
2739 reviewer: best.identity,
2740 fallback: false,
2741 fallback_reason: None,
2742 fallback_code: None,
2743 }
2744}
2745
/// A catalog model that survived every filter in
/// `pick_complementary_reviewer`, paired with its ranking score.
#[derive(Debug, Clone)]
struct ReviewerCandidate {
    /// Identity of the candidate reviewer model.
    identity: ComplementaryModelIdentity,
    /// Value returned by `reviewer_score`; candidates are sorted by it, highest first.
    score: f64,
}
2751
/// Bundles the given fields into a `ComplementaryModelIdentity`.
///
/// Pure constructor: every argument is moved into the field of the same name
/// with no validation or normalization.
fn complementary_identity(
    id: String,
    provider: String,
    family: String,
    lineage: String,
    tier: String,
    pricing: Option<ModelPricing>,
) -> ComplementaryModelIdentity {
    ComplementaryModelIdentity {
        id,
        provider,
        family,
        lineage,
        tier,
        pricing,
    }
}
2769
2770fn reviewer_score(
2771 options: &ComplementaryReviewerOptions,
2772 author: &ComplementaryModelIdentity,
2773 candidate: &ComplementaryModelIdentity,
2774 model: &ModelDef,
2775 preferred_families: &[String],
2776) -> f64 {
2777 let candidate_refs = reviewer_match_refs(candidate);
2778 let mut score = 0.0;
2779 if let Some(rank) = preferred_families
2780 .iter()
2781 .position(|selector| refs_contain_selector(&candidate_refs, selector))
2782 {
2783 score += 1_000.0 - rank as f64;
2784 }
2785 if candidate.provider != author.provider {
2786 score += 100.0;
2787 }
2788 score += match tier_distance(&author.tier, &candidate.tier) {
2789 0 => 80.0,
2790 1 => 45.0,
2791 2 => 15.0,
2792 _ => 0.0,
2793 };
2794 for strength in intent_strengths(options.intent) {
2795 if model.strengths.iter().any(|tag| tag == strength) {
2796 score += 8.0;
2797 }
2798 }
2799 if model.capabilities.iter().any(|tag| tag == "tools") {
2800 score += 4.0;
2801 }
2802 if let (Some(author_total), Some(candidate_total)) = (
2803 pricing_total(author.pricing.as_ref()),
2804 pricing_total(candidate.pricing.as_ref()),
2805 ) {
2806 if author_total > 0.0 {
2807 let ratio = candidate_total / author_total;
2808 if ratio <= 1.0 {
2809 score += 20.0;
2810 }
2811 score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2812 }
2813 }
2814 score
2815}
2816
2817fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2818 match intent {
2819 ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2820 ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2821 ComplementaryReviewerIntent::PlanReview => {
2822 &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2823 }
2824 }
2825}
2826
2827fn tier_distance(left: &str, right: &str) -> u8 {
2828 let left = tier_rank(left);
2829 let right = tier_rank(right);
2830 left.abs_diff(right)
2831}
2832
/// Maps a tier name onto an ordinal: `small` = 0, `mid` = 1,
/// `frontier` / `reasoning` = 2.
///
/// Any unrecognized name is ranked like `mid`.
fn tier_rank(tier: &str) -> u8 {
    match tier {
        "small" => 0,
        "frontier" | "reasoning" => 2,
        // "mid" and every unknown tier share the middle rank.
        _ => 1,
    }
}
2841
2842fn exceeds_price_cap(
2843 author_pricing: Option<&ModelPricing>,
2844 candidate_pricing: Option<&ModelPricing>,
2845 max_price_multiplier: Option<f64>,
2846) -> bool {
2847 let Some(max_price_multiplier) = max_price_multiplier else {
2848 return false;
2849 };
2850 let Some(author_total) = pricing_total(author_pricing) else {
2851 return false;
2852 };
2853 let Some(candidate_total) = pricing_total(candidate_pricing) else {
2854 return true;
2855 };
2856 author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2857}
2858
2859fn cost_estimate(
2860 reviewer_pricing: Option<&ModelPricing>,
2861 author_pricing: Option<&ModelPricing>,
2862) -> Option<ComplementaryCostEstimate> {
2863 let reviewer_pricing = reviewer_pricing?;
2864 let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2865 let multiplier_vs_author = pricing_total(author_pricing)
2866 .filter(|author_total| *author_total > 0.0)
2867 .map(|author_total| total_per_mtok / author_total);
2868 Some(ComplementaryCostEstimate {
2869 input_per_mtok: reviewer_pricing.input_per_mtok,
2870 output_per_mtok: reviewer_pricing.output_per_mtok,
2871 total_per_mtok,
2872 multiplier_vs_author,
2873 })
2874}
2875
2876fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2877 pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2878}
2879
2880fn reviewer_reason(
2881 author: &ComplementaryModelIdentity,
2882 reviewer: &ComplementaryModelIdentity,
2883 estimate: Option<&ComplementaryCostEstimate>,
2884) -> String {
2885 let cost = estimate
2886 .and_then(|estimate| estimate.multiplier_vs_author)
2887 .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2888 .unwrap_or_else(|| "price ratio unavailable".to_string());
2889 format!(
2890 "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2891 reviewer.id,
2892 reviewer.provider,
2893 reviewer.family,
2894 author.family,
2895 reviewer.tier,
2896 author.tier,
2897 cost
2898 )
2899}
2900
2901fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2902 BTreeSet::from([
2903 identity.id.to_ascii_lowercase(),
2904 identity.provider.to_ascii_lowercase(),
2905 format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2906 format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2907 identity.family.to_ascii_lowercase(),
2908 identity.lineage.to_ascii_lowercase(),
2909 ])
2910}
2911
2912fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2913 normalized_catalog_token(Some(selector))
2914 .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2915 .is_some_and(|selector| refs.contains(&selector))
2916}
2917
2918use harn_glob::match_name as glob_match;
2921
2922fn dirs_or_home() -> Option<String> {
2923 crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2924}
2925
2926pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2929 if let Some(env_name) = &pdef.base_url_env {
2930 if let Ok(val) = std::env::var(env_name) {
2931 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2933 if !trimmed.is_empty() {
2934 return trimmed.to_string();
2935 }
2936 }
2937 }
2938 pdef.base_url.clone()
2939}
2940
/// Text of `llm/providers.toml`, embedded into the binary at compile time by
/// `include_str!` and parsed by `default_config`.
const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2945
2946fn default_config() -> ProvidersConfig {
2960 parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2961 .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2962}
2963
/// Test-only helper: the built-in configuration with `overlay` merged on top.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2970
2971#[cfg(test)]
2972mod tests {
2973 use super::*;
2974
    /// Clears any user overrides so a test starts from, or leaves behind, a
    /// clean state. Thin wrapper around `clear_user_overrides`.
    fn reset_overrides() {
        clear_user_overrides();
    }
2978
2979 #[test]
2980 fn resolve_model_info_guards_bad_native_pin_on_unreliable_route() {
2981 reset_overrides();
2982 let overlay = parse_config_toml(
2989 "[aliases.guard-ds]\nid = \"deepseek/deepseek-v3.2\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2990 )
2991 .expect("overlay parses");
2992 set_user_overrides(Some(overlay));
2993 let resolved = resolve_model_info("guard-ds");
2994 assert_eq!(
2995 resolved.tool_format, "text",
2996 "a native pin on a native_unreliable route must be auto-corrected to text"
2997 );
2998 clear_user_overrides();
2999
3000 let overlay_ok = parse_config_toml(
3002 "[aliases.guard-ds-ok]\nid = \"deepseek/deepseek-v3-base\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
3003 )
3004 .expect("overlay parses");
3005 set_user_overrides(Some(overlay_ok));
3006 let resolved_ok = resolve_model_info("guard-ds-ok");
3007 assert_eq!(resolved_ok.tool_format, "native");
3008 clear_user_overrides();
3009 }
3010
3011 #[test]
3012 fn auto_select_prefers_local_provider_without_cloud_credentials() {
3013 let config = parse_config_toml(
3017 "[providers.ollama]\nbase_url = \"http://localhost:11434\"\nchat_endpoint = \"/v1/chat/completions\"\n",
3018 )
3019 .expect("config parses");
3020 assert!(provider_is_local(config.providers.get("ollama").unwrap()));
3021 assert_eq!(auto_select_provider(&config), "ollama");
3022 }
3023
3024 #[test]
3025 fn auto_select_falls_back_to_documented_default_when_empty() {
3026 let config = parse_config_toml("").expect("config parses");
3027 assert_eq!(auto_select_provider(&config), FALLBACK_PROVIDER);
3028 }
3029
3030 #[test]
3031 fn suppress_routes_parse_and_merge_dedupe() {
3032 let mut base =
3033 parse_config_toml("[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\"]\n")
3034 .expect("base parses");
3035 assert!(!base.is_empty(), "a suppress-only overlay is not empty");
3036 let overlay = parse_config_toml(
3037 "[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\", \"ollama:img:tag\"]\n",
3038 )
3039 .expect("overlay parses");
3040 base.merge_from(&overlay);
3041 assert_eq!(
3042 base.suppress.routes,
3043 vec![
3044 "together:Qwen/Qwen3-Coder-Next-FP8".to_string(),
3045 "ollama:img:tag".to_string(),
3046 ],
3047 "merge appends new selectors without duplicating existing ones"
3048 );
3049 }
3050
    // Single-row catalog shared by the `patch_models_*` tests: one model,
    // `demo/patch-target`, with scalar fields, two arrays and a nested
    // `pricing` table for patches to act on.
    const PATCH_BASE_TOML: &str = r#"
[models."demo/patch-target"]
name = "Patch Target"
provider = "demo"
context_window = 128000
stream_timeout = 300.0
capabilities = ["tools", "vision"]
strengths = ["coding"]

[models."demo/patch-target".pricing]
input_per_mtok = 1.0
output_per_mtok = 5.0
"#;
3065
3066 fn patch_base() -> ProvidersConfig {
3067 parse_config_toml(PATCH_BASE_TOML).expect("patch base parses")
3068 }
3069
3070 fn patched_row(config: &ProvidersConfig) -> &ModelDef {
3071 config
3072 .models
3073 .get("demo/patch-target")
3074 .expect("patch target row present")
3075 }
3076
3077 #[test]
3078 fn patch_models_scalar_and_nested_field_preserve_siblings() {
3079 let mut base = patch_base();
3080 let overlay = parse_config_toml(
3081 "[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n\
3082 [patch.models.\"demo/patch-target\".pricing]\noutput_per_mtok = 2.5\n",
3083 )
3084 .expect("patch overlay parses");
3085 assert!(!overlay.is_empty(), "a patch-only overlay is not empty");
3086 base.merge_from(&overlay);
3087 let row = patched_row(&base);
3088 assert_eq!(row.stream_timeout, Some(1200.0), "patched scalar applies");
3089 assert_eq!(row.name, "Patch Target", "unpatched scalar is intact");
3090 assert_eq!(row.context_window, 128000, "unpatched scalar is intact");
3091 assert_eq!(
3092 row.capabilities,
3093 vec!["tools".to_string(), "vision".to_string()],
3094 "unpatched array is intact"
3095 );
3096 let pricing = row.pricing.as_ref().expect("pricing survives the patch");
3097 assert_eq!(pricing.output_per_mtok, 2.5, "patched nested field applies");
3098 assert_eq!(
3099 pricing.input_per_mtok, 1.0,
3100 "sibling nested field is preserved by the deep merge"
3101 );
3102 assert!(base.dangling_model_patches().is_empty());
3103 }
3104
3105 #[test]
3106 fn patch_models_array_replaces_wholesale() {
3107 let mut base = patch_base();
3108 let overlay =
3109 parse_config_toml("[patch.models.\"demo/patch-target\"]\ncapabilities = [\"tools\"]\n")
3110 .expect("patch overlay parses");
3111 base.merge_from(&overlay);
3112 let row = patched_row(&base);
3113 assert_eq!(
3114 row.capabilities,
3115 vec!["tools".to_string()],
3116 "arrays replace wholesale — no element-wise merge"
3117 );
3118 assert_eq!(
3119 row.strengths,
3120 vec!["coding".to_string()],
3121 "arrays the patch does not name are intact"
3122 );
3123 }
3124
3125 #[test]
3126 fn patch_models_wins_over_whole_row_in_same_overlay() {
3127 let mut base = patch_base();
3128 let overlay = parse_config_toml(
3129 "[models.\"demo/patch-target\"]\n\
3130 name = \"Replaced Row\"\nprovider = \"demo\"\ncontext_window = 64000\n\
3131 stream_timeout = 600.0\n\
3132 [patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n",
3133 )
3134 .expect("overlay parses");
3135 base.merge_from(&overlay);
3136 let row = patched_row(&base);
3137 assert_eq!(
3138 row.name, "Replaced Row",
3139 "the whole-row replacement lands first"
3140 );
3141 assert_eq!(row.context_window, 64000);
3142 assert_eq!(
3143 row.stream_timeout,
3144 Some(1200.0),
3145 "the same overlay's patch fields win over its whole-row fields"
3146 );
3147 }
3148
3149 #[test]
3150 fn patch_models_chained_layers_accumulate_and_later_wins() {
3151 let mut base = patch_base();
3152 let layer1 =
3153 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 900.0\n")
3154 .expect("layer1 parses");
3155 let layer2 = parse_config_toml(
3156 "[patch.models.\"demo/patch-target\".pricing]\noutput_per_mtok = 2.5\n",
3157 )
3158 .expect("layer2 parses");
3159 base.merge_from(&layer1);
3160 base.merge_from(&layer2);
3161 let row = patched_row(&base);
3162 assert_eq!(
3163 row.stream_timeout,
3164 Some(900.0),
3165 "layer1's field patch survives layer2 patching a different field"
3166 );
3167 assert_eq!(
3168 row.pricing
3169 .as_ref()
3170 .expect("pricing present")
3171 .output_per_mtok,
3172 2.5,
3173 "layer2's field patch applies"
3174 );
3175
3176 let layer3 =
3177 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n")
3178 .expect("layer3 parses");
3179 base.merge_from(&layer3);
3180 assert_eq!(
3181 patched_row(&base).stream_timeout,
3182 Some(1200.0),
3183 "for the same field, the later layer's patch wins"
3184 );
3185 }
3186
3187 #[test]
3188 fn patch_models_sticky_across_later_whole_row_replacement() {
3189 let mut base = patch_base();
3190 let patch_layer =
3191 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n")
3192 .expect("patch layer parses");
3193 base.merge_from(&patch_layer);
3194 let replacement_layer = parse_config_toml(
3198 "[models.\"demo/patch-target\"]\n\
3199 name = \"Refreshed Row\"\nprovider = \"demo\"\ncontext_window = 256000\n\
3200 stream_timeout = 300.0\n",
3201 )
3202 .expect("replacement layer parses");
3203 base.merge_from(&replacement_layer);
3204 let row = patched_row(&base);
3205 assert_eq!(row.name, "Refreshed Row", "the whole-row refresh lands");
3206 assert_eq!(row.context_window, 256000);
3207 assert_eq!(
3208 row.stream_timeout,
3209 Some(1200.0),
3210 "the sticky patch re-applies on top of the refreshed row"
3211 );
3212 }
3213
3214 #[test]
3215 fn patch_models_dangling_patch_reports_and_applies_when_row_arrives() {
3216 let mut base = patch_base();
3217 let dangling =
3218 parse_config_toml("[patch.models.\"demo/not-yet-cataloged\"]\nstream_timeout = 42.0\n")
3219 .expect("dangling patch parses");
3220 base.merge_from(&dangling);
3221 assert_eq!(
3222 base.dangling_model_patches(),
3223 vec!["demo/not-yet-cataloged"],
3224 "a patch with no matching row is reported, not dropped"
3225 );
3226 assert_eq!(
3227 patched_row(&base).stream_timeout,
3228 Some(300.0),
3229 "existing rows are untouched by a dangling patch"
3230 );
3231
3232 let late_row = parse_config_toml(
3234 "[models.\"demo/not-yet-cataloged\"]\n\
3235 name = \"Late Arrival\"\nprovider = \"demo\"\ncontext_window = 8192\n",
3236 )
3237 .expect("late row parses");
3238 base.merge_from(&late_row);
3239 assert!(base.dangling_model_patches().is_empty());
3240 let row = base
3241 .models
3242 .get("demo/not-yet-cataloged")
3243 .expect("late row present");
3244 assert_eq!(row.stream_timeout, Some(42.0), "the held patch applied");
3245 assert_eq!(row.name, "Late Arrival");
3246 }
3247
3248 #[test]
3249 fn patch_models_type_error_keeps_unpatched_row() {
3250 let mut base = patch_base();
3251 let bad =
3252 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = \"soon\"\n")
3253 .expect("the patch overlay itself is valid TOML");
3254 base.merge_from(&bad);
3255 let row = patched_row(&base);
3256 assert_eq!(
3257 row.stream_timeout,
3258 Some(300.0),
3259 "a type-invalid patch keeps the unpatched row"
3260 );
3261 assert_eq!(row.name, "Patch Target", "the rest of the row is intact");
3262 }
3263
3264 #[test]
3265 fn model_rows_roundtrip_through_toml_value_for_patching() {
3266 let config = default_config();
3272 assert!(!config.models.is_empty());
3273 for (id, row) in &config.models {
3274 let value = toml::Value::try_from(row)
3275 .unwrap_or_else(|error| panic!("serialize model row {id}: {error}"));
3276 let roundtripped = ModelDef::deserialize(value)
3277 .unwrap_or_else(|error| panic!("deserialize model row {id}: {error}"));
3278 assert_eq!(&roundtripped, row, "model row {id} must round-trip");
3279 }
3280 }
3281
3282 #[test]
3283 fn test_glob_match_prefix() {
3284 assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
3285 assert!(glob_match("gpt-*", "gpt-4o"));
3286 assert!(!glob_match("claude-*", "gpt-4o"));
3287 }
3288
3289 #[test]
3290 fn test_glob_match_suffix() {
3291 assert!(glob_match("*-latest", "llama3.2-latest"));
3292 assert!(!glob_match("*-latest", "llama3.2"));
3293 }
3294
3295 #[test]
3296 fn test_glob_match_middle() {
3297 assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
3298 assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
3299 }
3300
3301 #[test]
3302 fn test_glob_match_exact() {
3303 assert!(glob_match("gpt-4o", "gpt-4o"));
3304 assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
3305 }
3306
3307 #[test]
3308 fn test_infer_provider_from_defaults() {
3309 let _guard = crate::llm::env_guard();
3310 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3311 unsafe {
3312 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3313 }
3314
3315 assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
3316 assert_eq!(infer_provider("gpt-4o"), "openai");
3317 assert_eq!(infer_provider("o1-preview"), "openai");
3318 assert_eq!(infer_provider("o3-mini"), "openai");
3319 assert_eq!(infer_provider("o4-mini"), "openai");
3320 assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
3321 assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
3322 assert_eq!(infer_provider("llama3.2:latest"), "ollama");
3323 assert_eq!(infer_provider("unknown-model"), "anthropic");
3324
3325 unsafe {
3326 match prev_default_provider {
3327 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3328 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3329 }
3330 }
3331 }
3332
3333 #[test]
3334 fn test_infer_provider_prefix_rules() {
3335 assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
3336 assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
3337 assert_eq!(infer_provider("local:owner/model"), "ollama");
3339 assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
3340 }
3341
3342 #[test]
3343 fn test_openrouter_inference_requires_one_slash() {
3344 let _guard = crate::llm::env_guard();
3345 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3346 unsafe {
3347 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3348 }
3349
3350 assert_eq!(infer_provider("org/model"), "openrouter");
3351 assert_eq!(infer_provider("org/team/model"), "anthropic");
3352
3353 unsafe {
3354 match prev_default_provider {
3355 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3356 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3357 }
3358 }
3359 }
3360
3361 #[test]
3362 fn test_cerebras_inference_beats_openrouter_slash_fallback() {
3363 let _guard = crate::llm::env_guard();
3364 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3365 unsafe {
3366 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3367 }
3368
3369 assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
3370 assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
3371 assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
3372
3373 unsafe {
3374 match prev_default_provider {
3375 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3376 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3377 }
3378 }
3379 }
3380
3381 #[test]
3382 fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
3383 let _guard = crate::llm::env_guard();
3388 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3389 unsafe {
3390 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3391 }
3392
3393 for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
3394 assert_eq!(
3395 infer_provider(model),
3396 "cerebras",
3397 "{model} should route to its catalog provider"
3398 );
3399 let resolved = resolve_model_info(model);
3400 assert_eq!(resolved.id, model);
3401 assert_eq!(resolved.provider, "cerebras");
3402 }
3403
3404 unsafe {
3405 match prev_default_provider {
3406 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3407 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3408 }
3409 }
3410 }
3411
3412 #[test]
3413 fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
3414 reset_overrides();
3415
3416 assert_eq!(
3417 wire_model_id("groq/openai/gpt-oss-120b"),
3418 "openai/gpt-oss-120b"
3419 );
3420 assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");
3421
3422 let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
3423 let ids = equivalents
3424 .iter()
3425 .map(|(id, _)| id.as_str())
3426 .collect::<Vec<_>>();
3427
3428 assert!(
3429 ids.contains(&"groq/openai/gpt-oss-120b"),
3430 "Cerebras GPT-OSS should surface the Groq serving variant"
3431 );
3432 assert!(
3433 !ids.contains(&"gpt-oss-120b"),
3434 "equivalence results should not include the source row"
3435 );
3436 assert!(equivalents.iter().all(|(_, model)| {
3437 model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
3438 }));
3439 }
3440
3441 #[test]
3442 fn fireworks_gpt_oss_route_has_real_context_window() {
3443 reset_overrides();
3450
3451 let entry = model_catalog_entry("accounts/fireworks/models/gpt-oss-120b")
3452 .expect("Fireworks gpt-oss-120b must be in the model catalog");
3453 assert_eq!(entry.context_window, 131_072);
3454 assert_eq!(entry.provider, "fireworks");
3455 assert_eq!(
3456 entry.equivalence_group.as_deref(),
3457 Some("openai-gpt-oss-120b"),
3458 );
3459 }
3460
3461 #[test]
3462 fn test_user_catalog_overlay_re_homes_model_provider() {
3463 reset_overrides();
3467 let mut overlay = ProvidersConfig::default();
3468 overlay.models.insert(
3469 "gpt-4o".to_string(),
3470 ModelDef {
3471 name: "GPT-4o via OpenRouter".to_string(),
3472 provider: "openrouter".to_string(),
3473 context_window: 128_000,
3474 logical_model: None,
3475 equivalence_group: None,
3476 served_variant: None,
3477 wire_model: None,
3478 api_dialect: None,
3479 rate_limits: None,
3480 performance: None,
3481 architecture: None,
3482 local_memory: None,
3483 runtime_context_window: None,
3484 stream_timeout: None,
3485 capabilities: Vec::new(),
3486 pricing: None,
3487 deprecated: false,
3488 deprecation_note: None,
3489 superseded_by: None,
3490 fast_mode: None,
3491 quality_tags: Vec::new(),
3492 availability: ModelAvailability::default(),
3493 tier: None,
3494 open_weight: None,
3495 strengths: Vec::new(),
3496 benchmarks: std::collections::BTreeMap::new(),
3497 family: None,
3498 lineage: None,
3499 complementary_with: Vec::new(),
3500 avoid_as_reviewer_for: Vec::new(),
3501 },
3502 );
3503 set_user_overrides(Some(overlay));
3504
3505 assert_eq!(infer_provider("gpt-4o"), "openrouter");
3506
3507 reset_overrides();
3508 }
3509
3510 #[test]
3511 fn test_resolve_model_info_normalizes_provider_prefixes() {
3512 let local = resolve_model_info("local:gemma-4-e4b-it");
3513 assert_eq!(local.id, "gemma-4-e4b-it");
3514 assert_eq!(local.provider, "ollama");
3515
3516 let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
3517 assert_eq!(ollama.id, "qwen3:30b-a3b");
3518 assert_eq!(ollama.provider, "ollama");
3519
3520 let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
3521 assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
3522 assert_eq!(hf.provider, "huggingface");
3523
3524 let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
3525 assert_eq!(cerebras.id, "gpt-oss-120b");
3526 assert_eq!(cerebras.provider, "cerebras");
3527
3528 let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
3529 assert_eq!(cerebras_glm.id, "zai-glm-4.7");
3530 assert_eq!(cerebras_glm.provider, "cerebras");
3531 }
3532
3533 #[test]
3534 fn test_model_tier_from_defaults() {
3535 assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
3539 assert_eq!(model_tier("gpt-4o"), "frontier");
3540 assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
3541 assert_eq!(model_tier("deepseek-v4-flash"), "mid");
3542 assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
3543 assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
3544 assert_eq!(model_tier("glm-5.1"), "frontier");
3545 assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
3547 }
3548
/// Family and lineage are derived from the model id for a sample of
/// (provider, model) pairs.
#[test]
fn test_model_family_preserves_underlying_hosted_lineage() {
    // (provider, model id, expected family)
    let family_cases = [
        ("openrouter", "anthropic/claude-sonnet-4-6", "anthropic-claude"),
        ("openrouter", "google/gemini-2.5-flash", "google-gemini"),
        ("openrouter", "openai/o3-mini", "openai-reasoning"),
    ];
    for (provider, model_id, expected) in family_cases {
        assert_eq!(model_family(provider, model_id), expected);
    }

    // (provider, model id, expected lineage)
    let lineage_cases = [
        ("openrouter", "openai/gpt-5.5", "openai-gpt5"),
        ("openrouter", "openai/o3-mini", "openai-reasoning"),
        ("anthropic", "claude-opus-4-8", "claude-opus-adaptive"),
        ("llamacpp", "qwen3.6-35b-a3b", "qwen3"),
    ];
    for (provider, model_id, expected) in lineage_cases {
        assert_eq!(model_lineage(provider, model_id), expected);
    }
}
3574
/// For a Claude Sonnet author with a 3x price cap, the picker returns a non-fallback
/// frontier reviewer from a different family, with a cost estimate and no fallback code.
#[test]
fn test_complementary_reviewer_uses_different_family() {
    let options = ComplementaryReviewerOptions {
        author_model: "claude-sonnet-4-6".to_string(),
        author_provider: None,
        intent: ComplementaryReviewerIntent::PlanReview,
        max_price_multiplier: Some(3.0),
    };
    let selection = pick_complementary_reviewer(options);

    // A genuine pick, not the fallback path.
    assert!(!selection.fallback, "{selection:?}");

    // Author is identified as Claude; the reviewer comes from some other family.
    assert_eq!(selection.author.family, "anthropic-claude");
    assert_ne!(selection.reviewer.family, selection.author.family);

    assert_eq!(selection.reviewer.tier, "frontier");
    assert!(selection.estimated_incremental_cost.is_some());
    assert_eq!(selection.fallback_code, None, "{selection:?}");
}
3593
/// With a 0.01 price multiplier the picker falls back to the author model itself and
/// reports the `NoDiffFamilyWithinPrice` code plus a reason naming the cap.
#[test]
fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
    let options = ComplementaryReviewerOptions {
        author_model: "gpt-4o-mini".to_string(),
        author_provider: Some("openai".to_string()),
        intent: ComplementaryReviewerIntent::Review,
        max_price_multiplier: Some(0.01),
    };
    let selection = pick_complementary_reviewer(options);

    assert!(selection.fallback, "{selection:?}");

    // The fallback reviewer is the author model, so the families coincide.
    assert_eq!(selection.reviewer.id, "gpt-4o-mini");
    assert_eq!(selection.reviewer.family, selection.author.family);

    // The human-readable reason mentions the option that caused the fallback.
    assert!(selection
        .fallback_reason
        .as_deref()
        .is_some_and(|reason| reason.contains("max_price_multiplier")));

    // The machine-readable code matches the enum variant and its string form.
    let expected_code = ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code();
    assert_eq!(
        selection.fallback_code.as_deref(),
        Some(expected_code),
        "{selection:?}"
    );
    assert_eq!(
        ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
        "no_diff_family_within_price"
    );
}
3623
/// Pins the string form of each reviewer fallback code checked here.
#[test]
fn test_reviewer_fallback_codes_are_stable_strings() {
    // (variant, expected code string)
    let expected = [
        (
            ReviewerFallbackCode::UnknownAuthorFamily,
            "unknown_author_family",
        ),
        (
            ReviewerFallbackCode::NoDiffFamilyWithinPrice,
            "no_diff_family_within_price",
        ),
        (
            ReviewerFallbackCode::NoDiffFamilyServerless,
            "no_diff_family_serverless",
        ),
        (
            ReviewerFallbackCode::AllDiffFamilyExcluded,
            "all_diff_family_excluded",
        ),
    ];

    for (variant, code) in expected {
        assert_eq!(variant.as_code(), code);
    }
}
3645
/// `resolve_model("gpt-4o")` returns the input unchanged with no provider attached.
#[test]
fn test_resolve_model_unknown_alias() {
    let resolved = resolve_model("gpt-4o");

    assert_eq!(resolved.0, "gpt-4o");
    assert!(resolved.1.is_none());
}
3652
/// `provider_names` lists at least seven providers, including the ones named below.
#[test]
fn test_provider_names() {
    let names = provider_names();
    // Membership check by name; equivalent to `names.contains(&name.to_string())`.
    let has = |name: &str| names.contains(&name.to_string());

    assert!(names.len() >= 7);
    assert!(has("anthropic"));
    assert!(has("together"));
    assert!(has("local"));
    assert!(has("mlx"));
    assert!(has("openai"));
    assert!(has("ollama"));
    assert!(has("bedrock"));
    assert!(has("azure_openai"));
    assert!(has("vertex"));
}
3667
/// An overlay that sets only a default provider and one alias still yields a merged
/// config containing the `anthropic` and `ollama` providers.
#[test]
fn global_provider_file_is_an_overlay_on_builtin_defaults() {
    let quickstart = AliasDef {
        id: "llama3.2".to_string(),
        provider: "ollama".to_string(),
        tool_format: None,
    };
    let overlay = ProvidersConfig {
        default_provider: Some("ollama".to_string()),
        aliases: [("quickstart".to_string(), quickstart)]
            .into_iter()
            .collect(),
        ..Default::default()
    };

    let merged = merge_global_config(overlay);

    // Overlay values are present...
    assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
    // ...alongside providers the overlay never mentioned.
    assert!(merged.providers.contains_key("anthropic"));
    assert!(merged.providers.contains_key("ollama"));
    assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
}
3690
/// Overriding only `base_url` and `extra_headers` for `ollama` leaves the other
/// provider fields checked below at their expected values.
#[test]
fn partial_provider_overlay_preserves_builtin_provider_metadata() {
    let overlay_toml = r#"
        [providers.ollama]
        base_url = "http://localhost:11435"
        extra_headers = { "x-local" = "1" }
    "#;
    let overlay = parse_config_toml(overlay_toml).expect("provider overlay parses");

    let merged = merge_global_config(overlay);
    let ollama = merged
        .providers
        .get("ollama")
        .expect("ollama remains configured");

    // Value supplied by the overlay.
    assert_eq!(ollama.base_url, "http://localhost:11435");

    // Fields the overlay did not mention.
    assert_eq!(ollama.auth_style, "none");
    assert_eq!(ollama.chat_endpoint, "/api/chat");
    assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
    assert_eq!(ollama.cost_per_1k_in, Some(0.0));
    assert_eq!(ollama.cost_per_1k_out, Some(0.0));

    let healthcheck_path = ollama
        .healthcheck
        .as_ref()
        .and_then(|healthcheck| healthcheck.path.as_deref());
    assert_eq!(healthcheck_path, Some("/api/tags"));

    // Header supplied by the overlay.
    let local_header = ollama.extra_headers.get("x-local").map(String::as_str);
    assert_eq!(local_header, Some("1"));
}
3726
/// An overlay that sets `auth_style` and `auth_env` for `ollama` takes effect, while
/// `chat_endpoint` stays `/api/chat`.
#[test]
fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
    let overlay_toml = r#"
        [providers.ollama]
        auth_style = "bearer"
        auth_env = "OLLAMA_API_KEY"
    "#;
    let overlay = parse_config_toml(overlay_toml).expect("provider overlay parses");

    let merged = merge_global_config(overlay);
    let ollama = merged
        .providers
        .get("ollama")
        .expect("ollama remains configured");

    // Explicit overlay values win.
    assert_eq!(ollama.auth_style, "bearer");
    assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
    // A field the overlay did not mention.
    assert_eq!(ollama.chat_endpoint, "/api/chat");
}
3748
/// The `frontier`, `small`, and `mid` tiers resolve without a provider scope, to the
/// providers (and, for `mid`, the model) asserted below.
#[test]
fn test_resolve_tier_model_default_aliases() {
    // Providers accepted for the `small` tier.
    const OPEN_WEIGHT_PROVIDERS: [&str; 6] = [
        "openrouter",
        "huggingface",
        "local",
        "llamacpp",
        "mlx",
        "ollama",
    ];

    {
        let (model, provider) = resolve_tier_model("frontier", None)
            .expect("frontier alias must resolve from the embedded catalog");
        assert_eq!(provider, "anthropic");

        let is_live_anthropic = model_catalog_entry(&model)
            .is_some_and(|entry| !entry.deprecated && entry.provider == "anthropic");
        assert!(
            is_live_anthropic,
            "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
        );
    }

    {
        let (model, provider) = resolve_tier_model("small", None)
            .expect("small alias must resolve from the embedded catalog");
        assert!(
            OPEN_WEIGHT_PROVIDERS.contains(&provider.as_str()),
            "small tier should resolve to an open-weight provider (got {provider} / {model})"
        );
    }

    {
        let (model, provider) = resolve_tier_model("mid", None)
            .expect("mid alias must resolve from the embedded catalog");
        assert_eq!(provider, "openrouter");
        assert_eq!(model, "qwen/qwen3.6-flash");
    }
}
3784
/// Scoping the `mid` tier to `openai` resolves to `gpt-5.4-mini`, which must also be a
/// catalog entry.
#[test]
fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
    let scoped = resolve_tier_model("mid", Some("openai"));
    let (model, provider) = scoped.expect("mid tier scoped to openai must resolve");

    assert_eq!(provider, "openai");
    assert_eq!(model, "gpt-5.4-mini");

    let registered = model_catalog_entry(&model).is_some();
    assert!(
        registered,
        "mid/openai alias must point at a registered model (got {model})"
    );
}
3799
/// The `anthropic` provider uses header auth with the `x-api-key` header.
#[test]
fn test_provider_config_anthropic() {
    let anthropic = provider_config("anthropic").unwrap();

    assert_eq!(anthropic.auth_style, "header");
    assert_eq!(anthropic.auth_header.as_deref(), Some("x-api-key"));
}
3806
/// Checks the `mlx` provider's base URL, env override name, and healthcheck path, and
/// that the `mlx-qwen36-27b` alias resolves onto that provider.
#[test]
fn test_provider_config_mlx() {
    let mlx = provider_config("mlx").unwrap();
    assert_eq!(mlx.base_url, "http://127.0.0.1:8002");
    assert_eq!(mlx.base_url_env.as_deref(), Some("MLX_BASE_URL"));

    let healthcheck = mlx.healthcheck.unwrap();
    assert_eq!(healthcheck.path.as_deref(), Some("/v1/models"));

    let resolved = resolve_model("mlx-qwen36-27b");
    assert_eq!(resolved.0, "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit");
    assert_eq!(resolved.1.as_deref(), Some("mlx"));
}
3821
/// Checks selected settings of the `bedrock`, `azure_openai`, and `vertex` providers
/// and the provider inferred for a few model ids.
#[test]
fn test_enterprise_provider_defaults_and_inference() {
    // Bedrock: SigV4 auth, env-overridable base URL, vendor-dotted ids infer to bedrock.
    let bedrock = provider_config("bedrock").unwrap();
    assert_eq!(bedrock.auth_style, "aws_sigv4");
    assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
    for bedrock_model in [
        "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "meta.llama3-70b-instruct-v1:0",
    ] {
        assert_eq!(infer_provider(bedrock_model), "bedrock");
    }

    // Azure OpenAI: endpoint env plus three candidate credential env names, in order.
    let azure = provider_config("azure_openai").unwrap();
    assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
    let expected_auth_envs = vec![
        "AZURE_OPENAI_API_KEY".to_string(),
        "AZURE_OPENAI_AD_TOKEN".to_string(),
        "AZURE_OPENAI_BEARER_TOKEN".to_string(),
    ];
    assert_eq!(auth_env_names(&azure.auth_env), expected_auth_envs);

    // Vertex has its own base URL, yet a bare gemini id infers to `gemini`.
    let vertex = provider_config("vertex").unwrap();
    assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
    assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
}
3848
/// With `HARN_DEFAULT_PROVIDER=openai`, inference for `unknown-model` yields `openai`
/// and reports `DefaultFallback` as the source.
///
/// The previous value of the variable is restored by a drop guard, so it is put back
/// even when the call under test panics.
#[test]
fn test_default_provider_env_override_for_unknown_model() {
    /// Restores `HARN_DEFAULT_PROVIDER` to the captured value (or removes it) on drop.
    struct RestoreDefaultProvider(Option<String>);

    impl Drop for RestoreDefaultProvider {
        fn drop(&mut self) {
            // SAFETY: NOTE(review): relies on the `env_guard()` value held by the test
            // serializing environment access across tests — confirm that contract.
            unsafe {
                match self.0.take() {
                    Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                    None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
                }
            }
        }
    }

    // Declared first so it is dropped last, after the variable has been restored.
    let _guard = crate::llm::env_guard();

    let inference = {
        let _restore = RestoreDefaultProvider(std::env::var("HARN_DEFAULT_PROVIDER").ok());
        // SAFETY: NOTE(review): same assumption as above — `_guard` is presumed to
        // serialize environment mutation across tests.
        unsafe {
            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
        }

        infer_provider_detail("unknown-model")
        // `_restore` drops here, before any assertion runs.
    };

    assert_eq!(inference.provider, "openai");
    assert_eq!(
        inference.source,
        crate::llm::provider::ProviderInferenceSource::DefaultFallback
    );
}
3872
/// With `HARN_DEFAULT_PROVIDER=ollama`, `mystery-model-xyz` resolves to provider
/// `ollama` while its family and lineage stay `unknown`; `deepseek-mystery-model`
/// gets family and lineage `deepseek`.
///
/// The previous value of the variable is restored by a drop guard, so it is put back
/// even when a call under test panics.
#[test]
fn test_unknown_model_family_ignores_default_provider_fallback() {
    /// Restores `HARN_DEFAULT_PROVIDER` to the captured value (or removes it) on drop.
    struct RestoreDefaultProvider(Option<String>);

    impl Drop for RestoreDefaultProvider {
        fn drop(&mut self) {
            // SAFETY: NOTE(review): relies on the `env_guard()` value held by the test
            // serializing environment access across tests — confirm that contract.
            unsafe {
                match self.0.take() {
                    Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                    None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
                }
            }
        }
    }

    // Declared first so it is dropped last, after the variable has been restored.
    let _guard = crate::llm::env_guard();

    let (unknown, known_family) = {
        let _restore = RestoreDefaultProvider(std::env::var("HARN_DEFAULT_PROVIDER").ok());
        // SAFETY: NOTE(review): same assumption as above — `_guard` is presumed to
        // serialize environment mutation across tests.
        unsafe {
            std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
        }

        // Tuple fields evaluate left to right, matching the original call order.
        (
            resolve_model_info("mystery-model-xyz"),
            resolve_model_info("deepseek-mystery-model"),
        )
        // `_restore` drops here, before any assertion runs.
    };

    assert_eq!(unknown.provider, "ollama");
    assert_eq!(unknown.family, "unknown");
    assert_eq!(unknown.lineage, "unknown");
    assert_eq!(known_family.family, "deepseek");
    assert_eq!(known_family.lineage, "deepseek");
}
3897
/// A provider definition that sets only `base_url` resolves to that URL.
#[test]
fn test_resolve_base_url_no_env() {
    let provider = ProviderDef {
        base_url: "https://example.com".to_string(),
        ..Default::default()
    };

    let resolved = resolve_base_url(&provider);

    assert_eq!(resolved, "https://example.com");
}
3906
/// The default config has providers and inference rules, a `mid` default tier, and at
/// least four models tagged `frontier`.
#[test]
fn test_default_config_roundtrip() {
    let config = default_config();

    assert!(!config.providers.is_empty());
    assert!(!config.inference_rules.is_empty());
    assert_eq!(config.tier_defaults.default, "mid");

    let frontiers = config
        .models
        .values()
        .filter(|model| model.tier.as_deref() == Some("frontier"))
        .count();
    assert!(
        frontiers >= 4,
        "expected at least 4 frontier-tagged models, got {frontiers}"
    );
}
3926
/// `devstral-small-2:24b` and `gemma4:26b` both report a 262,144-token context window;
/// only the latter lists the `vision` capability.
#[test]
fn test_local_ollama_catalog_metadata() {
    reset_overrides();

    let devstral =
        model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
    let devstral_has_vision = devstral.capabilities.iter().any(|cap| cap == "vision");
    assert_eq!(devstral.context_window, 262_144);
    assert!(!devstral_has_vision);

    let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
    let gemma4_has_vision = gemma4.capabilities.iter().any(|cap| cap == "vision");
    assert_eq!(gemma4.context_window, 262_144);
    assert!(gemma4_has_vision);
}
3940
/// For each listed Gemma 4 id, the capabilities stored on the catalog entry equal the
/// tags returned by `effective_model_capability_tags`.
#[test]
fn local_gemma4_source_tags_match_structured_capability_tags() {
    const GEMMA4_IDS: [&str; 5] = [
        "gemma-4-e2b-it",
        "gemma-4-e4b-it",
        "gemma-4-12b-it",
        "gemma-4-26b-a4b-it",
        "gemma-4-31b-it",
    ];

    reset_overrides();
    let config = default_config();

    for id in GEMMA4_IDS {
        let Some(source) = config.models.get(id) else {
            panic!("{id} should be in the embedded catalog");
        };
        let derived = effective_model_capability_tags(&source.provider, id);
        assert_eq!(
            source.capabilities, derived,
            "{}/{} source capabilities must match derived capability_tags",
            source.provider, id
        );
    }
}
3964
/// With every capability flag below switched on, `capability_tags_from_capabilities`
/// returns the full tag list in the order asserted here.
#[test]
fn capability_tags_include_structured_capability_flags() {
    let expected_tags = vec![
        "streaming",
        "tools",
        "tool_search",
        "vision",
        "audio",
        "pdf",
        "video",
        "files",
        "prompt_caching",
        "thinking",
        "structured_output",
    ];

    let caps = crate::llm::capabilities::Capabilities {
        native_tools: true,
        tool_search: vec!["web".to_string()],
        vision_supported: true,
        audio: true,
        pdf: true,
        video: true,
        files_api_supported: true,
        prompt_caching: true,
        thinking_modes: vec!["enabled".to_string()],
        structured_output: Some("native".to_string()),
        ..Default::default()
    };

    assert_eq!(capability_tags_from_capabilities(&caps), expected_tags);
}
3998
/// `merge_from` applies the overlay's default provider and new `custom` provider while
/// keeping the `anthropic` and `ollama` providers.
#[test]
fn test_external_config_overlays_default_catalog() {
    let custom_provider = ProviderDef {
        base_url: "https://llm.example.test/v1".to_string(),
        chat_endpoint: "/chat/completions".to_string(),
        ..Default::default()
    };
    let overlay = ProvidersConfig {
        default_provider: Some("ollama".to_string()),
        providers: [("custom".to_string(), custom_provider)]
            .into_iter()
            .collect(),
        ..Default::default()
    };

    let mut config = default_config();
    config.merge_from(&overlay);

    assert_eq!(config.default_provider.as_deref(), Some("ollama"));
    for provider in ["custom", "anthropic", "ollama"] {
        assert!(config.providers.contains_key(provider));
    }
}
4022
/// `model_params` returns an empty collection for `claude-sonnet-4-20250514`.
#[test]
fn test_model_params_empty() {
    let sonnet_params = model_params("claude-sonnet-4-20250514");

    assert!(sonnet_params.is_empty());
}
4028
/// A user override that adds the `acme` provider and the `acme-fast` alias becomes
/// visible through `resolve_model`, `provider_names`, and `provider_config`.
#[test]
fn test_user_overrides_add_provider_and_alias() {
    reset_overrides();

    let acme = ProviderDef {
        base_url: "https://llm.acme.test/v1".to_string(),
        chat_endpoint: "/chat/completions".to_string(),
        ..Default::default()
    };
    let acme_fast = AliasDef {
        id: "acme/model-fast".to_string(),
        provider: "acme".to_string(),
        tool_format: Some("native".to_string()),
    };
    let overlay = ProvidersConfig {
        providers: [("acme".to_string(), acme)].into_iter().collect(),
        aliases: [("acme-fast".to_string(), acme_fast)]
            .into_iter()
            .collect(),
        ..Default::default()
    };
    set_user_overrides(Some(overlay));

    // The alias resolves to the overlay's model and provider.
    let (model, provider) = resolve_model("acme-fast");
    assert_eq!(model, "acme/model-fast");
    assert_eq!(provider.as_deref(), Some("acme"));

    // The provider itself is listed and configured.
    assert!(provider_names().contains(&"acme".to_string()));
    let acme_base_url = provider_config("acme").map(|def| def.base_url);
    assert_eq!(acme_base_url, Some("https://llm.acme.test/v1".to_string()));

    reset_overrides();
}
4062
/// Pins the default tool format for a sample of (model, provider) routes.
#[test]
fn test_default_tool_format_uses_capability_matrix() {
    // (model id, provider, expected tool format)
    const ROUTES: &[(&str, &str, &str)] = &[
        ("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp", "native"),
        ("devstral-small-2:24b", "ollama", "json"),
        ("gemma-4-26b-a4b-it", "local", "native"),
        ("deepseek/deepseek-v3.2", "openrouter", "text"),
        ("qwen/qwen3-coder-flash", "openrouter", "text"),
        ("qwen/qwen3.6-flash", "openrouter", "native"),
        ("z-ai/glm-5.2", "openrouter", "text"),
        ("openai/gpt-oss-120b", "openrouter", "text"),
        ("accounts/fireworks/models/gpt-oss-120b", "fireworks", "text"),
        ("gpt-oss-120b", "cerebras", "native"),
        ("openai/gpt-oss-120b", "deepinfra", "text"),
        ("openai/gpt-oss-120b", "groq", "native"),
    ];

    reset_overrides();

    for &(model_id, provider, expected_format) in ROUTES {
        assert_eq!(default_tool_format(model_id, provider), expected_format);
    }
}
4118
/// The model id `mystery-model-xyz` on `ollama` defaults to the `json` tool format.
#[test]
fn test_default_tool_format_unpinned_text_channel_is_json() {
    reset_overrides();

    let format = default_tool_format("mystery-model-xyz", "ollama");

    assert_eq!(format, "json");
}
4130
/// Claude model ids default to the `native` tool format on every listed route; an
/// alias with no `tool_format` also resolves to `native`, an alias with an explicit
/// `tool_format` keeps it, and `mystery-model-xyz` on openrouter gets `json`.
#[test]
fn test_claude_family_defaults_native_without_host_pin() {
    // (model id, provider)
    const CLAUDE_ROUTES: &[(&str, &str)] = &[
        ("claude-sonnet-4-6", "anthropic"),
        ("claude-sonnet-5", "anthropic"),
        ("anthropic/claude-nova-1", "anthropic"),
        ("anthropic/claude-sonnet-4.6", "openrouter"),
        ("anthropic/claude-sonnet-5", "openrouter"),
        ("anthropic/claude-opus-4-5-20251101", "openrouter"),
        ("anthropic/claude-sonnet-next", "openrouter"),
        ("anthropic/claude-nova-1", "openrouter"),
        ("anthropic.claude-sonnet-4-6", "bedrock"),
    ];
    // Alias overlay without a `tool_format` key.
    const UNPINNED_ALIAS_TOML: &str =
        "[aliases.probe-sonnet]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\n";
    // Alias overlay that sets `tool_format = "json"`.
    const JSON_PINNED_ALIAS_TOML: &str =
        "[aliases.probe-sonnet-json]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\ntool_format = \"json\"\n";

    reset_overrides();

    for &(model, provider) in CLAUDE_ROUTES {
        assert_eq!(
            default_tool_format(model, provider),
            "native",
            "{provider}:{model} must default native without a host pin"
        );
    }

    // Alias without a pin: resolves to `native`.
    let unpinned_overlay = parse_config_toml(UNPINNED_ALIAS_TOML).expect("overlay parses");
    set_user_overrides(Some(unpinned_overlay));
    let resolved = resolve_model_info("probe-sonnet");
    assert_eq!(resolved.provider, "anthropic");
    assert_eq!(
        resolved.tool_format, "native",
        "an unpinned claude alias must inherit the family-level native default"
    );
    clear_user_overrides();

    // Alias with an explicit pin: the pin is what comes back.
    let pinned_overlay = parse_config_toml(JSON_PINNED_ALIAS_TOML).expect("overlay parses");
    set_user_overrides(Some(pinned_overlay));
    let pinned = resolve_model_info("probe-sonnet-json");
    assert_eq!(
        pinned.tool_format, "json",
        "an explicit host pin must win over the claude family default"
    );
    clear_user_overrides();

    // Contrast case: this non-Claude id on openrouter defaults to `json`.
    let non_claude_format = default_tool_format("mystery-model-xyz", "openrouter");
    assert_eq!(non_claude_format, "json");
}
4197
/// A user override that adds a model (with pricing) and a `qc_defaults` entry is
/// visible through the catalog, per-1k pricing, and QC default lookups.
#[test]
fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
    reset_overrides();

    let acme_pricing = ModelPricing {
        input_per_mtok: 1.25,
        output_per_mtok: 2.5,
        cache_read_per_mtok: Some(0.25),
        cache_write_per_mtok: None,
    };
    let acme_fast = ModelDef {
        name: "Acme Fast".to_string(),
        provider: "acme".to_string(),
        context_window: 65_536,
        logical_model: None,
        equivalence_group: None,
        served_variant: None,
        wire_model: None,
        api_dialect: None,
        rate_limits: None,
        performance: None,
        architecture: None,
        local_memory: None,
        runtime_context_window: None,
        stream_timeout: Some(42.0),
        // Deliberately listed as tools-then-streaming; the catalog entry is asserted
        // below to report them as streaming-then-tools.
        capabilities: vec!["tools".to_string(), "streaming".to_string()],
        pricing: Some(acme_pricing),
        deprecated: false,
        deprecation_note: None,
        superseded_by: None,
        fast_mode: None,
        quality_tags: Vec::new(),
        availability: ModelAvailability::default(),
        tier: None,
        open_weight: None,
        strengths: Vec::new(),
        benchmarks: std::collections::BTreeMap::new(),
        family: None,
        lineage: None,
        complementary_with: Vec::new(),
        avoid_as_reviewer_for: Vec::new(),
    };
    let overlay = ProvidersConfig {
        models: [("acme/model-fast".to_string(), acme_fast)]
            .into_iter()
            .collect(),
        qc_defaults: [("acme".to_string(), "acme/model-cheap".to_string())]
            .into_iter()
            .collect(),
        ..Default::default()
    };
    set_user_overrides(Some(overlay));

    let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
    assert_eq!(entry.context_window, 65_536);
    assert_eq!(
        entry.capabilities,
        vec!["streaming".to_string(), "tools".to_string()]
    );

    let input_rate = entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok);
    assert_eq!(input_rate, Some(1.25));

    // Per-1k rates are the per-mtok rates divided by 1000.
    let per_1k = pricing_per_1k_for("acme", "acme/model-fast");
    assert_eq!(per_1k, Some((0.00125, 0.0025)));

    let qc_model = qc_default_model("acme");
    assert_eq!(qc_model.as_deref(), Some("acme/model-cheap"));

    reset_overrides();
}
4268
/// A user-override inference rule with pattern `internal-*` routes `internal-foo` to
/// `openai`.
#[test]
fn test_user_overrides_prepend_inference_rules() {
    reset_overrides();

    let internal_rule = InferenceRule {
        pattern: Some("internal-*".to_string()),
        contains: None,
        exact: None,
        provider: "openai".to_string(),
    };
    let overlay = ProvidersConfig {
        inference_rules: vec![internal_rule],
        ..Default::default()
    };
    set_user_overrides(Some(overlay));

    let inferred = infer_provider("internal-foo");
    assert_eq!(inferred, "openai");

    reset_overrides();
}
4285
/// The default config meets minimum counts for providers, models, and aliases, and
/// names `anthropic` as the default provider.
#[test]
fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
    let config = default_config();

    let provider_count = config.providers.len();
    assert!(
        provider_count >= 10,
        "expected >=10 providers in embedded catalog, got {}",
        provider_count
    );

    let model_count = config.models.len();
    assert!(
        model_count >= 20,
        "expected >=20 models in embedded catalog, got {}",
        model_count
    );

    let alias_count = config.aliases.len();
    assert!(
        alias_count >= 15,
        "expected >=15 aliases in embedded catalog, got {}",
        alias_count
    );

    assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
}
4312
/// Every model flagged `deprecated` must carry a `deprecation_note` that is not
/// missing, empty, or whitespace-only.
#[test]
fn embedded_catalog_every_deprecated_model_has_a_note() {
    let config = default_config();

    let offenders: Vec<&str> = config
        .models
        .iter()
        .filter(|(_, model)| model.deprecated)
        .filter(|(_, model)| {
            // A missing note counts the same as a blank one.
            model
                .deprecation_note
                .as_deref()
                .map_or(true, |note| note.trim().is_empty())
        })
        .map(|(id, _)| id.as_str())
        .collect();

    assert!(
        offenders.is_empty(),
        "deprecated models missing a deprecation_note: {offenders:?}"
    );
}
4335
/// Two Cerebras rows are serverless and not deprecated, while `llama-3.3-70b` is a
/// dedicated, deprecated Cerebras row.
#[test]
fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
    const PUBLIC_IDS: [&str; 2] = ["gpt-oss-120b", "zai-glm-4.7"];

    let config = default_config();

    for id in PUBLIC_IDS {
        let public = config.models.get(id).expect("current public Cerebras row");
        assert_eq!(public.provider, "cerebras");
        assert_eq!(public.availability, ModelAvailability::Serverless);
        assert!(!public.deprecated);
    }

    let legacy = config
        .models
        .get("llama-3.3-70b")
        .expect("legacy Cerebras row");
    assert_eq!(legacy.provider, "cerebras");
    assert_eq!(legacy.availability, ModelAvailability::Dedicated);
    assert!(legacy.deprecated);
}
4354
/// The openrouter `openai/gpt-oss-120b` row is open-weight, has non-empty strengths
/// without `vision`, and all non-deprecated members of its equivalence group share
/// one tier.
#[test]
fn embedded_openrouter_gpt_oss_120b_has_no_fragment_bleed() {
    let config = default_config();

    let model = config
        .models
        .get("openai/gpt-oss-120b")
        .expect("openrouter gpt-oss-120b row");
    assert_eq!(model.provider, "openrouter");
    assert_eq!(
        model.open_weight,
        Some(true),
        "gpt-oss-120b is Apache-2.0 open weight, not the bled-in open_weight=false"
    );

    let lists_vision = model.strengths.iter().any(|s| s == "vision");
    assert!(
        !lists_vision,
        "gpt-oss-120b is text-only; the bled-in `vision` strength must be gone: {:?}",
        model.strengths
    );
    assert!(
        !model.strengths.is_empty(),
        "gpt-oss-120b must carry its own strengths, not None"
    );

    // Distinct tier values across the live members of the equivalence group.
    let group_tiers: std::collections::BTreeSet<_> = config
        .models
        .values()
        .filter(|m| !m.deprecated)
        .filter(|m| m.equivalence_group.as_deref() == Some("openai-gpt-oss-120b"))
        .map(|m| m.tier.clone())
        .collect();
    assert_eq!(
        group_tiers.len(),
        1,
        "openai-gpt-oss-120b group must share one tier, got {group_tiers:?}"
    );
}
4401
/// Every model's `provider` must be a key of the config's `providers` map.
#[test]
fn embedded_catalog_every_model_targets_a_registered_provider() {
    let config = default_config();
    let known: std::collections::BTreeSet<&str> =
        config.providers.keys().map(String::as_str).collect();

    // (model id, unknown provider) pairs.
    let orphans: Vec<(&str, &str)> = config
        .models
        .iter()
        .filter_map(|(id, model)| {
            let provider = model.provider.as_str();
            (!known.contains(provider)).then_some((id.as_str(), provider))
        })
        .collect();

    assert!(
        orphans.is_empty(),
        "models reference unknown providers: {orphans:?}"
    );
}
4418
/// Every alias's `provider` must be a key of the config's `providers` map.
#[test]
fn embedded_catalog_every_alias_targets_a_registered_provider() {
    let config = default_config();
    let known: std::collections::BTreeSet<&str> =
        config.providers.keys().map(String::as_str).collect();

    // (alias name, unknown provider) pairs.
    let orphans: Vec<(&str, &str)> = config
        .aliases
        .iter()
        .filter_map(|(name, alias)| {
            let provider = alias.provider.as_str();
            (!known.contains(provider)).then_some((name.as_str(), provider))
        })
        .collect();

    assert!(
        orphans.is_empty(),
        "aliases reference unknown providers: {orphans:?}"
    );
}
4435
/// Every `qc_defaults` value must be a key of the config's `models` map.
#[test]
fn embedded_catalog_every_qc_default_targets_a_known_model() {
    let config = default_config();

    // (provider, unknown model id) pairs.
    let orphans: Vec<(&str, &str)> = config
        .qc_defaults
        .iter()
        .filter_map(|(provider, model_id)| {
            let model_id = model_id.as_str();
            (!config.models.contains_key(model_id)).then_some((provider.as_str(), model_id))
        })
        .collect();

    assert!(
        orphans.is_empty(),
        "qc_defaults reference unknown models: {orphans:?}"
    );
}
4450
/// For every model that has pricing, the input and output rates and any cache rates
/// must be non-negative. Models without pricing are skipped.
#[test]
fn embedded_catalog_pricing_rates_are_non_negative() {
    let config = default_config();

    let priced_models = config
        .models
        .iter()
        .filter_map(|(id, model)| model.pricing.as_ref().map(|pricing| (id, pricing)));

    for (id, pricing) in priced_models {
        assert!(
            pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
            "{id}: negative pricing — in={} out={}",
            pricing.input_per_mtok,
            pricing.output_per_mtok
        );

        // Cache rates are optional; only check the ones that are present.
        if let Some(rate) = pricing.cache_read_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
        }
        if let Some(rate) = pricing.cache_write_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
        }
    }
}
4472
/// `ModelAvailability::parse` accepts the three known strings, rejects `provisioned`,
/// and round-trips through `as_str`.
#[test]
fn model_availability_parses_known_strings() {
    // (input text, expected parse result)
    let parse_cases = [
        ("serverless", Some(ModelAvailability::Serverless)),
        ("dedicated", Some(ModelAvailability::Dedicated)),
        ("unknown", Some(ModelAvailability::Unknown)),
        ("provisioned", None),
    ];
    for (text, expected) in parse_cases {
        assert_eq!(ModelAvailability::parse(text), expected);
    }

    // parse(as_str(v)) == Some(v) for every variant.
    let variants = [
        ModelAvailability::Serverless,
        ModelAvailability::Dedicated,
        ModelAvailability::Unknown,
    ];
    for value in variants {
        assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
    }
}
4496
/// `Qwen/Qwen3-Coder-Next-FP8` is cataloged under `together` with dedicated
/// availability.
#[test]
fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
    let config = default_config();

    let coder_next = config
        .models
        .get("Qwen/Qwen3-Coder-Next-FP8")
        .expect("Together Qwen3 Coder Next FP8 is cataloged");

    assert_eq!(coder_next.provider, "together");
    assert_eq!(coder_next.availability, ModelAvailability::Dedicated);
}
4507
/// None of the tier-style aliases listed below may point at a (provider, model) pair
/// whose availability is `Dedicated`.
#[test]
fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
    // Alias names this check applies to; other aliases are ignored.
    const TIER_ALIASES: [&str; 9] = [
        "frontier",
        "mid",
        "small",
        "tier/frontier",
        "tier/mid",
        "tier/small",
        "sonnet",
        "opus",
        "haiku",
    ];

    let config = default_config();

    // (provider, model id) pairs for every dedicated model.
    let dedicated: std::collections::BTreeSet<(&str, &str)> = config
        .models
        .iter()
        .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
        .map(|(id, model)| (model.provider.as_str(), id.as_str()))
        .collect();

    let tier_aliases = config
        .aliases
        .iter()
        .filter(|(name, _)| TIER_ALIASES.contains(&name.as_str()));

    for (name, alias) in tier_aliases {
        let route = (alias.provider.as_str(), alias.id.as_str());
        assert!(
            !dedicated.contains(&route),
            "tier alias `{name}` targets dedicated-only route `{}/{}`",
            alias.provider,
            alias.id,
        );
    }
}
4542
/// Each of `frontier`, `mid`, and `small` must resolve to a catalog entry that is not
/// deprecated.
#[test]
fn embedded_catalog_tier_aliases_resolve_to_active_models() {
    for alias in ["frontier", "mid", "small"] {
        let Some((model, _provider)) = resolve_tier_model(alias, None) else {
            panic!("tier alias `{alias}` must resolve");
        };
        let Some(entry) = model_catalog_entry(&model) else {
            panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry");
        };

        assert!(
            !entry.deprecated,
            "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
            entry.deprecation_note
        );
    }
}
4561
/// The `opus` alias resolves to `claude-opus-4-8` on `anthropic`; that entry is not
/// deprecated and advertises a fast mode priced above its standard input rate.
#[test]
fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
    let resolved = resolve_model("opus");
    assert_eq!(resolved.0, "claude-opus-4-8");
    assert_eq!(resolved.1.as_deref(), Some("anthropic"));

    let opus48 = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
    assert!(!opus48.deprecated, "newest Opus must not be deprecated");

    // Fast mode is selected with `speed = fast` and carries a status label.
    let fast = opus48.fast_mode.expect("opus 4.8 advertises fast mode");
    assert_eq!(fast.param, "speed");
    assert_eq!(fast.value, "fast");
    assert_eq!(fast.status.as_deref(), Some("research_preview"));

    // Fast-mode input pricing must exceed the standard input rate.
    let fast_pricing = fast.pricing.expect("fast mode carries premium pricing");
    let standard = opus48.pricing.expect("opus 4.8 standard pricing");
    let is_premium = fast_pricing.input_per_mtok > standard.input_per_mtok;
    assert!(
        is_premium,
        "fast mode must be premium-priced relative to standard"
    );
}
4583
/// `claude-opus-4-7` and `claude-opus-4-6` are deprecated and name `claude-opus-4-8`
/// as their successor.
#[test]
fn superseded_opus_models_point_at_claude_opus_4_8() {
    const SUPERSEDED: [&str; 2] = ["claude-opus-4-7", "claude-opus-4-6"];

    for model in SUPERSEDED {
        let Some(entry) = model_catalog_entry(model) else {
            panic!("{model} catalog entry");
        };

        assert!(entry.deprecated, "{model} should be deprecated");
        assert_eq!(
            entry.superseded_by.as_deref(),
            Some("claude-opus-4-8"),
            "{model} should be superseded by claude-opus-4-8"
        );
    }
}
4599
/// `claude-opus-4-6` has no fast mode in the catalog, while `claude-opus-4-7` still
/// has one.
#[test]
fn opus_46_no_longer_advertises_fast_mode() {
    let opus46 = model_catalog_entry("claude-opus-4-6").expect("opus 4.6 catalog entry");
    let opus46_has_fast_mode = opus46.fast_mode.is_some();
    assert!(
        !opus46_has_fast_mode,
        "Anthropic removed Opus 4.6 fast mode on 2026-06-29; Harn should not advertise it"
    );

    let opus47 = model_catalog_entry("claude-opus-4-7").expect("opus 4.7 catalog entry");
    let opus47_has_fast_mode = opus47.fast_mode.is_some();
    assert!(
        opus47_has_fast_mode,
        "Opus 4.7 still advertises its own fast-mode tier"
    );
}
4614
/// `gpt-5.5` advertises a fast mode selected through the `service_tier` parameter,
/// with status `ga`.
#[test]
fn gpt_5_5_fast_mode_rides_service_tier() {
    let gpt55 = model_catalog_entry("gpt-5.5").expect("gpt-5.5 catalog entry");
    let fast_mode = gpt55.fast_mode.expect("gpt-5.5 advertises a fast tier");

    assert_eq!(fast_mode.param, "service_tier");
    assert_eq!(fast_mode.status.as_deref(), Some("ga"));
}
4624}