1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::{OnceLock, RwLock};
6
/// Process-wide merged configuration; initialised once by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file merged into `CONFIG`. `load_config` records
/// it only when an overlay file was read and parsed successfully.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
/// Lock-guarded optional runtime catalog overlay.
// NOTE(review): presumably reached through `runtime_catalog_overlay()`, which is
// defined outside this view — confirm.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    /// Per-thread user overrides: written by `set_user_overrides`, cloned out by
    /// `effective_config`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
18
19#[derive(Debug, Clone, Deserialize, Default)]
20pub struct ProvidersConfig {
21 #[serde(default)]
22 pub default_provider: Option<String>,
23 #[serde(default)]
24 pub providers: BTreeMap<String, ProviderDef>,
25 #[serde(default)]
26 pub aliases: BTreeMap<String, AliasDef>,
27 #[serde(default)]
28 pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
29 #[serde(default)]
30 pub models: BTreeMap<String, ModelDef>,
31 #[serde(default)]
32 pub qc_defaults: BTreeMap<String, String>,
33 #[serde(default)]
34 pub inference_rules: Vec<InferenceRule>,
35 #[serde(default)]
36 pub tier_rules: Vec<TierRule>,
37 #[serde(default)]
38 pub tier_defaults: TierDefaults,
39 #[serde(default)]
40 pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
41 #[serde(default)]
42 pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
43 #[serde(default)]
44 pub suppress: SuppressDef,
45 #[serde(default)]
46 pub patch: PatchDef,
47}
48
49#[derive(Debug, Clone, Deserialize, Default, PartialEq)]
82pub struct PatchDef {
83 #[serde(default)]
86 pub models: BTreeMap<String, toml::Value>,
87}
88
89#[derive(Debug, Clone, Deserialize, Default, PartialEq, Eq)]
104pub struct SuppressDef {
105 #[serde(default)]
110 pub routes: Vec<String>,
111}
112
113impl ProvidersConfig {
114 pub fn is_empty(&self) -> bool {
115 self.default_provider.is_none()
116 && self.providers.is_empty()
117 && self.aliases.is_empty()
118 && self.alias_tool_calling.is_empty()
119 && self.models.is_empty()
120 && self.qc_defaults.is_empty()
121 && self.inference_rules.is_empty()
122 && self.tier_rules.is_empty()
123 && self.model_defaults.is_empty()
124 && self.model_roles.is_empty()
125 && self.suppress.routes.is_empty()
126 && self.patch.models.is_empty()
127 && self.tier_defaults.default == default_mid()
128 }
129
130 pub fn dangling_model_patches(&self) -> Vec<&str> {
136 self.patch
137 .models
138 .keys()
139 .filter(|id| !self.models.contains_key(*id))
140 .map(String::as_str)
141 .collect()
142 }
143
144 pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
145 for (name, provider) in &overlay.providers {
146 match self.providers.get_mut(name) {
147 Some(existing) => existing.merge_from(provider),
148 None => {
149 self.providers.insert(name.clone(), provider.clone());
150 }
151 }
152 }
153 self.aliases.extend(overlay.aliases.clone());
154 self.alias_tool_calling
155 .extend(overlay.alias_tool_calling.clone());
156 self.models.extend(overlay.models.clone());
157 self.qc_defaults.extend(overlay.qc_defaults.clone());
158
159 if !overlay.patch.models.is_empty() || !self.patch.models.is_empty() {
174 for (id, patch) in &overlay.patch.models {
175 match self.patch.models.get_mut(id) {
176 Some(existing) => deep_merge_toml(existing, patch),
177 None => {
178 self.patch.models.insert(id.clone(), patch.clone());
179 }
180 }
181 }
182 apply_model_patches(&mut self.models, &self.patch.models);
183 }
184
185 if overlay.default_provider.is_some() {
186 self.default_provider = overlay.default_provider.clone();
187 }
188
189 if !overlay.inference_rules.is_empty() {
190 let mut merged = overlay.inference_rules.clone();
191 merged.extend(self.inference_rules.clone());
192 self.inference_rules = merged;
193 }
194
195 if !overlay.tier_rules.is_empty() {
196 let mut merged = overlay.tier_rules.clone();
197 merged.extend(self.tier_rules.clone());
198 self.tier_rules = merged;
199 }
200
201 if overlay.tier_defaults.default != default_mid() {
202 self.tier_defaults = overlay.tier_defaults.clone();
203 }
204
205 for (pattern, defaults) in &overlay.model_defaults {
206 self.model_defaults
207 .entry(pattern.clone())
208 .or_default()
209 .extend(defaults.clone());
210 }
211
212 for (role, defaults) in &overlay.model_roles {
213 self.model_roles
214 .entry(role.clone())
215 .or_default()
216 .extend(defaults.clone());
217 }
218
219 for route in &overlay.suppress.routes {
220 if !self.suppress.routes.contains(route) {
221 self.suppress.routes.push(route.clone());
222 }
223 }
224 }
225}
226
227fn deep_merge_toml(base: &mut toml::Value, overlay: &toml::Value) {
233 match (base, overlay) {
234 (toml::Value::Table(base_table), toml::Value::Table(overlay_table)) => {
235 for (key, overlay_value) in overlay_table {
236 match base_table.get_mut(key) {
237 Some(base_value) => deep_merge_toml(base_value, overlay_value),
238 None => {
239 base_table.insert(key.clone(), overlay_value.clone());
240 }
241 }
242 }
243 }
244 (base_slot, overlay_value) => *base_slot = overlay_value.clone(),
245 }
246}
247
248static MODEL_PATCH_TYPE_ERROR_WARNED: AtomicBool = AtomicBool::new(false);
252
253fn apply_model_patches(
263 models: &mut BTreeMap<String, ModelDef>,
264 patches: &BTreeMap<String, toml::Value>,
265) {
266 for (id, patch) in patches {
267 let Some(base) = models.get(id) else {
268 continue;
269 };
270 match patched_model_row(base, patch) {
271 Ok(patched) => {
272 models.insert(id.clone(), patched);
273 }
274 Err(error) => {
275 if !MODEL_PATCH_TYPE_ERROR_WARNED.swap(true, Ordering::Relaxed) {
276 eprintln!(
277 "[llm_config] invalid [patch.models.\"{id}\"] overlay \
278 (keeping the unpatched row): {error}"
279 );
280 }
281 }
282 }
283 }
284}
285
286fn patched_model_row(base: &ModelDef, patch: &toml::Value) -> Result<ModelDef, String> {
289 let mut value = toml::Value::try_from(base)
290 .map_err(|error| format!("serialize base row for patching: {error}"))?;
291 deep_merge_toml(&mut value, patch);
292 ModelDef::deserialize(value).map_err(|error| error.to_string())
293}
294
/// In-memory definition of a single provider.
///
/// Deserialization goes through `ProviderDefWire` so that an omitted
/// `auth_style` can be told apart from an explicitly written one
/// (see `auth_style_explicit`).
#[derive(Debug, Clone)]
pub struct ProviderDef {
    pub display_name: Option<String>,
    pub icon: Option<String>,
    pub protocol: Option<String>,
    /// Empty when not configured; an empty overlay value never overwrites it.
    pub base_url: String,
    // NOTE(review): presumably the name of an env var overriding `base_url` — confirm.
    pub base_url_env: Option<String>,
    /// Defaults to `"bearer"` when absent from the deserialized input.
    pub auth_style: String,
    pub auth_header: Option<String>,
    /// No variable, a single name, or a list of names (see `AuthEnv`).
    pub auth_env: AuthEnv,
    /// Merged key-by-key when overlays are applied.
    pub extra_headers: BTreeMap<String, String>,
    /// Empty when not configured; an empty overlay value never overwrites it.
    pub chat_endpoint: String,
    pub completion_endpoint: Option<String>,
    pub command: Option<String>,
    /// Replaced wholesale by a non-empty overlay list.
    pub args: Vec<String>,
    /// Merged key-by-key when overlays are applied.
    pub env: BTreeMap<String, String>,
    pub cwd: Option<String>,
    /// Replaced wholesale by a non-empty overlay list.
    pub mcp_servers: Vec<serde_json::Value>,
    pub healthcheck: Option<HealthcheckDef>,
    pub local_runtime: Option<LocalRuntimeDef>,
    /// Replaced wholesale by a non-empty overlay list.
    pub features: Vec<String>,
    pub fallback: Option<String>,
    pub retry_count: Option<u32>,
    pub retry_delay_ms: Option<u64>,
    pub rpm: Option<u32>,
    pub rate_limits: Option<RateLimitsDef>,
    pub cost_per_1k_in: Option<f64>,
    pub cost_per_1k_out: Option<f64>,
    pub latency_p50_ms: Option<u64>,
    pub performance: Option<ServingPerformanceDef>,
    /// `true` when `auth_style` was present in the deserialized input;
    /// consulted by `merge_from` when deciding whether an overlay's style wins.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
345
/// Deserialization mirror of `ProviderDef`.
///
/// Every field is optional on the wire. `auth_style` stays an `Option` here so
/// the `Deserialize` impl of `ProviderDef` can record whether it was written
/// explicitly before substituting the default.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    // `None` means "not written in the input", not "no auth style".
    #[serde(default)]
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    local_runtime: Option<LocalRuntimeDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
    #[serde(default)]
    performance: Option<ServingPerformanceDef>,
}
405
406impl<'de> Deserialize<'de> for ProviderDef {
407 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
408 where
409 D: serde::Deserializer<'de>,
410 {
411 let wire = ProviderDefWire::deserialize(deserializer)?;
412 let auth_style_explicit = wire.auth_style.is_some();
413 Ok(Self {
414 display_name: wire.display_name,
415 icon: wire.icon,
416 protocol: wire.protocol,
417 base_url: wire.base_url,
418 base_url_env: wire.base_url_env,
419 auth_style: wire.auth_style.unwrap_or_else(default_bearer),
420 auth_header: wire.auth_header,
421 auth_env: wire.auth_env,
422 extra_headers: wire.extra_headers,
423 chat_endpoint: wire.chat_endpoint,
424 completion_endpoint: wire.completion_endpoint,
425 command: wire.command,
426 args: wire.args,
427 env: wire.env,
428 cwd: wire.cwd,
429 mcp_servers: wire.mcp_servers,
430 healthcheck: wire.healthcheck,
431 local_runtime: wire.local_runtime,
432 features: wire.features,
433 fallback: wire.fallback,
434 retry_count: wire.retry_count,
435 retry_delay_ms: wire.retry_delay_ms,
436 rpm: wire.rpm,
437 rate_limits: wire.rate_limits,
438 cost_per_1k_in: wire.cost_per_1k_in,
439 cost_per_1k_out: wire.cost_per_1k_out,
440 latency_p50_ms: wire.latency_p50_ms,
441 performance: wire.performance,
442 auth_style_explicit,
443 })
444 }
445}
446
447impl Default for ProviderDef {
448 fn default() -> Self {
449 Self {
450 display_name: None,
451 icon: None,
452 protocol: None,
453 base_url: String::new(),
454 base_url_env: None,
455 auth_style: default_bearer(),
456 auth_header: None,
457 auth_env: AuthEnv::None,
458 extra_headers: BTreeMap::new(),
459 chat_endpoint: String::new(),
460 completion_endpoint: None,
461 command: None,
462 args: Vec::new(),
463 env: BTreeMap::new(),
464 cwd: None,
465 mcp_servers: Vec::new(),
466 healthcheck: None,
467 local_runtime: None,
468 features: Vec::new(),
469 fallback: None,
470 retry_count: None,
471 retry_delay_ms: None,
472 rpm: None,
473 rate_limits: None,
474 cost_per_1k_in: None,
475 cost_per_1k_out: None,
476 latency_p50_ms: None,
477 performance: None,
478 auth_style_explicit: false,
479 }
480 }
481}
482
483impl ProviderDef {
484 fn merge_from(&mut self, overlay: &ProviderDef) {
485 merge_option(&mut self.display_name, &overlay.display_name);
486 merge_option(&mut self.icon, &overlay.icon);
487 merge_option(&mut self.protocol, &overlay.protocol);
488 merge_string(&mut self.base_url, &overlay.base_url);
489 merge_option(&mut self.base_url_env, &overlay.base_url_env);
490 let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
491 if overlay.auth_style_explicit
492 || !overlay_uses_default_auth_style
493 || self.auth_style == default_bearer()
494 {
495 self.auth_style = overlay.auth_style.clone();
496 self.auth_style_explicit |=
497 overlay.auth_style_explicit || !overlay_uses_default_auth_style;
498 }
499 merge_option(&mut self.auth_header, &overlay.auth_header);
500 if !overlay.auth_env.is_none() {
501 self.auth_env = overlay.auth_env.clone();
502 }
503 self.extra_headers.extend(overlay.extra_headers.clone());
504 merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
505 merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
506 merge_option(&mut self.command, &overlay.command);
507 merge_vec(&mut self.args, &overlay.args);
508 self.env.extend(overlay.env.clone());
509 merge_option(&mut self.cwd, &overlay.cwd);
510 merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
511 merge_option(&mut self.healthcheck, &overlay.healthcheck);
512 merge_option(&mut self.local_runtime, &overlay.local_runtime);
513 merge_vec(&mut self.features, &overlay.features);
514 merge_option(&mut self.fallback, &overlay.fallback);
515 merge_option(&mut self.retry_count, &overlay.retry_count);
516 merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
517 merge_option(&mut self.rpm, &overlay.rpm);
518 merge_option(&mut self.rate_limits, &overlay.rate_limits);
519 merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
520 merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
521 merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
522 merge_option(&mut self.performance, &overlay.performance);
523 }
524}
525
/// Overwrites `base` with a clone of the overlay's value when the overlay is
/// `Some`; a `None` overlay leaves `base` untouched.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
531
/// Replaces the contents of `base` with `overlay` unless `overlay` is empty.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.push_str(overlay);
}
537
/// Replaces the contents of `base` with a copy of `overlay` unless `overlay`
/// is empty. The lists are never concatenated.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.extend_from_slice(overlay);
}
543
/// The auth style assumed when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
547
548#[derive(Debug, Clone, Deserialize, Default)]
551#[serde(untagged)]
552pub enum AuthEnv {
553 #[default]
554 None,
555 Single(String),
556 Multiple(Vec<String>),
557}
558
559impl AuthEnv {
560 fn is_none(&self) -> bool {
561 matches!(self, AuthEnv::None)
562 }
563}
564
/// Health-check request description for a provider.
///
/// `method` is required; the remaining fields are optional and default to
/// `None` when absent.
// NOTE(review): presumably `path` is relative to the provider base URL and
// `url` is an absolute alternative — confirm against the caller.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    pub method: String,
    #[serde(default)]
    pub path: Option<String>,
    #[serde(default)]
    pub url: Option<String>,
    #[serde(default)]
    pub body: Option<String>,
}
575
576#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
577pub struct LocalRuntimeDef {
578 #[serde(default, skip_serializing_if = "Option::is_none")]
581 pub kind: Option<String>,
582 #[serde(default, skip_serializing_if = "Option::is_none")]
584 pub command: Option<String>,
585 #[serde(default, skip_serializing_if = "Vec::is_empty")]
588 pub prefix_args: Vec<String>,
589 #[serde(default, skip_serializing_if = "Option::is_none")]
592 pub model_source: Option<String>,
593 #[serde(default, skip_serializing_if = "Option::is_none")]
595 pub model_source_env: Option<String>,
596 #[serde(default, skip_serializing_if = "Option::is_none")]
598 pub default_port: Option<u16>,
599 #[serde(default, skip_serializing_if = "Option::is_none")]
601 pub model_arg: Option<String>,
602 #[serde(default, skip_serializing_if = "Option::is_none")]
603 pub served_model_arg: Option<String>,
604 #[serde(default, skip_serializing_if = "Option::is_none")]
605 pub host_arg: Option<String>,
606 #[serde(default, skip_serializing_if = "Option::is_none")]
607 pub port_arg: Option<String>,
608 #[serde(default, skip_serializing_if = "Option::is_none")]
609 pub ctx_arg: Option<String>,
610 #[serde(default, skip_serializing_if = "Option::is_none")]
611 pub parallel_arg: Option<String>,
612 #[serde(default, skip_serializing_if = "Option::is_none")]
613 pub gpu_layers_arg: Option<String>,
614 #[serde(default, skip_serializing_if = "Option::is_none")]
615 pub cache_type_k_arg: Option<String>,
616 #[serde(default, skip_serializing_if = "Option::is_none")]
617 pub cache_type_v_arg: Option<String>,
618 #[serde(default, skip_serializing_if = "Option::is_none")]
619 pub cache_ram_arg: Option<String>,
620 #[serde(default, skip_serializing_if = "Option::is_none")]
622 pub enable_lora_arg: Option<String>,
623 #[serde(default, skip_serializing_if = "Option::is_none")]
625 pub lora_modules_arg: Option<String>,
626 #[serde(default, skip_serializing_if = "Option::is_none")]
628 pub max_lora_rank_arg: Option<String>,
629 #[serde(default, skip_serializing_if = "Vec::is_empty")]
631 pub default_args: Vec<String>,
632 #[serde(default, skip_serializing_if = "Option::is_none")]
634 pub stop: Option<String>,
635 #[serde(default, skip_serializing_if = "Option::is_none")]
637 pub source_url: Option<String>,
638 #[serde(default, skip_serializing_if = "Option::is_none")]
640 pub last_verified: Option<String>,
641 #[serde(default, skip_serializing_if = "Option::is_none")]
643 pub notes: Option<String>,
644}
645
646#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
647pub struct LocalMemoryDef {
648 #[serde(default, skip_serializing_if = "Option::is_none")]
650 pub measured_resident_gib: Option<f64>,
651 #[serde(default, skip_serializing_if = "Option::is_none")]
653 pub measured_context_window: Option<u64>,
654 #[serde(default, skip_serializing_if = "Option::is_none")]
656 pub measured_cache_type: Option<String>,
657 #[serde(default, skip_serializing_if = "Option::is_none")]
659 pub base_resident_gib: Option<f64>,
660 #[serde(default, skip_serializing_if = "Option::is_none")]
663 pub kv_cache_gib_per_1k_ctx: Option<f64>,
664 #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
666 pub cache_type_multipliers: BTreeMap<String, f64>,
667 #[serde(default, skip_serializing_if = "Option::is_none")]
669 pub default_cache_type: Option<String>,
670 #[serde(default, skip_serializing_if = "Option::is_none")]
672 pub safety_margin_gib: Option<f64>,
673 #[serde(default, skip_serializing_if = "Option::is_none")]
675 pub max_recommended_context: Option<u64>,
676 #[serde(default, skip_serializing_if = "Option::is_none")]
678 pub source_url: Option<String>,
679 #[serde(default, skip_serializing_if = "Option::is_none")]
681 pub last_verified: Option<String>,
682 #[serde(default, skip_serializing_if = "Option::is_none")]
684 pub notes: Option<String>,
685}
686
687impl LocalMemoryDef {
688 pub fn is_empty(&self) -> bool {
689 self.measured_resident_gib.is_none()
690 && self.measured_context_window.is_none()
691 && self.measured_cache_type.is_none()
692 && self.base_resident_gib.is_none()
693 && self.kv_cache_gib_per_1k_ctx.is_none()
694 && self.cache_type_multipliers.is_empty()
695 && self.default_cache_type.is_none()
696 && self.safety_margin_gib.is_none()
697 && self.max_recommended_context.is_none()
698 && self.source_url.is_none()
699 && self.last_verified.is_none()
700 && self.notes.is_none()
701 }
702}
703
/// A named alias: `id` and `provider` are required, `tool_format` is optional
/// and defaults to `None` when absent.
// NOTE(review): presumably `id` is the model id the alias resolves to — confirm.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    pub id: String,
    pub provider: String,
    #[serde(default)]
    pub tool_format: Option<String>,
}
715
716#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
717pub struct AliasToolCallingDef {
718 #[serde(default)]
719 #[serde(skip_serializing_if = "Option::is_none")]
720 pub native: Option<String>,
721 #[serde(default)]
722 #[serde(skip_serializing_if = "Option::is_none")]
723 pub text: Option<String>,
724 #[serde(default)]
725 #[serde(skip_serializing_if = "Option::is_none")]
726 pub streaming_native: Option<String>,
727 #[serde(default)]
728 #[serde(skip_serializing_if = "Option::is_none")]
729 pub fallback_mode: Option<String>,
730 #[serde(default)]
731 #[serde(skip_serializing_if = "Option::is_none")]
732 pub failure_reason: Option<String>,
733 #[serde(default)]
734 #[serde(skip_serializing_if = "Option::is_none")]
735 pub last_probe_at: Option<String>,
736}
737
/// Pricing for a model. Input and output prices are required; the cache
/// prices are optional and default to `None` when absent.
// NOTE(review): `_per_mtok` presumably means "per million tokens" — confirm units.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
747
748#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
751pub struct RateLimitsDef {
752 #[serde(default, skip_serializing_if = "Option::is_none")]
754 pub rpm: Option<u32>,
755 #[serde(default, skip_serializing_if = "Option::is_none")]
757 pub rph: Option<u32>,
758 #[serde(default, skip_serializing_if = "Option::is_none")]
760 pub rpd: Option<u32>,
761 #[serde(default, skip_serializing_if = "Option::is_none")]
763 pub tpm: Option<u64>,
764 #[serde(default, skip_serializing_if = "Option::is_none")]
766 pub tph: Option<u64>,
767 #[serde(default, skip_serializing_if = "Option::is_none")]
769 pub tpd: Option<u64>,
770 #[serde(default, skip_serializing_if = "Option::is_none")]
772 pub input_tpm: Option<u64>,
773 #[serde(default, skip_serializing_if = "Option::is_none")]
775 pub output_tpm: Option<u64>,
776 #[serde(default, skip_serializing_if = "Option::is_none")]
778 pub concurrency: Option<u32>,
779 #[serde(default, skip_serializing_if = "Option::is_none")]
781 pub tier: Option<String>,
782 #[serde(default, skip_serializing_if = "Option::is_none")]
784 pub source_url: Option<String>,
785 #[serde(default, skip_serializing_if = "Option::is_none")]
787 pub last_verified: Option<String>,
788 #[serde(default, skip_serializing_if = "Option::is_none")]
790 pub notes: Option<String>,
791}
792
793impl RateLimitsDef {
794 pub fn is_empty(&self) -> bool {
795 self.rpm.is_none()
796 && self.rph.is_none()
797 && self.rpd.is_none()
798 && self.tpm.is_none()
799 && self.tph.is_none()
800 && self.tpd.is_none()
801 && self.input_tpm.is_none()
802 && self.output_tpm.is_none()
803 && self.concurrency.is_none()
804 && self.tier.is_none()
805 && self.source_url.is_none()
806 && self.last_verified.is_none()
807 && self.notes.is_none()
808 }
809
810 pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
811 if self.rpm.is_none() {
812 self.rpm = rpm;
813 }
814 (!self.is_empty()).then_some(self)
815 }
816}
817
818#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
822pub struct ServingPerformanceDef {
823 #[serde(default, skip_serializing_if = "Option::is_none")]
825 pub observed_ttft_ms: Option<u64>,
826 #[serde(default, skip_serializing_if = "Option::is_none")]
828 pub output_tokens_per_sec: Option<f64>,
829 #[serde(default, skip_serializing_if = "Option::is_none")]
832 pub time_to_answer_s: Option<f64>,
833 #[serde(default, skip_serializing_if = "Option::is_none")]
836 pub source: Option<String>,
837 #[serde(default, skip_serializing_if = "Option::is_none")]
839 pub source_url: Option<String>,
840 #[serde(default, skip_serializing_if = "Option::is_none")]
842 pub last_verified: Option<String>,
843 #[serde(default, skip_serializing_if = "Option::is_none")]
845 pub sample_size: Option<u32>,
846 #[serde(default, skip_serializing_if = "Option::is_none")]
848 pub notes: Option<String>,
849}
850
851impl ServingPerformanceDef {
852 pub fn is_empty(&self) -> bool {
853 self.observed_ttft_ms.is_none()
854 && self.output_tokens_per_sec.is_none()
855 && self.time_to_answer_s.is_none()
856 && self.source.is_none()
857 && self.source_url.is_none()
858 && self.last_verified.is_none()
859 && self.sample_size.is_none()
860 && self.notes.is_none()
861 }
862}
863
864#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
868pub struct ModelArchitectureDef {
869 #[serde(default, skip_serializing_if = "Option::is_none")]
871 pub parameter_count_b: Option<f64>,
872 #[serde(default, skip_serializing_if = "Option::is_none")]
874 pub active_parameter_count_b: Option<f64>,
875 #[serde(default, skip_serializing_if = "Option::is_none")]
877 pub moe: Option<bool>,
878 #[serde(default, skip_serializing_if = "Option::is_none")]
880 pub quantization: Option<String>,
881 #[serde(default, skip_serializing_if = "Option::is_none")]
883 pub precision: Option<String>,
884 #[serde(default, skip_serializing_if = "Option::is_none")]
886 pub license: Option<String>,
887 #[serde(default, skip_serializing_if = "Option::is_none")]
889 pub tokenizer: Option<String>,
890 #[serde(default, skip_serializing_if = "Option::is_none")]
892 pub knowledge_cutoff: Option<String>,
893 #[serde(default, skip_serializing_if = "Option::is_none")]
895 pub source_url: Option<String>,
896 #[serde(default, skip_serializing_if = "Option::is_none")]
898 pub last_verified: Option<String>,
899}
900
901impl ModelArchitectureDef {
902 pub fn is_empty(&self) -> bool {
903 self.parameter_count_b.is_none()
904 && self.active_parameter_count_b.is_none()
905 && self.moe.is_none()
906 && self.quantization.is_none()
907 && self.precision.is_none()
908 && self.license.is_none()
909 && self.tokenizer.is_none()
910 && self.knowledge_cutoff.is_none()
911 && self.source_url.is_none()
912 && self.last_verified.is_none()
913 }
914}
915
/// Description of a model's "fast mode" option.
///
/// `param` and `value` are required; every other field is optional and
/// defaults to `None` when absent.
// NOTE(review): presumably `param`/`value` are the request parameter name and
// the value that enables fast mode — confirm against the request builder.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    pub param: String,
    pub value: String,
    #[serde(default)]
    pub beta_header: Option<String>,
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    #[serde(default)]
    pub status: Option<String>,
    /// Pricing that applies in fast mode, when it is listed separately.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    pub note: Option<String>,
}
952
/// One row of the model catalog.
///
/// `name`, `provider` and `context_window` are required; every other field is
/// optional on input and falls back to its type's default (`None`, `false`,
/// an empty collection, or `ModelAvailability::Serverless`).
///
/// The type round-trips through `toml::Value` when patches are applied
/// (`patched_model_row`), so it needs both `Serialize` and `Deserialize`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    pub name: String,
    pub provider: String,
    pub context_window: u64,
    #[serde(default)]
    pub logical_model: Option<String>,
    #[serde(default)]
    pub equivalence_group: Option<String>,
    #[serde(default)]
    pub served_variant: Option<String>,
    #[serde(default)]
    pub wire_model: Option<String>,
    #[serde(default)]
    pub api_dialect: Option<String>,
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    pub performance: Option<ServingPerformanceDef>,
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    // NOTE(review): units of `stream_timeout` are not visible here (seconds?) — confirm.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    #[serde(default)]
    pub capabilities: Vec<String>,
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    pub deprecated: bool,
    #[serde(default)]
    pub deprecation_note: Option<String>,
    #[serde(default)]
    pub superseded_by: Option<String>,
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Defaults to `ModelAvailability::Serverless` when absent.
    #[serde(default)]
    pub availability: ModelAvailability,
    #[serde(default)]
    pub tier: Option<String>,
    #[serde(default)]
    pub open_weight: Option<bool>,
    #[serde(default)]
    pub strengths: Vec<String>,
    /// Numeric scores keyed by benchmark name.
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    #[serde(default)]
    pub family: Option<String>,
    #[serde(default)]
    pub lineage: Option<String>,
    #[serde(default)]
    pub complementary_with: Vec<String>,
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
1071
1072#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
1073#[serde(rename_all = "snake_case")]
1074pub enum ModelAvailability {
1075 #[default]
1079 Serverless,
1080 Dedicated,
1084 Unknown,
1088}
1089
1090impl ModelAvailability {
1091 pub fn as_str(self) -> &'static str {
1092 match self {
1093 Self::Serverless => "serverless",
1094 Self::Dedicated => "dedicated",
1095 Self::Unknown => "unknown",
1096 }
1097 }
1098
1099 pub fn parse(value: &str) -> Option<Self> {
1100 match value {
1101 "serverless" => Some(Self::Serverless),
1102 "dedicated" => Some(Self::Dedicated),
1103 "unknown" => Some(Self::Unknown),
1104 _ => None,
1105 }
1106 }
1107}
1108
/// A fully resolved model reference. Unlike the catalog rows, every
/// descriptive field here is a concrete `String`; only `alias` is optional.
// NOTE(review): `alias` is presumably the alias the lookup started from, when
// there was one — confirm against the resolver.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    pub id: String,
    pub provider: String,
    pub alias: Option<String>,
    pub tool_format: String,
    pub tier: String,
    pub family: String,
    pub lineage: String,
}
1119
/// Inputs for selecting a complementary reviewer model for a given author
/// model.
// NOTE(review): `max_price_multiplier` presumably caps the reviewer's price
// relative to the author's — confirm against the selection logic.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    pub author_model: String,
    pub author_provider: Option<String>,
    pub intent: ComplementaryReviewerIntent,
    pub max_price_multiplier: Option<f64>,
}
1127
/// What kind of review a complementary reviewer is being selected for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    Review,
    Critique,
    PlanReview,
}

impl ComplementaryReviewerIntent {
    /// Inverse of [`as_str`](Self::as_str): returns the variant whose
    /// identifier equals `value` exactly, or `None` for anything else.
    pub fn parse(value: &str) -> Option<Self> {
        let intents = [Self::Review, Self::Critique, Self::PlanReview];
        intents
            .iter()
            .copied()
            .find(|intent| intent.as_str() == value)
    }

    /// The `snake_case` identifier for this intent.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Review => "review",
            Self::Critique => "critique",
            Self::PlanReview => "plan_review",
        }
    }
}
1153
/// Result of a complementary-reviewer selection: the author and chosen
/// reviewer identities plus fallback and cost information.
// NOTE(review): `fallback_code` presumably carries a `ReviewerFallbackCode::as_code`
// string — confirm against the code that builds this value.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    pub intent: String,
    pub author: ComplementaryModelIdentity,
    pub reviewer: ComplementaryModelIdentity,
    pub fallback: bool,
    pub fallback_reason: Option<String>,
    /// Omitted from serialized output while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_code: Option<String>,
    pub reason: String,
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
1171
/// Machine-readable reason for a reviewer selection falling back.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReviewerFallbackCode {
    UnknownAuthorFamily,
    NoDiffFamilyWithinPrice,
    NoDiffFamilyServerless,
    AllDiffFamilyExcluded,
}

impl ReviewerFallbackCode {
    /// The stable `snake_case` code for this fallback reason.
    pub fn as_code(self) -> &'static str {
        let code = match self {
            ReviewerFallbackCode::UnknownAuthorFamily => "unknown_author_family",
            ReviewerFallbackCode::NoDiffFamilyWithinPrice => "no_diff_family_within_price",
            ReviewerFallbackCode::NoDiffFamilyServerless => "no_diff_family_serverless",
            ReviewerFallbackCode::AllDiffFamilyExcluded => "all_diff_family_excluded",
        };
        code
    }
}
1200
/// Identity of one model taking part in a reviewer selection (used for both
/// the author and the reviewer in `ComplementaryReviewerSelection`).
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    pub id: String,
    pub provider: String,
    pub family: String,
    pub lineage: String,
    pub tier: String,
    /// Omitted from serialized output while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,
}
1211
/// Cost estimate attached to a reviewer selection.
// NOTE(review): `total_per_mtok` is presumably input + output, and
// `multiplier_vs_author` a ratio against the author model's price — confirm.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    pub total_per_mtok: f64,
    /// Omitted from serialized output while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multiplier_vs_author: Option<f64>,
}
1220
/// A rule that assigns `provider` to matching model identifiers.
///
/// The three matcher fields are all optional and default to `None`.
// NOTE(review): how `pattern`, `contains` and `exact` are evaluated (and in
// what precedence) is defined by the matcher outside this view — confirm.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub provider: String,
}
1231
/// A rule that assigns `tier` to matching model identifiers.
///
/// The three matcher fields are all optional and default to `None`.
// NOTE(review): how `pattern`, `contains` and `exact` are evaluated (and in
// what precedence) is defined by the matcher outside this view — confirm.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub tier: String,
}
1242
1243#[derive(Debug, Clone, Deserialize)]
1244pub struct TierDefaults {
1245 #[serde(default = "default_mid")]
1246 pub default: String,
1247}
1248
1249impl Default for TierDefaults {
1250 fn default() -> Self {
1251 Self {
1252 default: default_mid(),
1253 }
1254 }
1255}
1256
/// The built-in default tier name.
fn default_mid() -> String {
    String::from("mid")
}
1260
1261pub fn load_config() -> &'static ProvidersConfig {
1263 CONFIG.get_or_init(|| {
1264 let mut config = default_config();
1265 let verbose_config_logging = matches!(
1266 std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
1267 Some("1" | "true" | "TRUE" | "yes" | "YES")
1268 ) || matches!(
1269 std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
1270 Some("1" | "true" | "TRUE" | "yes" | "YES")
1271 );
1272 if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
1273 if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
1274 config.merge_from(&overlay);
1275 let _ = CONFIG_PATH.set(path);
1276 return config;
1277 }
1278 }
1279 if should_load_home_config() {
1280 if let Some(home) = dirs_or_home() {
1281 let path = format!("{home}/.config/harn/providers.toml");
1282 if let Some(overlay) = read_external_config(&path, false) {
1283 config.merge_from(&overlay);
1284 let _ = CONFIG_PATH.set(path);
1285 return config;
1286 }
1287 }
1288 }
1289 config
1290 })
1291}
1292
1293fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1294 match std::fs::read_to_string(path) {
1295 Ok(content) => match parse_config_toml(&content) {
1299 Ok(config) => {
1300 if verbose {
1301 eprintln!(
1302 "[llm_config] Loaded {} providers, {} aliases from {}",
1303 config.providers.len(),
1304 config.aliases.len(),
1305 path
1306 );
1307 }
1308 Some(config)
1309 }
1310 Err(error) => {
1311 eprintln!("[llm_config] TOML parse error in {path}: {error}");
1312 None
1313 }
1314 },
1315 Err(error) => {
1316 if verbose {
1317 eprintln!("[llm_config] Cannot read {path}: {error}");
1318 }
1319 None
1320 }
1321 }
1322}
1323
/// Whether the per-user home config may be consulted: never in test builds.
fn should_load_home_config() -> bool {
    let compiled_for_tests = cfg!(test);
    !compiled_for_tests
}
1329
1330pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1333 toml::from_str::<ProvidersConfig>(src)
1334}
1335
1336pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1339 let _ = load_config();
1341 CONFIG_PATH.get().map(std::path::PathBuf::from)
1342}
1343
1344pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1348 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1349}
1350
1351pub fn clear_user_overrides() {
1353 set_user_overrides(None);
1354}
1355
1356pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1361 *runtime_catalog_overlay()
1362 .write()
1363 .expect("runtime catalog overlay poisoned") = config;
1364}
1365
1366pub fn clear_runtime_catalog_overlay() {
1367 set_runtime_catalog_overlay(None);
1368}
1369
1370pub(crate) fn effective_config() -> ProvidersConfig {
1371 let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1372 effective_config_with_user_overrides(user_overrides.as_ref())
1373}
1374
1375pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1393 let mut config = default_config();
1394 if let Some(overlay) = explicit_overlay {
1395 config.merge_from(overlay);
1396 }
1397 config
1398}
1399
1400pub(crate) fn effective_config_with_user_overrides(
1401 user_overrides: Option<&ProvidersConfig>,
1402) -> ProvidersConfig {
1403 let mut merged = load_config().clone();
1404 if let Some(overlay) = runtime_catalog_overlay()
1405 .read()
1406 .expect("runtime catalog overlay poisoned")
1407 .as_ref()
1408 {
1409 merged.merge_from(overlay);
1410 }
1411 if let Some(overlay) = user_overrides {
1412 merged.merge_from(overlay);
1413 }
1414 merged
1415}
1416
1417fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
1418 RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
1419}
1420
1421pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1423 let config = effective_config();
1424 if let Some(a) = config.aliases.get(alias) {
1425 return (a.id.clone(), Some(a.provider.clone()));
1426 }
1427 (normalize_model_id(alias), None)
1428}
1429
/// Strips a leading provider-selector prefix from `raw`, if present.
///
/// Only the first matching entry of [`PROVIDER_SELECTOR_PREFIXES`] is removed,
/// and only once; ids without a known prefix are returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes that name a provider rather than being part of the
/// model id. Checked in this order.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
1447
1448pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1451 let config = effective_config();
1452 if let Some(alias) = config.aliases.get(selector) {
1453 let id = alias.id.clone();
1454 let provider = alias.provider.clone();
1455 let requested = alias
1456 .tool_format
1457 .clone()
1458 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1459 let tool_format = guard_tool_format(&provider, &id, &requested, Some(selector));
1460 return ResolvedModel {
1461 tier: model_tier_with_config(&config, &id),
1462 family: model_family_with_config(&config, &provider, &id),
1463 lineage: model_lineage_with_config(&config, &provider, &id),
1464 id,
1465 provider,
1466 alias: Some(selector.to_string()),
1467 tool_format,
1468 };
1469 }
1470
1471 let id = normalize_model_id(selector);
1472 let inference = infer_provider_with_config(&config, selector);
1473 let source = inference.source;
1474 let provider = inference.provider;
1475 let requested = default_tool_format_with_config(&config, &id, &provider);
1476 let tool_format = guard_tool_format(&provider, &id, &requested, None);
1477 let tier = model_tier_with_config(&config, &id);
1478 let family = model_family_with_inference_source(&config, &provider, &id, source);
1479 let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1480 ResolvedModel {
1481 id,
1482 provider,
1483 alias: None,
1484 tool_format,
1485 tier,
1486 family,
1487 lineage,
1488 }
1489}
1490
1491fn guard_tool_format(provider: &str, model: &str, requested: &str, alias: Option<&str>) -> String {
1498 let decision = crate::llm::capabilities::validate_tool_format(provider, model, requested);
1499 if let Some(reason) = &decision.correction {
1500 tracing::warn!(
1501 target: "harn::llm::tool_format",
1502 alias = alias.unwrap_or(""),
1503 "{reason}"
1504 );
1505 }
1506 decision.effective
1507}
1508
1509pub fn infer_provider(model_id: &str) -> String {
1511 infer_provider_detail(model_id).provider
1512}
1513
1514pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1516 let config = effective_config();
1517 infer_provider_with_config(&config, model_id)
1518}
1519
1520fn infer_provider_with_config(
1521 config: &ProvidersConfig,
1522 model_id: &str,
1523) -> crate::llm::provider::ProviderInference {
1524 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1525 return crate::llm::provider::ProviderInference::builtin("ollama");
1526 }
1527 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1528 return crate::llm::provider::ProviderInference::builtin("huggingface");
1529 }
1530 let normalized_id = normalize_model_id(model_id);
1536 if let Some(model) = config
1537 .models
1538 .get(model_id)
1539 .or_else(|| config.models.get(&normalized_id))
1540 {
1541 return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1542 }
1543 for rule in &config.inference_rules {
1544 if let Some(exact) = &rule.exact {
1545 if model_id == exact {
1546 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1547 }
1548 }
1549 if let Some(pattern) = &rule.pattern {
1550 if glob_match(pattern, model_id) {
1551 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1552 }
1553 }
1554 if let Some(substr) = &rule.contains {
1555 if model_id.contains(substr.as_str()) {
1556 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1557 }
1558 }
1559 }
1560 crate::llm::provider::infer_provider_from_model_id(
1561 model_id,
1562 &default_provider_with_config(config),
1563 )
1564}
1565
1566pub fn default_provider() -> String {
1567 let config = effective_config();
1568 default_provider_with_config(&config)
1569}
1570
1571fn default_provider_with_config(config: &ProvidersConfig) -> String {
1572 std::env::var("HARN_DEFAULT_PROVIDER")
1573 .ok()
1574 .map(|value| value.trim().to_string())
1575 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1576 .or_else(|| {
1577 config
1578 .default_provider
1579 .as_deref()
1580 .map(str::trim)
1581 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1582 .map(str::to_string)
1583 })
1584 .unwrap_or_else(|| auto_select_provider(config))
1585}
1586
/// Provider used by automatic selection when nothing is configured and no
/// credentials or local providers are found.
const FALLBACK_PROVIDER: &str = "anthropic";
1593
/// Set once the automatic-provider warning has been emitted, so it is logged
/// at most once per process.
static AUTO_PROVIDER_WARNED: AtomicBool = AtomicBool::new(false);
1595
1596fn provider_has_credentials(def: &ProviderDef) -> bool {
1598 auth_env_names(&def.auth_env)
1599 .iter()
1600 .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty()))
1601}
1602
1603fn provider_is_local(def: &ProviderDef) -> bool {
1606 def.local_runtime.is_some() || matches!(def.auth_env, AuthEnv::None)
1607}
1608
1609fn warn_auto_provider_once(message: &str) {
1611 if !AUTO_PROVIDER_WARNED.swap(true, Ordering::Relaxed) {
1612 crate::events::log_warn("llm_config", message);
1613 }
1614}
1615
1616fn auto_select_provider(config: &ProvidersConfig) -> String {
1622 const PREFERRED: &[&str] = &[
1625 "anthropic",
1626 "openai",
1627 "google",
1628 "azure-openai",
1629 "groq",
1630 "mistral",
1631 "deepseek",
1632 "xai",
1633 "openrouter",
1634 ];
1635 for name in PREFERRED {
1636 if config
1637 .providers
1638 .get(*name)
1639 .is_some_and(provider_has_credentials)
1640 {
1641 if *name != FALLBACK_PROVIDER {
1642 warn_auto_provider_once(&format!(
1643 "no default provider configured; using '{name}' (its API key is set). \
1644 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1645 ));
1646 }
1647 return (*name).to_string();
1648 }
1649 }
1650 for (name, def) in &config.providers {
1651 if provider_has_credentials(def) {
1652 warn_auto_provider_once(&format!(
1653 "no default provider configured; using '{name}' (its API key is set). \
1654 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1655 ));
1656 return name.clone();
1657 }
1658 }
1659 for (name, def) in &config.providers {
1661 if provider_is_local(def) {
1662 warn_auto_provider_once(&format!(
1663 "no provider API keys found; using local provider '{name}'. \
1664 Set an API key + HARN_DEFAULT_PROVIDER to use a cloud provider."
1665 ));
1666 return name.clone();
1667 }
1668 }
1669 warn_auto_provider_once(&format!(
1671 "no LLM provider configured and no API keys detected; defaulting to \
1672 '{FALLBACK_PROVIDER}'. Set ANTHROPIC_API_KEY (or another provider's key plus \
1673 HARN_DEFAULT_PROVIDER), or run a local model with `harn local launch`."
1674 ));
1675 FALLBACK_PROVIDER.to_string()
1676}
1677
1678pub fn model_tier(model_id: &str) -> String {
1680 let config = effective_config();
1681 model_tier_with_config(&config, model_id)
1682}
1683
1684pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1685 if let Some(model) = config.models.get(model_id) {
1687 if let Some(tier) = model.tier.as_deref() {
1688 let trimmed = tier.trim();
1689 if !trimmed.is_empty() {
1690 return trimmed.to_string();
1691 }
1692 }
1693 }
1694 for rule in &config.tier_rules {
1698 if let Some(exact) = &rule.exact {
1699 if model_id == exact {
1700 return rule.tier.clone();
1701 }
1702 }
1703 if let Some(pattern) = &rule.pattern {
1704 if glob_match(pattern, model_id) {
1705 return rule.tier.clone();
1706 }
1707 }
1708 if let Some(substr) = &rule.contains {
1709 if model_id.contains(substr.as_str()) {
1710 return rule.tier.clone();
1711 }
1712 }
1713 }
1714 config.tier_defaults.default.clone()
1715}
1716
1717pub fn model_family(provider: &str, model_id: &str) -> String {
1719 let config = effective_config();
1720 model_family_with_config(&config, provider, model_id)
1721}
1722
1723pub(crate) fn model_family_with_config(
1724 config: &ProvidersConfig,
1725 provider: &str,
1726 model_id: &str,
1727) -> String {
1728 catalog_family_token(config, model_id)
1729 .unwrap_or_else(|| derive_model_family(provider, model_id))
1730}
1731
1732fn model_family_with_inference_source(
1733 config: &ProvidersConfig,
1734 provider: &str,
1735 model_id: &str,
1736 source: crate::llm::provider::ProviderInferenceSource,
1737) -> String {
1738 if let Some(family) = catalog_family_token(config, model_id) {
1739 return family;
1740 }
1741 let id_family = derive_model_family("", model_id);
1742 if id_family != "unknown" {
1743 return id_family;
1744 }
1745 if matches!(
1746 source,
1747 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1748 ) {
1749 return "unknown".to_string();
1750 }
1751 derive_model_family(provider, model_id)
1752}
1753
1754pub fn model_lineage(provider: &str, model_id: &str) -> String {
1756 let config = effective_config();
1757 model_lineage_with_config(&config, provider, model_id)
1758}
1759
1760pub(crate) fn model_lineage_with_config(
1761 config: &ProvidersConfig,
1762 provider: &str,
1763 model_id: &str,
1764) -> String {
1765 catalog_lineage_token(config, model_id)
1766 .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1767}
1768
1769fn model_lineage_with_inference_source(
1770 config: &ProvidersConfig,
1771 provider: &str,
1772 model_id: &str,
1773 source: crate::llm::provider::ProviderInferenceSource,
1774) -> String {
1775 if let Some(lineage) = catalog_lineage_token(config, model_id) {
1776 return lineage;
1777 }
1778 let id_lineage = derive_model_lineage("", model_id);
1779 if id_lineage != "unknown" {
1780 return id_lineage;
1781 }
1782 if matches!(
1783 source,
1784 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1785 ) {
1786 return "unknown".to_string();
1787 }
1788 derive_model_lineage(provider, model_id)
1789}
1790
1791fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1792 config
1793 .models
1794 .get(model_id)
1795 .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1796}
1797
1798fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1799 config
1800 .models
1801 .get(model_id)
1802 .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1803}
1804
/// Canonicalizes a catalog token: trimmed, ASCII-lowercased, underscores
/// turned into hyphens. Missing or blank input yields `None`.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let trimmed = value?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.to_ascii_lowercase().replace('_', "-"))
}
1811
1812fn derive_model_family(provider: &str, model_id: &str) -> String {
1813 let id = model_id.to_ascii_lowercase();
1814 if contains_any(&id, &["claude", "anthropic.claude"]) {
1815 return "anthropic-claude".to_string();
1816 }
1817 if contains_any(&id, &["gemini", "google/gemini"]) {
1818 return "google-gemini".to_string();
1819 }
1820 if contains_any(&id, &["deepseek"]) {
1821 return "deepseek".to_string();
1822 }
1823 if contains_any(&id, &["qwen"]) {
1824 return "qwen".to_string();
1825 }
1826 if contains_any(&id, &["kimi", "moonshot"]) {
1827 return "kimi".to_string();
1828 }
1829 if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1830 return "glm".to_string();
1831 }
1832 if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1833 return "mistral".to_string();
1834 }
1835 if contains_any(&id, &["minimax"]) {
1836 return "minimax".to_string();
1837 }
1838 if contains_any(&id, &["llama"]) {
1839 return "llama".to_string();
1840 }
1841 if contains_any(&id, &["gemma"]) {
1842 return "gemma".to_string();
1843 }
1844 if is_openai_reasoning_model(&id) {
1845 return "openai-reasoning".to_string();
1846 }
1847 if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1848 return "openai-gpt".to_string();
1849 }
1850 match provider {
1851 "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1852 "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1853 "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1854 "deepseek" => "deepseek".to_string(),
1855 "zai" => "glm".to_string(),
1856 "minimax" => "minimax".to_string(),
1857 other if !other.is_empty() => normalize_identifier_token(other),
1858 _ => "unknown".to_string(),
1859 }
1860}
1861
1862fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1863 let id = model_id.to_ascii_lowercase();
1864 if contains_any(&id, &["haiku"]) {
1865 return "claude-haiku".to_string();
1866 }
1867 if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1868 return "claude-opus-adaptive".to_string();
1869 }
1870 if contains_any(&id, &["claude"]) {
1871 return "claude-sonnet-opus".to_string();
1872 }
1873 if contains_any(&id, &["gpt-5"]) {
1874 return "openai-gpt5".to_string();
1875 }
1876 if is_openai_reasoning_model(&id) {
1877 return "openai-reasoning".to_string();
1878 }
1879 if contains_any(&id, &["gpt-", "gpt_"]) {
1880 return "openai-legacy".to_string();
1881 }
1882 if contains_any(&id, &["gemini"]) {
1883 if contains_any(&id, &["flash"]) {
1884 return "gemini-flash".to_string();
1885 }
1886 return "gemini-pro".to_string();
1887 }
1888 if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1889 return "qwen3".to_string();
1890 }
1891 if contains_any(&id, &["gemma4", "gemma-4"]) {
1892 return "gemma4".to_string();
1893 }
1894 let family = derive_model_family(provider, model_id);
1895 if family == "unknown" {
1896 "unknown".to_string()
1897 } else {
1898 family
1899 }
1900}
1901
/// True when `haystack` contains at least one of `needles` as a substring.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(needle) {
            return true;
        }
    }
    false
}
1905
/// True when `haystack` begins with at least one of `prefixes`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(prefix) {
            return true;
        }
    }
    false
}
1909
1910fn is_openai_reasoning_model(id: &str) -> bool {
1911 starts_with_any(id, &["o1", "o3", "o4"])
1912 || contains_any(
1913 id,
1914 &[
1915 "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1916 ],
1917 )
1918}
1919
/// Turns an arbitrary identifier into a lower-case, hyphen-separated token.
///
/// Every run of characters that are not ASCII letters or digits acts as one
/// separator; leading and trailing separators are dropped.
fn normalize_identifier_token(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let mut token = String::with_capacity(lowered.len());
    let segments = lowered
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty());
    for segment in segments {
        if !token.is_empty() {
            token.push('-');
        }
        token.push_str(segment);
    }
    token
}
1938
1939pub fn provider_config(name: &str) -> Option<ProviderDef> {
1941 effective_config().providers.get(name).cloned()
1942}
1943
1944pub fn provider_protocol(name: &str) -> Option<String> {
1945 provider_config(name).and_then(|def| def.protocol)
1946}
1947
1948pub fn provider_uses_acp(name: &str) -> bool {
1949 provider_protocol(name)
1950 .as_deref()
1951 .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1952}
1953
1954pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1957 let config = effective_config();
1958 let mut params = BTreeMap::new();
1959 for (pattern, defaults) in &config.model_defaults {
1960 if glob_match(pattern, model_id) {
1961 for (k, v) in defaults {
1962 params.insert(k.clone(), v.clone());
1963 }
1964 }
1965 }
1966 params
1967}
1968
1969pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1983 let normalized = normalize_model_role_name(role);
1984 if normalized.is_empty() {
1985 return BTreeMap::new();
1986 }
1987 let config = effective_config();
1988 let mut params = BTreeMap::new();
1989 for key in role_lookup_keys(&normalized) {
1990 extend_model_role_defaults(&config, &key, &mut params);
1991 }
1992 apply_model_role_env_overrides(&normalized, &mut params);
1993 params
1994}
1995
1996fn extend_model_role_defaults(
1997 config: &ProvidersConfig,
1998 role: &str,
1999 params: &mut BTreeMap<String, toml::Value>,
2000) {
2001 for (configured_role, defaults) in &config.model_roles {
2002 if normalize_model_role_name(configured_role) == role {
2003 params.extend(defaults.clone());
2004 }
2005 }
2006 if let Some(defaults) = config.model_roles.get(role) {
2007 params.extend(defaults.clone());
2008 }
2009}
2010
/// Canonical role name: trimmed, ASCII-lowercased, hyphens turned into
/// underscores.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| match ch {
            '-' => '_',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
2014
/// Config keys to consult for a (normalized) role, lowest precedence first.
///
/// `merge` and `fast_apply` are treated as synonyms: each also consults the
/// other, with the requested spelling applied last.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let keys: &[&str] = match role {
        "merge" => &["fast_apply", "merge"],
        "fast_apply" => &["merge", "fast_apply"],
        other => return vec![other.to_string()],
    };
    keys.iter().map(|key| key.to_string()).collect()
}
2024
/// Converts a role name into the upper-case token used inside environment
/// variable names.
///
/// Runs of characters that are not ASCII letters or digits collapse into a
/// single `_`; leading and trailing separators are dropped.
fn role_env_token(role: &str) -> String {
    role.split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_uppercase)
        .collect::<Vec<_>>()
        .join("_")
}
2040
2041fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
2042 for alias in role_env_aliases(role) {
2043 apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
2044 apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
2045 apply_model_role_env_var(
2046 &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
2047 "route_policy",
2048 params,
2049 );
2050 apply_model_role_env_var(
2051 &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
2052 "provider",
2053 params,
2054 );
2055 apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
2056 apply_model_role_env_var(
2057 &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
2058 "route_policy",
2059 params,
2060 );
2061 }
2062}
2063
2064fn role_env_aliases(role: &str) -> Vec<String> {
2065 let token = role_env_token(role);
2066 if token.is_empty() {
2067 return Vec::new();
2068 }
2069 if token == "MERGE" {
2070 vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
2071 } else if token == "FAST_APPLY" {
2072 vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
2073 } else {
2074 vec![token]
2075 }
2076}
2077
2078fn apply_model_role_env_var(
2079 env_name: &str,
2080 option_name: &str,
2081 params: &mut BTreeMap<String, toml::Value>,
2082) {
2083 let Ok(value) = std::env::var(env_name) else {
2084 return;
2085 };
2086 let trimmed = value.trim();
2087 if trimmed.is_empty() {
2088 return;
2089 }
2090 params.insert(
2091 option_name.to_string(),
2092 toml::Value::String(trimmed.to_string()),
2093 );
2094}
2095
2096pub fn provider_names() -> Vec<String> {
2098 effective_config().providers.keys().cloned().collect()
2099}
2100
2101pub fn known_model_names() -> Vec<String> {
2103 effective_config().aliases.keys().cloned().collect()
2104}
2105
2106pub fn alias_entries() -> Vec<(String, AliasDef)> {
2107 effective_config().aliases.into_iter().collect()
2108}
2109
2110pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
2111 effective_config().alias_tool_calling.get(alias).cloned()
2112}
2113
2114pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
2116 let config = effective_config();
2117 model_catalog_entries_with_config(&config)
2118}
2119
2120pub(crate) fn model_catalog_entries_with_config(
2121 config: &ProvidersConfig,
2122) -> Vec<(String, ModelDef)> {
2123 sorted_model_entries_with_config(config)
2124 .into_iter()
2125 .map(|(id, model)| {
2126 let provider = model.provider.clone();
2127 (
2128 id.clone(),
2129 with_effective_capability_tags(id, provider, model),
2130 )
2131 })
2132 .collect()
2133}
2134
2135pub(crate) fn sorted_model_entries_with_config(
2136 config: &ProvidersConfig,
2137) -> Vec<(String, ModelDef)> {
2138 let mut entries: Vec<_> = config
2139 .models
2140 .iter()
2141 .map(|(id, model)| (id.clone(), model.clone()))
2142 .collect();
2143 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
2144 model_a
2145 .provider
2146 .cmp(&model_b.provider)
2147 .then_with(|| id_a.cmp(id_b))
2148 });
2149 entries
2150}
2151
2152pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
2153 effective_config()
2154 .models
2155 .get(model_id)
2156 .cloned()
2157 .map(|model| {
2158 let provider = model.provider.clone();
2159 with_effective_capability_tags(model_id.to_string(), provider, model)
2160 })
2161}
2162
2163pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
2164 model_catalog_entry(model_id).and_then(|model| model.rate_limits)
2165}
2166
2167pub fn wire_model_id(model_id: &str) -> String {
2168 model_catalog_entry(model_id)
2169 .and_then(|model| model.wire_model)
2170 .unwrap_or_else(|| model_id.to_string())
2171}
2172
2173pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
2174 provider_config(provider).and_then(|provider| {
2175 provider
2176 .rate_limits
2177 .unwrap_or_default()
2178 .with_rpm_fallback(provider.rpm)
2179 })
2180}
2181
2182pub fn model_equivalence_group(model_id: &str) -> Option<String> {
2183 model_catalog_entry(model_id).and_then(|model| {
2184 model
2185 .equivalence_group
2186 .or(model.logical_model)
2187 .filter(|group| !group.trim().is_empty())
2188 })
2189}
2190
2191pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
2195 let resolved = resolve_model_info(selector);
2196 let Some(group) = model_equivalence_group(&resolved.id) else {
2197 return Vec::new();
2198 };
2199 let config = effective_config();
2200 let Some(source) = config.models.get(&resolved.id) else {
2201 return Vec::new();
2202 };
2203 let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
2204 let source_context = source
2205 .runtime_context_window
2206 .unwrap_or(source.context_window);
2207
2208 sorted_model_entries_with_config(&config)
2209 .into_iter()
2210 .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
2211 .filter(|(_, model)| !model.deprecated)
2212 .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
2213 .filter(|(_, model)| {
2214 model.equivalence_group.as_deref() == Some(group.as_str())
2215 || model.logical_model.as_deref() == Some(group.as_str())
2216 })
2217 .filter(|(id, model)| {
2218 let caps = crate::llm::capabilities::lookup(&model.provider, id);
2219 let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
2220 candidate_context >= source_context
2221 && (!source_caps.native_tools || caps.native_tools)
2222 && (!source_caps.text_tool_wire_format_supported
2223 || caps.text_tool_wire_format_supported)
2224 && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
2225 && source_caps.structured_output_mode == caps.structured_output_mode
2226 })
2227 .map(|(id, model)| {
2228 let provider = model.provider.clone();
2229 (
2230 id.clone(),
2231 with_effective_capability_tags(id, provider, model),
2232 )
2233 })
2234 .collect()
2235}
2236
2237pub fn qc_default_model(provider: &str) -> Option<String> {
2238 std::env::var("BURIN_QC_MODEL")
2239 .ok()
2240 .filter(|value| !value.trim().is_empty())
2241 .or_else(|| {
2242 effective_config()
2243 .qc_defaults
2244 .get(&provider.to_lowercase())
2245 .cloned()
2246 })
2247}
2248
2249pub fn default_model_for_provider(provider: &str) -> String {
2250 if provider_uses_acp(provider) {
2251 return "default".to_string();
2252 }
2253 match provider {
2254 "local" => std::env::var("LOCAL_LLM_MODEL")
2255 .or_else(|_| std::env::var("HARN_LLM_MODEL"))
2256 .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
2257 "mlx" => std::env::var("MLX_MODEL_ID")
2258 .unwrap_or_else(|_| "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit".to_string()),
2259 "openai" => "gpt-4o-mini".to_string(),
2260 "ollama" => "llama3.2".to_string(),
2261 "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
2262 _ => "claude-sonnet-4-6".to_string(),
2263 }
2264}
2265
2266pub fn qc_defaults() -> BTreeMap<String, String> {
2267 effective_config().qc_defaults
2268}
2269
2270pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2271 effective_config()
2272 .models
2273 .get(model_id)
2274 .and_then(|model| model.pricing.clone())
2275}
2276
2277pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2282 effective_config()
2283 .models
2284 .get(model_id)
2285 .and_then(|model| model.fast_mode.as_ref())
2286 .and_then(|fast_mode| fast_mode.pricing.clone())
2287}
2288
2289pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
2290 model_pricing_per_mtok(model_id)
2291 .map(|pricing| {
2292 (
2293 pricing.input_per_mtok / 1000.0,
2294 pricing.output_per_mtok / 1000.0,
2295 )
2296 })
2297 .or_else(|| {
2298 let (input, output, _) = provider_economics(provider);
2299 match (input, output) {
2300 (Some(input), Some(output)) => Some((input, output)),
2301 _ => None,
2302 }
2303 })
2304}
2305
2306pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
2307 match auth_env {
2308 AuthEnv::None => Vec::new(),
2309 AuthEnv::Single(name) => vec![name.clone()],
2310 AuthEnv::Multiple(names) => names.clone(),
2311 }
2312}
2313
2314pub fn provider_key_available(provider: &str) -> bool {
2315 let Some(pdef) = provider_config(provider) else {
2316 return provider == "ollama";
2317 };
2318 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
2319 return true;
2320 }
2321 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
2322 std::env::var(env_name)
2323 .ok()
2324 .is_some_and(|value| !value.trim().is_empty())
2325 })
2326}
2327
2328pub fn available_provider_names() -> Vec<String> {
2329 provider_names()
2330 .into_iter()
2331 .filter(|provider| provider_key_available(provider))
2332 .collect()
2333}
2334
2335pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
2337 provider_config(provider)
2338 .map(|p| p.features.iter().any(|f| f == feature))
2339 .unwrap_or(false)
2340}
2341
2342pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
2346 provider_config(provider)
2347 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
2348 .unwrap_or((None, None, None))
2349}
2350
/// The channel a tool format travels over.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatChannel {
    /// The `"native"` tool format.
    Native,
    /// The `"text"` and `"json"` tool formats.
    Text,
}
2366
2367pub fn tool_format_channel(format: &str) -> Option<ToolFormatChannel> {
2375 match format {
2376 "native" => Some(ToolFormatChannel::Native),
2377 "text" | "json" => Some(ToolFormatChannel::Text),
2378 _ => None,
2379 }
2380}
2381
2382pub fn is_known_tool_format(format: &str) -> bool {
2387 tool_format_channel(format).is_some()
2388}
2389
2390pub fn default_tool_format(model: &str, provider: &str) -> String {
2396 let config = effective_config();
2397 default_tool_format_with_config(&config, model, provider)
2398}
2399
2400fn default_tool_format_with_config(
2401 config: &ProvidersConfig,
2402 model: &str,
2403 provider: &str,
2404) -> String {
2405 for (name, alias) in &config.aliases {
2407 let matches = (alias.id == model && alias.provider == provider) || name == model;
2408 if matches {
2409 if let Some(ref fmt) = alias.tool_format {
2410 return fmt.clone();
2411 }
2412 }
2413 }
2414 let capabilities = crate::llm::capabilities::lookup(provider, model);
2415 if let Some(format) = capabilities.preferred_tool_format.as_deref() {
2416 if is_known_tool_format(format) {
2423 return format.to_string();
2424 }
2425 }
2426 let capability_matrix_native = capabilities.native_tools;
2427 let legacy_provider_native = config
2428 .providers
2429 .get(provider)
2430 .map(|p| p.features.iter().any(|f| f == "native_tools"))
2431 .unwrap_or(false);
2432 if capability_matrix_native || legacy_provider_native {
2433 "native".to_string()
2434 } else {
2435 "json".to_string()
2446 }
2447}
2448
2449fn with_effective_capability_tags(
2450 model_id: String,
2451 provider: String,
2452 mut model: ModelDef,
2453) -> ModelDef {
2454 model.capabilities = effective_model_capability_tags(&provider, &model_id);
2455 model
2456}
2457
2458pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2462 let caps = crate::llm::capabilities::lookup(provider, model_id);
2463 capability_tags_from_capabilities(&caps)
2464}
2465
2466pub(crate) fn capability_tags_from_capabilities(
2467 caps: &crate::llm::capabilities::Capabilities,
2468) -> Vec<String> {
2469 let mut tags = Vec::new();
2470 tags.push("streaming".to_string());
2473 if caps.native_tools || caps.text_tool_wire_format_supported {
2474 tags.push("tools".to_string());
2475 }
2476 if !caps.tool_search.is_empty() {
2477 tags.push("tool_search".to_string());
2478 }
2479 if caps.vision || caps.vision_supported {
2480 tags.push("vision".to_string());
2481 }
2482 if caps.audio {
2483 tags.push("audio".to_string());
2484 }
2485 if caps.pdf {
2486 tags.push("pdf".to_string());
2487 }
2488 if caps.video {
2489 tags.push("video".to_string());
2490 }
2491 if caps.files_api_supported {
2492 tags.push("files".to_string());
2493 }
2494 if caps.prompt_caching {
2495 tags.push("prompt_caching".to_string());
2496 }
2497 if !caps.thinking_modes.is_empty() {
2498 tags.push("thinking".to_string());
2499 }
2500 if caps.interleaved_thinking_supported
2501 || caps
2502 .thinking_modes
2503 .iter()
2504 .any(|mode| mode == "adaptive" || mode == "effort")
2505 {
2506 tags.push("extended_thinking".to_string());
2507 }
2508 if caps.structured_output.is_some() || caps.json_schema.is_some() {
2509 tags.push("structured_output".to_string());
2510 }
2511 tags
2512}
2513
2514pub fn resolve_tier_model(
2516 target: &str,
2517 preferred_provider: Option<&str>,
2518) -> Option<(String, String)> {
2519 let config = effective_config();
2520
2521 let candidate_aliases = if let Some(provider) = preferred_provider {
2522 vec![
2523 format!("{provider}/{target}"),
2524 format!("{provider}:{target}"),
2525 format!("tier/{target}"),
2526 target.to_string(),
2527 ]
2528 } else {
2529 vec![format!("tier/{target}"), target.to_string()]
2530 };
2531
2532 for alias_name in candidate_aliases {
2533 if let Some(alias) = config.aliases.get(&alias_name) {
2534 return Some((alias.id.clone(), alias.provider.clone()));
2535 }
2536 }
2537
2538 None
2539}
2540
2541pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2545 let config = effective_config();
2546 let mut seen = std::collections::BTreeSet::new();
2547 let mut candidates = Vec::new();
2548
2549 for alias in config.aliases.values() {
2550 let pair = (alias.id.clone(), alias.provider.clone());
2551 if seen.contains(&pair) {
2552 continue;
2553 }
2554 if model_tier(&alias.id) == target {
2555 seen.insert(pair.clone());
2556 candidates.push(pair);
2557 }
2558 }
2559
2560 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2561 provider_a
2562 .cmp(provider_b)
2563 .then_with(|| model_a.cmp(model_b))
2564 });
2565 candidates
2566}
2567
2568pub fn all_model_candidates() -> Vec<(String, String)> {
2571 let config = effective_config();
2572 let mut seen = std::collections::BTreeSet::new();
2573 let mut candidates = Vec::new();
2574
2575 for alias in config.aliases.values() {
2576 let pair = (alias.id.clone(), alias.provider.clone());
2577 if seen.insert(pair.clone()) {
2578 candidates.push(pair);
2579 }
2580 }
2581
2582 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2583 provider_a
2584 .cmp(provider_b)
2585 .then_with(|| model_a.cmp(model_b))
2586 });
2587 candidates
2588}
2589
2590pub fn pick_complementary_reviewer(
2591 options: ComplementaryReviewerOptions,
2592) -> ComplementaryReviewerSelection {
2593 let config = effective_config();
2594 let mut author = resolve_model_info(&options.author_model);
2595 if let Some(provider) = options
2596 .author_provider
2597 .as_deref()
2598 .map(str::trim)
2599 .filter(|provider| !provider.is_empty())
2600 {
2601 author.provider = provider.to_string();
2602 author.family = model_family_with_config(&config, &author.provider, &author.id);
2603 author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
2604 author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
2605 }
2606 let author_entry = config.models.get(&author.id);
2607 let author_identity = complementary_identity(
2608 author.id.clone(),
2609 author.provider.clone(),
2610 author.family.clone(),
2611 author.lineage.clone(),
2612 author.tier.clone(),
2613 author_entry.and_then(|model| model.pricing.clone()),
2614 );
2615
2616 let fallback =
2617 |code: ReviewerFallbackCode, fallback_reason: String| ComplementaryReviewerSelection {
2618 intent: options.intent.as_str().to_string(),
2619 reviewer: author_identity.clone(),
2620 estimated_incremental_cost: cost_estimate(
2621 author_identity.pricing.as_ref(),
2622 author_identity.pricing.as_ref(),
2623 ),
2624 author: author_identity.clone(),
2625 fallback: true,
2626 reason: format!(
2627 "using author model {} because {fallback_reason}",
2628 author_identity.id
2629 ),
2630 fallback_reason: Some(fallback_reason),
2631 fallback_code: Some(code.as_code().to_string()),
2632 };
2633
2634 if author_identity.family == "unknown" {
2635 return fallback(
2636 ReviewerFallbackCode::UnknownAuthorFamily,
2637 "author model family is unknown".to_string(),
2638 );
2639 }
2640
2641 let preferred_families = author_entry
2642 .map(|model| model.complementary_with.clone())
2643 .unwrap_or_default();
2644 let author_refs = reviewer_match_refs(&author_identity);
2645 let mut rejected_by_price = 0usize;
2646 let mut diff_family_seen = 0usize;
2647 let mut candidates = Vec::new();
2648
2649 for (id, model) in config.models.iter() {
2650 if id == &author_identity.id && model.provider == author_identity.provider {
2651 continue;
2652 }
2653 if model.deprecated || model.availability != ModelAvailability::Serverless {
2654 continue;
2655 }
2656 let family = model_family_with_config(&config, &model.provider, id);
2657 if family == "unknown" || family == author_identity.family {
2658 continue;
2659 }
2660 diff_family_seen += 1;
2661 let lineage = model_lineage_with_config(&config, &model.provider, id);
2662 let candidate_identity = complementary_identity(
2663 id.clone(),
2664 model.provider.clone(),
2665 family,
2666 lineage,
2667 model_tier_with_config(&config, id),
2668 model.pricing.clone(),
2669 );
2670 if model
2671 .avoid_as_reviewer_for
2672 .iter()
2673 .any(|selector| refs_contain_selector(&author_refs, selector))
2674 {
2675 continue;
2676 }
2677 if exceeds_price_cap(
2678 author_identity.pricing.as_ref(),
2679 candidate_identity.pricing.as_ref(),
2680 options.max_price_multiplier,
2681 ) {
2682 rejected_by_price += 1;
2683 continue;
2684 }
2685 let score = reviewer_score(
2686 &options,
2687 &author_identity,
2688 &candidate_identity,
2689 model,
2690 &preferred_families,
2691 );
2692 candidates.push(ReviewerCandidate {
2693 identity: candidate_identity,
2694 score,
2695 });
2696 }
2697
2698 candidates.sort_by(|left, right| {
2699 right
2700 .score
2701 .partial_cmp(&left.score)
2702 .unwrap_or(std::cmp::Ordering::Equal)
2703 .then_with(|| left.identity.provider.cmp(&right.identity.provider))
2704 .then_with(|| left.identity.id.cmp(&right.identity.id))
2705 });
2706
2707 let Some(best) = candidates.into_iter().next() else {
2708 if rejected_by_price > 0 {
2709 let cap = options.max_price_multiplier.unwrap_or_default();
2710 return fallback(
2711 ReviewerFallbackCode::NoDiffFamilyWithinPrice,
2712 format!("no different-family reviewer satisfied max_price_multiplier {cap}"),
2713 );
2714 }
2715 if diff_family_seen == 0 {
2716 return fallback(
2717 ReviewerFallbackCode::NoDiffFamilyServerless,
2718 "no active serverless different-family reviewer is cataloged".to_string(),
2719 );
2720 }
2721 return fallback(
2722 ReviewerFallbackCode::AllDiffFamilyExcluded,
2723 "all different-family reviewer candidates were excluded".to_string(),
2724 );
2725 };
2726
2727 let estimate = cost_estimate(
2728 best.identity.pricing.as_ref(),
2729 author_identity.pricing.as_ref(),
2730 );
2731 ComplementaryReviewerSelection {
2732 intent: options.intent.as_str().to_string(),
2733 reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
2734 estimated_incremental_cost: estimate,
2735 author: author_identity,
2736 reviewer: best.identity,
2737 fallback: false,
2738 fallback_reason: None,
2739 fallback_code: None,
2740 }
2741}
2742
/// A catalog model that passed every reviewer filter in
/// `pick_complementary_reviewer`, paired with its ranking score.
#[derive(Debug, Clone)]
struct ReviewerCandidate {
    /// Identity of the candidate model as built by `complementary_identity`.
    identity: ComplementaryModelIdentity,
    /// Score produced by `reviewer_score`; higher scores rank first.
    score: f64,
}
2748
2749fn complementary_identity(
2750 id: String,
2751 provider: String,
2752 family: String,
2753 lineage: String,
2754 tier: String,
2755 pricing: Option<ModelPricing>,
2756) -> ComplementaryModelIdentity {
2757 ComplementaryModelIdentity {
2758 id,
2759 provider,
2760 family,
2761 lineage,
2762 tier,
2763 pricing,
2764 }
2765}
2766
2767fn reviewer_score(
2768 options: &ComplementaryReviewerOptions,
2769 author: &ComplementaryModelIdentity,
2770 candidate: &ComplementaryModelIdentity,
2771 model: &ModelDef,
2772 preferred_families: &[String],
2773) -> f64 {
2774 let candidate_refs = reviewer_match_refs(candidate);
2775 let mut score = 0.0;
2776 if let Some(rank) = preferred_families
2777 .iter()
2778 .position(|selector| refs_contain_selector(&candidate_refs, selector))
2779 {
2780 score += 1_000.0 - rank as f64;
2781 }
2782 if candidate.provider != author.provider {
2783 score += 100.0;
2784 }
2785 score += match tier_distance(&author.tier, &candidate.tier) {
2786 0 => 80.0,
2787 1 => 45.0,
2788 2 => 15.0,
2789 _ => 0.0,
2790 };
2791 for strength in intent_strengths(options.intent) {
2792 if model.strengths.iter().any(|tag| tag == strength) {
2793 score += 8.0;
2794 }
2795 }
2796 if model.capabilities.iter().any(|tag| tag == "tools") {
2797 score += 4.0;
2798 }
2799 if let (Some(author_total), Some(candidate_total)) = (
2800 pricing_total(author.pricing.as_ref()),
2801 pricing_total(candidate.pricing.as_ref()),
2802 ) {
2803 if author_total > 0.0 {
2804 let ratio = candidate_total / author_total;
2805 if ratio <= 1.0 {
2806 score += 20.0;
2807 }
2808 score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2809 }
2810 }
2811 score
2812}
2813
2814fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2815 match intent {
2816 ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2817 ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2818 ComplementaryReviewerIntent::PlanReview => {
2819 &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2820 }
2821 }
2822}
2823
2824fn tier_distance(left: &str, right: &str) -> u8 {
2825 let left = tier_rank(left);
2826 let right = tier_rank(right);
2827 left.abs_diff(right)
2828}
2829
/// Maps a tier name onto an ordinal: `"small"` is 0, `"frontier"` and
/// `"reasoning"` are 2, and everything else (including `"mid"` and unknown
/// names) is 1.
fn tier_rank(tier: &str) -> u8 {
    if tier == "small" {
        0
    } else if matches!(tier, "frontier" | "reasoning") {
        2
    } else {
        1
    }
}
2838
2839fn exceeds_price_cap(
2840 author_pricing: Option<&ModelPricing>,
2841 candidate_pricing: Option<&ModelPricing>,
2842 max_price_multiplier: Option<f64>,
2843) -> bool {
2844 let Some(max_price_multiplier) = max_price_multiplier else {
2845 return false;
2846 };
2847 let Some(author_total) = pricing_total(author_pricing) else {
2848 return false;
2849 };
2850 let Some(candidate_total) = pricing_total(candidate_pricing) else {
2851 return true;
2852 };
2853 author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2854}
2855
2856fn cost_estimate(
2857 reviewer_pricing: Option<&ModelPricing>,
2858 author_pricing: Option<&ModelPricing>,
2859) -> Option<ComplementaryCostEstimate> {
2860 let reviewer_pricing = reviewer_pricing?;
2861 let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2862 let multiplier_vs_author = pricing_total(author_pricing)
2863 .filter(|author_total| *author_total > 0.0)
2864 .map(|author_total| total_per_mtok / author_total);
2865 Some(ComplementaryCostEstimate {
2866 input_per_mtok: reviewer_pricing.input_per_mtok,
2867 output_per_mtok: reviewer_pricing.output_per_mtok,
2868 total_per_mtok,
2869 multiplier_vs_author,
2870 })
2871}
2872
2873fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2874 pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2875}
2876
2877fn reviewer_reason(
2878 author: &ComplementaryModelIdentity,
2879 reviewer: &ComplementaryModelIdentity,
2880 estimate: Option<&ComplementaryCostEstimate>,
2881) -> String {
2882 let cost = estimate
2883 .and_then(|estimate| estimate.multiplier_vs_author)
2884 .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2885 .unwrap_or_else(|| "price ratio unavailable".to_string());
2886 format!(
2887 "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2888 reviewer.id,
2889 reviewer.provider,
2890 reviewer.family,
2891 author.family,
2892 reviewer.tier,
2893 author.tier,
2894 cost
2895 )
2896}
2897
2898fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2899 BTreeSet::from([
2900 identity.id.to_ascii_lowercase(),
2901 identity.provider.to_ascii_lowercase(),
2902 format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2903 format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2904 identity.family.to_ascii_lowercase(),
2905 identity.lineage.to_ascii_lowercase(),
2906 ])
2907}
2908
2909fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2910 normalized_catalog_token(Some(selector))
2911 .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2912 .is_some_and(|selector| refs.contains(&selector))
2913}
2914
2915use harn_glob::match_name as glob_match;
2918
2919fn dirs_or_home() -> Option<String> {
2920 crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2921}
2922
2923pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2926 if let Some(env_name) = &pdef.base_url_env {
2927 if let Ok(val) = std::env::var(env_name) {
2928 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2930 if !trimmed.is_empty() {
2931 return trimmed.to_string();
2932 }
2933 }
2934 }
2935 pdef.base_url.clone()
2936}
2937
/// Contents of `llm/providers.toml`, embedded into the binary at compile time
/// via `include_str!`.
const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2942
2943fn default_config() -> ProvidersConfig {
2957 parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2958 .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2959}
2960
/// Test-only helper: starts from the embedded default configuration and
/// merges `overlay` on top of it via `merge_from`.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2967
2968#[cfg(test)]
2969mod tests {
2970 use super::*;
2971
2972 fn reset_overrides() {
2973 clear_user_overrides();
2974 }
2975
2976 #[test]
2977 fn resolve_model_info_guards_bad_native_pin_on_unreliable_route() {
2978 reset_overrides();
2979 let overlay = parse_config_toml(
2986 "[aliases.guard-ds]\nid = \"deepseek/deepseek-v3.2\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2987 )
2988 .expect("overlay parses");
2989 set_user_overrides(Some(overlay));
2990 let resolved = resolve_model_info("guard-ds");
2991 assert_eq!(
2992 resolved.tool_format, "text",
2993 "a native pin on a native_unreliable route must be auto-corrected to text"
2994 );
2995 clear_user_overrides();
2996
2997 let overlay_ok = parse_config_toml(
2999 "[aliases.guard-ds-ok]\nid = \"deepseek/deepseek-v3-base\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
3000 )
3001 .expect("overlay parses");
3002 set_user_overrides(Some(overlay_ok));
3003 let resolved_ok = resolve_model_info("guard-ds-ok");
3004 assert_eq!(resolved_ok.tool_format, "native");
3005 clear_user_overrides();
3006 }
3007
3008 #[test]
3009 fn auto_select_prefers_local_provider_without_cloud_credentials() {
3010 let config = parse_config_toml(
3014 "[providers.ollama]\nbase_url = \"http://localhost:11434\"\nchat_endpoint = \"/v1/chat/completions\"\n",
3015 )
3016 .expect("config parses");
3017 assert!(provider_is_local(config.providers.get("ollama").unwrap()));
3018 assert_eq!(auto_select_provider(&config), "ollama");
3019 }
3020
3021 #[test]
3022 fn auto_select_falls_back_to_documented_default_when_empty() {
3023 let config = parse_config_toml("").expect("config parses");
3024 assert_eq!(auto_select_provider(&config), FALLBACK_PROVIDER);
3025 }
3026
3027 #[test]
3028 fn suppress_routes_parse_and_merge_dedupe() {
3029 let mut base =
3030 parse_config_toml("[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\"]\n")
3031 .expect("base parses");
3032 assert!(!base.is_empty(), "a suppress-only overlay is not empty");
3033 let overlay = parse_config_toml(
3034 "[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\", \"ollama:img:tag\"]\n",
3035 )
3036 .expect("overlay parses");
3037 base.merge_from(&overlay);
3038 assert_eq!(
3039 base.suppress.routes,
3040 vec![
3041 "together:Qwen/Qwen3-Coder-Next-FP8".to_string(),
3042 "ollama:img:tag".to_string(),
3043 ],
3044 "merge appends new selectors without duplicating existing ones"
3045 );
3046 }
3047
3048 const PATCH_BASE_TOML: &str = r#"
3050[models."demo/patch-target"]
3051name = "Patch Target"
3052provider = "demo"
3053context_window = 128000
3054stream_timeout = 300.0
3055capabilities = ["tools", "vision"]
3056strengths = ["coding"]
3057
3058[models."demo/patch-target".pricing]
3059input_per_mtok = 1.0
3060output_per_mtok = 5.0
3061"#;
3062
3063 fn patch_base() -> ProvidersConfig {
3064 parse_config_toml(PATCH_BASE_TOML).expect("patch base parses")
3065 }
3066
3067 fn patched_row(config: &ProvidersConfig) -> &ModelDef {
3068 config
3069 .models
3070 .get("demo/patch-target")
3071 .expect("patch target row present")
3072 }
3073
3074 #[test]
3075 fn patch_models_scalar_and_nested_field_preserve_siblings() {
3076 let mut base = patch_base();
3077 let overlay = parse_config_toml(
3078 "[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n\
3079 [patch.models.\"demo/patch-target\".pricing]\noutput_per_mtok = 2.5\n",
3080 )
3081 .expect("patch overlay parses");
3082 assert!(!overlay.is_empty(), "a patch-only overlay is not empty");
3083 base.merge_from(&overlay);
3084 let row = patched_row(&base);
3085 assert_eq!(row.stream_timeout, Some(1200.0), "patched scalar applies");
3086 assert_eq!(row.name, "Patch Target", "unpatched scalar is intact");
3087 assert_eq!(row.context_window, 128000, "unpatched scalar is intact");
3088 assert_eq!(
3089 row.capabilities,
3090 vec!["tools".to_string(), "vision".to_string()],
3091 "unpatched array is intact"
3092 );
3093 let pricing = row.pricing.as_ref().expect("pricing survives the patch");
3094 assert_eq!(pricing.output_per_mtok, 2.5, "patched nested field applies");
3095 assert_eq!(
3096 pricing.input_per_mtok, 1.0,
3097 "sibling nested field is preserved by the deep merge"
3098 );
3099 assert!(base.dangling_model_patches().is_empty());
3100 }
3101
3102 #[test]
3103 fn patch_models_array_replaces_wholesale() {
3104 let mut base = patch_base();
3105 let overlay =
3106 parse_config_toml("[patch.models.\"demo/patch-target\"]\ncapabilities = [\"tools\"]\n")
3107 .expect("patch overlay parses");
3108 base.merge_from(&overlay);
3109 let row = patched_row(&base);
3110 assert_eq!(
3111 row.capabilities,
3112 vec!["tools".to_string()],
3113 "arrays replace wholesale — no element-wise merge"
3114 );
3115 assert_eq!(
3116 row.strengths,
3117 vec!["coding".to_string()],
3118 "arrays the patch does not name are intact"
3119 );
3120 }
3121
3122 #[test]
3123 fn patch_models_wins_over_whole_row_in_same_overlay() {
3124 let mut base = patch_base();
3125 let overlay = parse_config_toml(
3126 "[models.\"demo/patch-target\"]\n\
3127 name = \"Replaced Row\"\nprovider = \"demo\"\ncontext_window = 64000\n\
3128 stream_timeout = 600.0\n\
3129 [patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n",
3130 )
3131 .expect("overlay parses");
3132 base.merge_from(&overlay);
3133 let row = patched_row(&base);
3134 assert_eq!(
3135 row.name, "Replaced Row",
3136 "the whole-row replacement lands first"
3137 );
3138 assert_eq!(row.context_window, 64000);
3139 assert_eq!(
3140 row.stream_timeout,
3141 Some(1200.0),
3142 "the same overlay's patch fields win over its whole-row fields"
3143 );
3144 }
3145
3146 #[test]
3147 fn patch_models_chained_layers_accumulate_and_later_wins() {
3148 let mut base = patch_base();
3149 let layer1 =
3150 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 900.0\n")
3151 .expect("layer1 parses");
3152 let layer2 = parse_config_toml(
3153 "[patch.models.\"demo/patch-target\".pricing]\noutput_per_mtok = 2.5\n",
3154 )
3155 .expect("layer2 parses");
3156 base.merge_from(&layer1);
3157 base.merge_from(&layer2);
3158 let row = patched_row(&base);
3159 assert_eq!(
3160 row.stream_timeout,
3161 Some(900.0),
3162 "layer1's field patch survives layer2 patching a different field"
3163 );
3164 assert_eq!(
3165 row.pricing
3166 .as_ref()
3167 .expect("pricing present")
3168 .output_per_mtok,
3169 2.5,
3170 "layer2's field patch applies"
3171 );
3172
3173 let layer3 =
3174 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n")
3175 .expect("layer3 parses");
3176 base.merge_from(&layer3);
3177 assert_eq!(
3178 patched_row(&base).stream_timeout,
3179 Some(1200.0),
3180 "for the same field, the later layer's patch wins"
3181 );
3182 }
3183
3184 #[test]
3185 fn patch_models_sticky_across_later_whole_row_replacement() {
3186 let mut base = patch_base();
3187 let patch_layer =
3188 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = 1200.0\n")
3189 .expect("patch layer parses");
3190 base.merge_from(&patch_layer);
3191 let replacement_layer = parse_config_toml(
3195 "[models.\"demo/patch-target\"]\n\
3196 name = \"Refreshed Row\"\nprovider = \"demo\"\ncontext_window = 256000\n\
3197 stream_timeout = 300.0\n",
3198 )
3199 .expect("replacement layer parses");
3200 base.merge_from(&replacement_layer);
3201 let row = patched_row(&base);
3202 assert_eq!(row.name, "Refreshed Row", "the whole-row refresh lands");
3203 assert_eq!(row.context_window, 256000);
3204 assert_eq!(
3205 row.stream_timeout,
3206 Some(1200.0),
3207 "the sticky patch re-applies on top of the refreshed row"
3208 );
3209 }
3210
3211 #[test]
3212 fn patch_models_dangling_patch_reports_and_applies_when_row_arrives() {
3213 let mut base = patch_base();
3214 let dangling =
3215 parse_config_toml("[patch.models.\"demo/not-yet-cataloged\"]\nstream_timeout = 42.0\n")
3216 .expect("dangling patch parses");
3217 base.merge_from(&dangling);
3218 assert_eq!(
3219 base.dangling_model_patches(),
3220 vec!["demo/not-yet-cataloged"],
3221 "a patch with no matching row is reported, not dropped"
3222 );
3223 assert_eq!(
3224 patched_row(&base).stream_timeout,
3225 Some(300.0),
3226 "existing rows are untouched by a dangling patch"
3227 );
3228
3229 let late_row = parse_config_toml(
3231 "[models.\"demo/not-yet-cataloged\"]\n\
3232 name = \"Late Arrival\"\nprovider = \"demo\"\ncontext_window = 8192\n",
3233 )
3234 .expect("late row parses");
3235 base.merge_from(&late_row);
3236 assert!(base.dangling_model_patches().is_empty());
3237 let row = base
3238 .models
3239 .get("demo/not-yet-cataloged")
3240 .expect("late row present");
3241 assert_eq!(row.stream_timeout, Some(42.0), "the held patch applied");
3242 assert_eq!(row.name, "Late Arrival");
3243 }
3244
3245 #[test]
3246 fn patch_models_type_error_keeps_unpatched_row() {
3247 let mut base = patch_base();
3248 let bad =
3249 parse_config_toml("[patch.models.\"demo/patch-target\"]\nstream_timeout = \"soon\"\n")
3250 .expect("the patch overlay itself is valid TOML");
3251 base.merge_from(&bad);
3252 let row = patched_row(&base);
3253 assert_eq!(
3254 row.stream_timeout,
3255 Some(300.0),
3256 "a type-invalid patch keeps the unpatched row"
3257 );
3258 assert_eq!(row.name, "Patch Target", "the rest of the row is intact");
3259 }
3260
3261 #[test]
3262 fn model_rows_roundtrip_through_toml_value_for_patching() {
3263 let config = default_config();
3269 assert!(!config.models.is_empty());
3270 for (id, row) in &config.models {
3271 let value = toml::Value::try_from(row)
3272 .unwrap_or_else(|error| panic!("serialize model row {id}: {error}"));
3273 let roundtripped = ModelDef::deserialize(value)
3274 .unwrap_or_else(|error| panic!("deserialize model row {id}: {error}"));
3275 assert_eq!(&roundtripped, row, "model row {id} must round-trip");
3276 }
3277 }
3278
3279 #[test]
3280 fn test_glob_match_prefix() {
3281 assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
3282 assert!(glob_match("gpt-*", "gpt-4o"));
3283 assert!(!glob_match("claude-*", "gpt-4o"));
3284 }
3285
3286 #[test]
3287 fn test_glob_match_suffix() {
3288 assert!(glob_match("*-latest", "llama3.2-latest"));
3289 assert!(!glob_match("*-latest", "llama3.2"));
3290 }
3291
3292 #[test]
3293 fn test_glob_match_middle() {
3294 assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
3295 assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
3296 }
3297
3298 #[test]
3299 fn test_glob_match_exact() {
3300 assert!(glob_match("gpt-4o", "gpt-4o"));
3301 assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
3302 }
3303
3304 #[test]
3305 fn test_infer_provider_from_defaults() {
3306 let _guard = crate::llm::env_guard();
3307 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3308 unsafe {
3309 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3310 }
3311
3312 assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
3313 assert_eq!(infer_provider("gpt-4o"), "openai");
3314 assert_eq!(infer_provider("o1-preview"), "openai");
3315 assert_eq!(infer_provider("o3-mini"), "openai");
3316 assert_eq!(infer_provider("o4-mini"), "openai");
3317 assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
3318 assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
3319 assert_eq!(infer_provider("llama3.2:latest"), "ollama");
3320 assert_eq!(infer_provider("unknown-model"), "anthropic");
3321
3322 unsafe {
3323 match prev_default_provider {
3324 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3325 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3326 }
3327 }
3328 }
3329
3330 #[test]
3331 fn test_infer_provider_prefix_rules() {
3332 assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
3333 assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
3334 assert_eq!(infer_provider("local:owner/model"), "ollama");
3336 assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
3337 }
3338
3339 #[test]
3340 fn test_openrouter_inference_requires_one_slash() {
3341 let _guard = crate::llm::env_guard();
3342 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3343 unsafe {
3344 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3345 }
3346
3347 assert_eq!(infer_provider("org/model"), "openrouter");
3348 assert_eq!(infer_provider("org/team/model"), "anthropic");
3349
3350 unsafe {
3351 match prev_default_provider {
3352 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3353 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3354 }
3355 }
3356 }
3357
3358 #[test]
3359 fn test_cerebras_inference_beats_openrouter_slash_fallback() {
3360 let _guard = crate::llm::env_guard();
3361 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3362 unsafe {
3363 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3364 }
3365
3366 assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
3367 assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
3368 assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
3369
3370 unsafe {
3371 match prev_default_provider {
3372 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3373 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3374 }
3375 }
3376 }
3377
3378 #[test]
3379 fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
3380 let _guard = crate::llm::env_guard();
3385 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3386 unsafe {
3387 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3388 }
3389
3390 for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
3391 assert_eq!(
3392 infer_provider(model),
3393 "cerebras",
3394 "{model} should route to its catalog provider"
3395 );
3396 let resolved = resolve_model_info(model);
3397 assert_eq!(resolved.id, model);
3398 assert_eq!(resolved.provider, "cerebras");
3399 }
3400
3401 unsafe {
3402 match prev_default_provider {
3403 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3404 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3405 }
3406 }
3407 }
3408
3409 #[test]
3410 fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
3411 reset_overrides();
3412
3413 assert_eq!(
3414 wire_model_id("groq/openai/gpt-oss-120b"),
3415 "openai/gpt-oss-120b"
3416 );
3417 assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");
3418
3419 let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
3420 let ids = equivalents
3421 .iter()
3422 .map(|(id, _)| id.as_str())
3423 .collect::<Vec<_>>();
3424
3425 assert!(
3426 ids.contains(&"groq/openai/gpt-oss-120b"),
3427 "Cerebras GPT-OSS should surface the Groq serving variant"
3428 );
3429 assert!(
3430 !ids.contains(&"gpt-oss-120b"),
3431 "equivalence results should not include the source row"
3432 );
3433 assert!(equivalents.iter().all(|(_, model)| {
3434 model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
3435 }));
3436 }
3437
3438 #[test]
3439 fn fireworks_gpt_oss_route_has_real_context_window() {
3440 reset_overrides();
3447
3448 let entry = model_catalog_entry("accounts/fireworks/models/gpt-oss-120b")
3449 .expect("Fireworks gpt-oss-120b must be in the model catalog");
3450 assert_eq!(entry.context_window, 131_072);
3451 assert_eq!(entry.provider, "fireworks");
3452 assert_eq!(
3453 entry.equivalence_group.as_deref(),
3454 Some("openai-gpt-oss-120b"),
3455 );
3456 }
3457
3458 #[test]
3459 fn test_user_catalog_overlay_re_homes_model_provider() {
3460 reset_overrides();
3464 let mut overlay = ProvidersConfig::default();
3465 overlay.models.insert(
3466 "gpt-4o".to_string(),
3467 ModelDef {
3468 name: "GPT-4o via OpenRouter".to_string(),
3469 provider: "openrouter".to_string(),
3470 context_window: 128_000,
3471 logical_model: None,
3472 equivalence_group: None,
3473 served_variant: None,
3474 wire_model: None,
3475 api_dialect: None,
3476 rate_limits: None,
3477 performance: None,
3478 architecture: None,
3479 local_memory: None,
3480 runtime_context_window: None,
3481 stream_timeout: None,
3482 capabilities: Vec::new(),
3483 pricing: None,
3484 deprecated: false,
3485 deprecation_note: None,
3486 superseded_by: None,
3487 fast_mode: None,
3488 quality_tags: Vec::new(),
3489 availability: ModelAvailability::default(),
3490 tier: None,
3491 open_weight: None,
3492 strengths: Vec::new(),
3493 benchmarks: std::collections::BTreeMap::new(),
3494 family: None,
3495 lineage: None,
3496 complementary_with: Vec::new(),
3497 avoid_as_reviewer_for: Vec::new(),
3498 },
3499 );
3500 set_user_overrides(Some(overlay));
3501
3502 assert_eq!(infer_provider("gpt-4o"), "openrouter");
3503
3504 reset_overrides();
3505 }
3506
3507 #[test]
3508 fn test_resolve_model_info_normalizes_provider_prefixes() {
3509 let local = resolve_model_info("local:gemma-4-e4b-it");
3510 assert_eq!(local.id, "gemma-4-e4b-it");
3511 assert_eq!(local.provider, "ollama");
3512
3513 let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
3514 assert_eq!(ollama.id, "qwen3:30b-a3b");
3515 assert_eq!(ollama.provider, "ollama");
3516
3517 let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
3518 assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
3519 assert_eq!(hf.provider, "huggingface");
3520
3521 let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
3522 assert_eq!(cerebras.id, "gpt-oss-120b");
3523 assert_eq!(cerebras.provider, "cerebras");
3524
3525 let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
3526 assert_eq!(cerebras_glm.id, "zai-glm-4.7");
3527 assert_eq!(cerebras_glm.provider, "cerebras");
3528 }
3529
3530 #[test]
3531 fn test_model_tier_from_defaults() {
3532 assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
3536 assert_eq!(model_tier("gpt-4o"), "frontier");
3537 assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
3538 assert_eq!(model_tier("deepseek-v4-flash"), "mid");
3539 assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
3540 assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
3541 assert_eq!(model_tier("glm-5.1"), "frontier");
3542 assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
3544 }
3545
3546 #[test]
3547 fn test_model_family_preserves_underlying_hosted_lineage() {
3548 assert_eq!(
3549 model_family("openrouter", "anthropic/claude-sonnet-4-6"),
3550 "anthropic-claude"
3551 );
3552 assert_eq!(
3553 model_family("openrouter", "google/gemini-2.5-flash"),
3554 "google-gemini"
3555 );
3556 assert_eq!(
3557 model_family("openrouter", "openai/o3-mini"),
3558 "openai-reasoning"
3559 );
3560 assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
3561 assert_eq!(
3562 model_lineage("openrouter", "openai/o3-mini"),
3563 "openai-reasoning"
3564 );
3565 assert_eq!(
3566 model_lineage("anthropic", "claude-opus-4-8"),
3567 "claude-opus-adaptive"
3568 );
3569 assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
3570 }
3571
3572 #[test]
3573 fn test_complementary_reviewer_uses_different_family() {
3574 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3575 author_model: "claude-sonnet-4-6".to_string(),
3576 author_provider: None,
3577 intent: ComplementaryReviewerIntent::PlanReview,
3578 max_price_multiplier: Some(3.0),
3579 });
3580
3581 assert!(!selection.fallback, "{selection:?}");
3582 assert_eq!(selection.author.family, "anthropic-claude");
3583 assert_ne!(selection.reviewer.family, selection.author.family);
3584 assert_eq!(selection.reviewer.tier, "frontier");
3585 assert!(selection.estimated_incremental_cost.is_some());
3586 assert_eq!(selection.fallback_code, None, "{selection:?}");
3589 }
3590
3591 #[test]
3592 fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
3593 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3594 author_model: "gpt-4o-mini".to_string(),
3595 author_provider: Some("openai".to_string()),
3596 intent: ComplementaryReviewerIntent::Review,
3597 max_price_multiplier: Some(0.01),
3598 });
3599
3600 assert!(selection.fallback, "{selection:?}");
3601 assert_eq!(selection.reviewer.id, "gpt-4o-mini");
3602 assert_eq!(selection.reviewer.family, selection.author.family);
3603 assert!(selection
3604 .fallback_reason
3605 .as_deref()
3606 .is_some_and(|reason| reason.contains("max_price_multiplier")));
3607 assert_eq!(
3611 selection.fallback_code.as_deref(),
3612 Some(ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code()),
3613 "{selection:?}"
3614 );
3615 assert_eq!(
3616 ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3617 "no_diff_family_within_price"
3618 );
3619 }
3620
3621 #[test]
3622 fn test_reviewer_fallback_codes_are_stable_strings() {
3623 assert_eq!(
3626 ReviewerFallbackCode::UnknownAuthorFamily.as_code(),
3627 "unknown_author_family"
3628 );
3629 assert_eq!(
3630 ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3631 "no_diff_family_within_price"
3632 );
3633 assert_eq!(
3634 ReviewerFallbackCode::NoDiffFamilyServerless.as_code(),
3635 "no_diff_family_serverless"
3636 );
3637 assert_eq!(
3638 ReviewerFallbackCode::AllDiffFamilyExcluded.as_code(),
3639 "all_diff_family_excluded"
3640 );
3641 }
3642
3643 #[test]
3644 fn test_resolve_model_unknown_alias() {
3645 let (id, provider) = resolve_model("gpt-4o");
3646 assert_eq!(id, "gpt-4o");
3647 assert!(provider.is_none());
3648 }
3649
3650 #[test]
3651 fn test_provider_names() {
3652 let names = provider_names();
3653 assert!(names.len() >= 7);
3654 assert!(names.contains(&"anthropic".to_string()));
3655 assert!(names.contains(&"together".to_string()));
3656 assert!(names.contains(&"local".to_string()));
3657 assert!(names.contains(&"mlx".to_string()));
3658 assert!(names.contains(&"openai".to_string()));
3659 assert!(names.contains(&"ollama".to_string()));
3660 assert!(names.contains(&"bedrock".to_string()));
3661 assert!(names.contains(&"azure_openai".to_string()));
3662 assert!(names.contains(&"vertex".to_string()));
3663 }
3664
3665 #[test]
3666 fn global_provider_file_is_an_overlay_on_builtin_defaults() {
3667 let mut overlay = ProvidersConfig {
3668 default_provider: Some("ollama".to_string()),
3669 ..Default::default()
3670 };
3671 overlay.aliases.insert(
3672 "quickstart".to_string(),
3673 AliasDef {
3674 id: "llama3.2".to_string(),
3675 provider: "ollama".to_string(),
3676 tool_format: None,
3677 },
3678 );
3679
3680 let merged = merge_global_config(overlay);
3681
3682 assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
3683 assert!(merged.providers.contains_key("anthropic"));
3684 assert!(merged.providers.contains_key("ollama"));
3685 assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
3686 }
3687
3688 #[test]
3689 fn partial_provider_overlay_preserves_builtin_provider_metadata() {
3690 let overlay = parse_config_toml(
3691 r#"
3692 [providers.ollama]
3693 base_url = "http://localhost:11435"
3694 extra_headers = { "x-local" = "1" }
3695 "#,
3696 )
3697 .expect("provider overlay parses");
3698
3699 let merged = merge_global_config(overlay);
3700 let ollama = merged
3701 .providers
3702 .get("ollama")
3703 .expect("ollama remains configured");
3704
3705 assert_eq!(ollama.base_url, "http://localhost:11435");
3706 assert_eq!(ollama.auth_style, "none");
3707 assert_eq!(ollama.chat_endpoint, "/api/chat");
3708 assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
3709 assert_eq!(ollama.cost_per_1k_in, Some(0.0));
3710 assert_eq!(ollama.cost_per_1k_out, Some(0.0));
3711 assert_eq!(
3712 ollama
3713 .healthcheck
3714 .as_ref()
3715 .and_then(|healthcheck| healthcheck.path.as_deref()),
3716 Some("/api/tags")
3717 );
3718 assert_eq!(
3719 ollama.extra_headers.get("x-local").map(String::as_str),
3720 Some("1")
3721 );
3722 }
3723
3724 #[test]
3725 fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
3726 let overlay = parse_config_toml(
3727 r#"
3728 [providers.ollama]
3729 auth_style = "bearer"
3730 auth_env = "OLLAMA_API_KEY"
3731 "#,
3732 )
3733 .expect("provider overlay parses");
3734
3735 let merged = merge_global_config(overlay);
3736 let ollama = merged
3737 .providers
3738 .get("ollama")
3739 .expect("ollama remains configured");
3740
3741 assert_eq!(ollama.auth_style, "bearer");
3742 assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
3743 assert_eq!(ollama.chat_endpoint, "/api/chat");
3744 }
3745
3746 #[test]
3747 fn test_resolve_tier_model_default_aliases() {
3748 let (model, provider) = resolve_tier_model("frontier", None)
3753 .expect("frontier alias must resolve from the embedded catalog");
3754 assert_eq!(provider, "anthropic");
3755 assert!(
3756 model_catalog_entry(&model)
3757 .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
3758 "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
3759 );
3760
3761 let (model, provider) = resolve_tier_model("small", None)
3762 .expect("small alias must resolve from the embedded catalog");
3763 assert!(
3764 [
3765 "openrouter",
3766 "huggingface",
3767 "local",
3768 "llamacpp",
3769 "mlx",
3770 "ollama"
3771 ]
3772 .contains(&provider.as_str()),
3773 "small tier should resolve to an open-weight provider (got {provider} / {model})"
3774 );
3775
3776 let (model, provider) = resolve_tier_model("mid", None)
3777 .expect("mid alias must resolve from the embedded catalog");
3778 assert_eq!(provider, "openrouter");
3779 assert_eq!(model, "qwen/qwen3.6-flash");
3780 }
3781
3782 #[test]
3783 fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
3784 let (model, provider) = resolve_tier_model("mid", Some("openai"))
3788 .expect("mid tier scoped to openai must resolve");
3789 assert_eq!(provider, "openai");
3790 assert_eq!(model, "gpt-5.4-mini");
3791 assert!(
3792 model_catalog_entry(&model).is_some(),
3793 "mid/openai alias must point at a registered model (got {model})"
3794 );
3795 }
3796
3797 #[test]
3798 fn test_provider_config_anthropic() {
3799 let pdef = provider_config("anthropic").unwrap();
3800 assert_eq!(pdef.auth_style, "header");
3801 assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
3802 }
3803
3804 #[test]
3805 fn test_provider_config_mlx() {
3806 let pdef = provider_config("mlx").unwrap();
3807 assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
3808 assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
3809 assert_eq!(
3810 pdef.healthcheck.unwrap().path.as_deref(),
3811 Some("/v1/models")
3812 );
3813
3814 let (model, provider) = resolve_model("mlx-qwen36-27b");
3815 assert_eq!(model, "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit");
3816 assert_eq!(provider.as_deref(), Some("mlx"));
3817 }
3818
3819 #[test]
3820 fn test_enterprise_provider_defaults_and_inference() {
3821 let bedrock = provider_config("bedrock").unwrap();
3822 assert_eq!(bedrock.auth_style, "aws_sigv4");
3823 assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
3824 assert_eq!(
3825 infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
3826 "bedrock"
3827 );
3828 assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
3829
3830 let azure = provider_config("azure_openai").unwrap();
3831 assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
3832 assert_eq!(
3833 auth_env_names(&azure.auth_env),
3834 vec![
3835 "AZURE_OPENAI_API_KEY".to_string(),
3836 "AZURE_OPENAI_AD_TOKEN".to_string(),
3837 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
3838 ]
3839 );
3840
3841 let vertex = provider_config("vertex").unwrap();
3842 assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
3843 assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
3844 }
3845
3846 #[test]
3847 fn test_default_provider_env_override_for_unknown_model() {
3848 let _guard = crate::llm::env_guard();
3849 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3850 unsafe {
3851 std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
3852 }
3853
3854 let inference = infer_provider_detail("unknown-model");
3855
3856 unsafe {
3857 match prev_default_provider {
3858 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3859 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3860 }
3861 }
3862
3863 assert_eq!(inference.provider, "openai");
3864 assert_eq!(
3865 inference.source,
3866 crate::llm::provider::ProviderInferenceSource::DefaultFallback
3867 );
3868 }
3869
3870 #[test]
3871 fn test_unknown_model_family_ignores_default_provider_fallback() {
3872 let _guard = crate::llm::env_guard();
3873 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3874 unsafe {
3875 std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
3876 }
3877
3878 let unknown = resolve_model_info("mystery-model-xyz");
3879 let known_family = resolve_model_info("deepseek-mystery-model");
3880
3881 unsafe {
3882 match prev_default_provider {
3883 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3884 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3885 }
3886 }
3887
3888 assert_eq!(unknown.provider, "ollama");
3889 assert_eq!(unknown.family, "unknown");
3890 assert_eq!(unknown.lineage, "unknown");
3891 assert_eq!(known_family.family, "deepseek");
3892 assert_eq!(known_family.lineage, "deepseek");
3893 }
3894
3895 #[test]
3896 fn test_resolve_base_url_no_env() {
3897 let pdef = ProviderDef {
3898 base_url: "https://example.com".to_string(),
3899 ..Default::default()
3900 };
3901 assert_eq!(resolve_base_url(&pdef), "https://example.com");
3902 }
3903
3904 #[test]
3905 fn test_default_config_roundtrip() {
3906 let config = default_config();
3907 assert!(!config.providers.is_empty());
3908 assert!(!config.inference_rules.is_empty());
3909 assert_eq!(config.tier_defaults.default, "mid");
3912 let frontiers = config
3914 .models
3915 .iter()
3916 .filter(|(_, m)| m.tier.as_deref() == Some("frontier"))
3917 .count();
3918 assert!(
3919 frontiers >= 4,
3920 "expected at least 4 frontier-tagged models, got {frontiers}"
3921 );
3922 }
3923
3924 #[test]
3925 fn test_local_ollama_catalog_metadata() {
3926 reset_overrides();
3927
3928 let devstral =
3929 model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
3930 assert_eq!(devstral.context_window, 262_144);
3931 assert!(!devstral.capabilities.iter().any(|cap| cap == "vision"));
3932
3933 let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
3934 assert_eq!(gemma4.context_window, 262_144);
3935 assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
3936 }
3937
3938 #[test]
3939 fn local_gemma4_source_tags_match_structured_capability_tags() {
3940 reset_overrides();
3941 let config = default_config();
3942 for id in [
3943 "gemma-4-e2b-it",
3944 "gemma-4-e4b-it",
3945 "gemma-4-12b-it",
3946 "gemma-4-26b-a4b-it",
3947 "gemma-4-31b-it",
3948 ] {
3949 let source = config
3950 .models
3951 .get(id)
3952 .unwrap_or_else(|| panic!("{id} should be in the embedded catalog"));
3953 let derived = effective_model_capability_tags(&source.provider, id);
3954 assert_eq!(
3955 source.capabilities, derived,
3956 "{}/{} source capabilities must match derived capability_tags",
3957 source.provider, id
3958 );
3959 }
3960 }
3961
3962 #[test]
3963 fn capability_tags_include_structured_capability_flags() {
3964 let caps = crate::llm::capabilities::Capabilities {
3965 native_tools: true,
3966 tool_search: vec!["web".to_string()],
3967 vision_supported: true,
3968 audio: true,
3969 pdf: true,
3970 video: true,
3971 files_api_supported: true,
3972 prompt_caching: true,
3973 thinking_modes: vec!["enabled".to_string()],
3974 structured_output: Some("native".to_string()),
3975 ..Default::default()
3976 };
3977
3978 assert_eq!(
3979 capability_tags_from_capabilities(&caps),
3980 vec![
3981 "streaming",
3982 "tools",
3983 "tool_search",
3984 "vision",
3985 "audio",
3986 "pdf",
3987 "video",
3988 "files",
3989 "prompt_caching",
3990 "thinking",
3991 "structured_output",
3992 ]
3993 );
3994 }
3995
3996 #[test]
3997 fn test_external_config_overlays_default_catalog() {
3998 let mut config = default_config();
3999 let mut overlay = ProvidersConfig {
4000 default_provider: Some("ollama".to_string()),
4001 ..Default::default()
4002 };
4003 overlay.providers.insert(
4004 "custom".to_string(),
4005 ProviderDef {
4006 base_url: "https://llm.example.test/v1".to_string(),
4007 chat_endpoint: "/chat/completions".to_string(),
4008 ..Default::default()
4009 },
4010 );
4011
4012 config.merge_from(&overlay);
4013
4014 assert_eq!(config.default_provider.as_deref(), Some("ollama"));
4015 assert!(config.providers.contains_key("custom"));
4016 assert!(config.providers.contains_key("anthropic"));
4017 assert!(config.providers.contains_key("ollama"));
4018 }
4019
4020 #[test]
4021 fn test_model_params_empty() {
4022 let params = model_params("claude-sonnet-4-20250514");
4023 assert!(params.is_empty());
4024 }
4025
4026 #[test]
4027 fn test_user_overrides_add_provider_and_alias() {
4028 reset_overrides();
4029 let mut overlay = ProvidersConfig::default();
4030 overlay.providers.insert(
4031 "acme".to_string(),
4032 ProviderDef {
4033 base_url: "https://llm.acme.test/v1".to_string(),
4034 chat_endpoint: "/chat/completions".to_string(),
4035 ..Default::default()
4036 },
4037 );
4038 overlay.aliases.insert(
4039 "acme-fast".to_string(),
4040 AliasDef {
4041 id: "acme/model-fast".to_string(),
4042 provider: "acme".to_string(),
4043 tool_format: Some("native".to_string()),
4044 },
4045 );
4046 set_user_overrides(Some(overlay));
4047
4048 let (model, provider) = resolve_model("acme-fast");
4049 assert_eq!(model, "acme/model-fast");
4050 assert_eq!(provider.as_deref(), Some("acme"));
4051 assert!(provider_names().contains(&"acme".to_string()));
4052 assert_eq!(
4053 provider_config("acme").map(|provider| provider.base_url),
4054 Some("https://llm.acme.test/v1".to_string())
4055 );
4056
4057 reset_overrides();
4058 }
4059
4060 #[test]
4061 fn test_default_tool_format_uses_capability_matrix() {
4062 reset_overrides();
4063
4064 assert_eq!(
4065 default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
4066 "native"
4067 );
4068 assert_eq!(
4073 default_tool_format("devstral-small-2:24b", "ollama"),
4074 "json"
4075 );
4076 assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "native");
4080 assert_eq!(
4083 default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
4084 "text"
4085 );
4086 assert_eq!(
4087 default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
4088 "text"
4089 );
4090 assert_eq!(
4091 default_tool_format("qwen/qwen3.6-flash", "openrouter"),
4092 "native"
4093 );
4094 assert_eq!(default_tool_format("z-ai/glm-5.2", "openrouter"), "text");
4095 assert_eq!(
4101 default_tool_format("openai/gpt-oss-120b", "openrouter"),
4102 "text"
4103 );
4104 assert_eq!(
4105 default_tool_format("accounts/fireworks/models/gpt-oss-120b", "fireworks"),
4106 "text"
4107 );
4108 assert_eq!(default_tool_format("gpt-oss-120b", "cerebras"), "native");
4109 assert_eq!(
4110 default_tool_format("openai/gpt-oss-120b", "deepinfra"),
4111 "text"
4112 );
4113 assert_eq!(default_tool_format("openai/gpt-oss-120b", "groq"), "native");
4114 }
4115
4116 #[test]
4117 fn test_default_tool_format_unpinned_text_channel_is_json() {
4118 reset_overrides();
4119
4120 assert_eq!(default_tool_format("mystery-model-xyz", "ollama"), "json");
4126 }
4127
4128 #[test]
4129 fn test_claude_family_defaults_native_without_host_pin() {
4130 reset_overrides();
4131
4132 for (model, provider) in [
4139 ("claude-sonnet-4-6", "anthropic"),
4140 ("claude-sonnet-5", "anthropic"),
4141 ("anthropic/claude-nova-1", "anthropic"),
4142 ("anthropic/claude-sonnet-4.6", "openrouter"),
4143 ("anthropic/claude-sonnet-5", "openrouter"),
4144 ("anthropic/claude-opus-4-5-20251101", "openrouter"),
4145 ("anthropic/claude-sonnet-next", "openrouter"),
4146 ("anthropic/claude-nova-1", "openrouter"),
4147 ("anthropic.claude-sonnet-4-6", "bedrock"),
4148 ] {
4149 assert_eq!(
4150 default_tool_format(model, provider),
4151 "native",
4152 "{provider}:{model} must default native without a host pin"
4153 );
4154 }
4155
4156 let overlay = parse_config_toml(
4160 "[aliases.probe-sonnet]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\n",
4161 )
4162 .expect("overlay parses");
4163 set_user_overrides(Some(overlay));
4164 let resolved = resolve_model_info("probe-sonnet");
4165 assert_eq!(resolved.provider, "anthropic");
4166 assert_eq!(
4167 resolved.tool_format, "native",
4168 "an unpinned claude alias must inherit the family-level native default"
4169 );
4170 clear_user_overrides();
4171
4172 let overlay = parse_config_toml(
4176 "[aliases.probe-sonnet-json]\nid = \"claude-sonnet-4-6\"\nprovider = \"anthropic\"\ntool_format = \"json\"\n",
4177 )
4178 .expect("overlay parses");
4179 set_user_overrides(Some(overlay));
4180 let pinned = resolve_model_info("probe-sonnet-json");
4181 assert_eq!(
4182 pinned.tool_format, "json",
4183 "an explicit host pin must win over the claude family default"
4184 );
4185 clear_user_overrides();
4186
4187 assert_eq!(
4190 default_tool_format("mystery-model-xyz", "openrouter"),
4191 "json"
4192 );
4193 }
4194
4195 #[test]
4196 fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
4197 reset_overrides();
4198 let mut overlay = ProvidersConfig::default();
4199 overlay.models.insert(
4200 "acme/model-fast".to_string(),
4201 ModelDef {
4202 name: "Acme Fast".to_string(),
4203 provider: "acme".to_string(),
4204 context_window: 65_536,
4205 logical_model: None,
4206 equivalence_group: None,
4207 served_variant: None,
4208 wire_model: None,
4209 api_dialect: None,
4210 rate_limits: None,
4211 performance: None,
4212 architecture: None,
4213 local_memory: None,
4214 runtime_context_window: None,
4215 stream_timeout: Some(42.0),
4216 capabilities: vec!["tools".to_string(), "streaming".to_string()],
4217 pricing: Some(ModelPricing {
4218 input_per_mtok: 1.25,
4219 output_per_mtok: 2.5,
4220 cache_read_per_mtok: Some(0.25),
4221 cache_write_per_mtok: None,
4222 }),
4223 deprecated: false,
4224 deprecation_note: None,
4225 superseded_by: None,
4226 fast_mode: None,
4227 quality_tags: Vec::new(),
4228 availability: ModelAvailability::default(),
4229 tier: None,
4230 open_weight: None,
4231 strengths: Vec::new(),
4232 benchmarks: std::collections::BTreeMap::new(),
4233 family: None,
4234 lineage: None,
4235 complementary_with: Vec::new(),
4236 avoid_as_reviewer_for: Vec::new(),
4237 },
4238 );
4239 overlay
4240 .qc_defaults
4241 .insert("acme".to_string(), "acme/model-cheap".to_string());
4242 set_user_overrides(Some(overlay));
4243
4244 let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
4245 assert_eq!(entry.context_window, 65_536);
4246 assert_eq!(
4247 entry.capabilities,
4248 vec!["streaming".to_string(), "tools".to_string()]
4249 );
4250 assert_eq!(
4251 entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
4252 Some(1.25)
4253 );
4254 assert_eq!(
4255 pricing_per_1k_for("acme", "acme/model-fast"),
4256 Some((0.00125, 0.0025))
4257 );
4258 assert_eq!(
4259 qc_default_model("acme").as_deref(),
4260 Some("acme/model-cheap")
4261 );
4262
4263 reset_overrides();
4264 }
4265
4266 #[test]
4267 fn test_user_overrides_prepend_inference_rules() {
4268 reset_overrides();
4269 let mut overlay = ProvidersConfig::default();
4270 overlay.inference_rules.push(InferenceRule {
4271 pattern: Some("internal-*".to_string()),
4272 contains: None,
4273 exact: None,
4274 provider: "openai".to_string(),
4275 });
4276 set_user_overrides(Some(overlay));
4277
4278 assert_eq!(infer_provider("internal-foo"), "openai");
4279
4280 reset_overrides();
4281 }
4282
4283 #[test]
4290 fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
4291 let config = default_config();
4292 assert!(
4293 config.providers.len() >= 10,
4294 "expected >=10 providers in embedded catalog, got {}",
4295 config.providers.len()
4296 );
4297 assert!(
4298 config.models.len() >= 20,
4299 "expected >=20 models in embedded catalog, got {}",
4300 config.models.len()
4301 );
4302 assert!(
4303 config.aliases.len() >= 15,
4304 "expected >=15 aliases in embedded catalog, got {}",
4305 config.aliases.len()
4306 );
4307 assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
4308 }
4309
4310 #[test]
4311 fn embedded_catalog_every_deprecated_model_has_a_note() {
4312 let config = default_config();
4313 let offenders: Vec<&str> = config
4314 .models
4315 .iter()
4316 .filter(|(_, model)| {
4317 model.deprecated
4318 && model
4319 .deprecation_note
4320 .as_deref()
4321 .unwrap_or("")
4322 .trim()
4323 .is_empty()
4324 })
4325 .map(|(id, _)| id.as_str())
4326 .collect();
4327 assert!(
4328 offenders.is_empty(),
4329 "deprecated models missing a deprecation_note: {offenders:?}"
4330 );
4331 }
4332
4333 #[test]
4334 fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
4335 let config = default_config();
4336 for id in ["gpt-oss-120b", "zai-glm-4.7"] {
4337 let model = config.models.get(id).expect("current public Cerebras row");
4338 assert_eq!(model.provider, "cerebras");
4339 assert_eq!(model.availability, ModelAvailability::Serverless);
4340 assert!(!model.deprecated);
4341 }
4342
4343 let llama = config
4344 .models
4345 .get("llama-3.3-70b")
4346 .expect("legacy Cerebras row");
4347 assert_eq!(llama.provider, "cerebras");
4348 assert_eq!(llama.availability, ModelAvailability::Dedicated);
4349 assert!(llama.deprecated);
4350 }
4351
4352 #[test]
4353 fn embedded_openrouter_gpt_oss_120b_has_no_fragment_bleed() {
4354 let config = default_config();
4362 let model = config
4363 .models
4364 .get("openai/gpt-oss-120b")
4365 .expect("openrouter gpt-oss-120b row");
4366 assert_eq!(model.provider, "openrouter");
4367 assert_eq!(
4368 model.open_weight,
4369 Some(true),
4370 "gpt-oss-120b is Apache-2.0 open weight, not the bled-in open_weight=false"
4371 );
4372 assert!(
4373 !model.strengths.iter().any(|s| s == "vision"),
4374 "gpt-oss-120b is text-only; the bled-in `vision` strength must be gone: {:?}",
4375 model.strengths
4376 );
4377 assert!(
4378 !model.strengths.is_empty(),
4379 "gpt-oss-120b must carry its own strengths, not None"
4380 );
4381
4382 let group_tiers: std::collections::BTreeSet<_> = config
4385 .models
4386 .values()
4387 .filter(|m| {
4388 m.equivalence_group.as_deref() == Some("openai-gpt-oss-120b") && !m.deprecated
4389 })
4390 .map(|m| m.tier.clone())
4391 .collect();
4392 assert_eq!(
4393 group_tiers.len(),
4394 1,
4395 "openai-gpt-oss-120b group must share one tier, got {group_tiers:?}"
4396 );
4397 }
4398
4399 #[test]
4400 fn embedded_catalog_every_model_targets_a_registered_provider() {
4401 let config = default_config();
4402 let known: std::collections::BTreeSet<&str> =
4403 config.providers.keys().map(String::as_str).collect();
4404 let orphans: Vec<(&str, &str)> = config
4405 .models
4406 .iter()
4407 .filter(|(_, model)| !known.contains(model.provider.as_str()))
4408 .map(|(id, model)| (id.as_str(), model.provider.as_str()))
4409 .collect();
4410 assert!(
4411 orphans.is_empty(),
4412 "models reference unknown providers: {orphans:?}"
4413 );
4414 }
4415
4416 #[test]
4417 fn embedded_catalog_every_alias_targets_a_registered_provider() {
4418 let config = default_config();
4419 let known: std::collections::BTreeSet<&str> =
4420 config.providers.keys().map(String::as_str).collect();
4421 let orphans: Vec<(&str, &str)> = config
4422 .aliases
4423 .iter()
4424 .filter(|(_, alias)| !known.contains(alias.provider.as_str()))
4425 .map(|(name, alias)| (name.as_str(), alias.provider.as_str()))
4426 .collect();
4427 assert!(
4428 orphans.is_empty(),
4429 "aliases reference unknown providers: {orphans:?}"
4430 );
4431 }
4432
4433 #[test]
4434 fn embedded_catalog_every_qc_default_targets_a_known_model() {
4435 let config = default_config();
4436 let orphans: Vec<(&str, &str)> = config
4437 .qc_defaults
4438 .iter()
4439 .filter(|(_, model_id)| !config.models.contains_key(model_id.as_str()))
4440 .map(|(provider, model_id)| (provider.as_str(), model_id.as_str()))
4441 .collect();
4442 assert!(
4443 orphans.is_empty(),
4444 "qc_defaults reference unknown models: {orphans:?}"
4445 );
4446 }
4447
4448 #[test]
4449 fn embedded_catalog_pricing_rates_are_non_negative() {
4450 let config = default_config();
4451 for (id, model) in &config.models {
4452 let Some(pricing) = &model.pricing else {
4453 continue;
4454 };
4455 assert!(
4456 pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
4457 "{id}: negative pricing — in={} out={}",
4458 pricing.input_per_mtok,
4459 pricing.output_per_mtok
4460 );
4461 if let Some(rate) = pricing.cache_read_per_mtok {
4462 assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
4463 }
4464 if let Some(rate) = pricing.cache_write_per_mtok {
4465 assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
4466 }
4467 }
4468 }
4469
4470 #[test]
4471 fn model_availability_parses_known_strings() {
4472 assert_eq!(
4473 ModelAvailability::parse("serverless"),
4474 Some(ModelAvailability::Serverless)
4475 );
4476 assert_eq!(
4477 ModelAvailability::parse("dedicated"),
4478 Some(ModelAvailability::Dedicated)
4479 );
4480 assert_eq!(
4481 ModelAvailability::parse("unknown"),
4482 Some(ModelAvailability::Unknown)
4483 );
4484 assert_eq!(ModelAvailability::parse("provisioned"), None);
4485 for value in [
4486 ModelAvailability::Serverless,
4487 ModelAvailability::Dedicated,
4488 ModelAvailability::Unknown,
4489 ] {
4490 assert_eq!(ModelAvailability::parse(value.as_str()), Some(value));
4491 }
4492 }
4493
4494 #[test]
4495 fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
4496 let config = default_config();
4497 let model = config
4498 .models
4499 .get("Qwen/Qwen3-Coder-Next-FP8")
4500 .expect("Together Qwen3 Coder Next FP8 is cataloged");
4501 assert_eq!(model.provider, "together");
4502 assert_eq!(model.availability, ModelAvailability::Dedicated);
4503 }
4504
4505 #[test]
4506 fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
4507 let config = default_config();
4511 let dedicated: std::collections::BTreeSet<(&str, &str)> = config
4512 .models
4513 .iter()
4514 .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
4515 .map(|(id, model)| (model.provider.as_str(), id.as_str()))
4516 .collect();
4517 for (name, alias) in &config.aliases {
4518 if matches!(
4519 name.as_str(),
4520 "frontier"
4521 | "mid"
4522 | "small"
4523 | "tier/frontier"
4524 | "tier/mid"
4525 | "tier/small"
4526 | "sonnet"
4527 | "opus"
4528 | "haiku"
4529 ) {
4530 assert!(
4531 !dedicated.contains(&(alias.provider.as_str(), alias.id.as_str())),
4532 "tier alias `{name}` targets dedicated-only route `{}/{}`",
4533 alias.provider,
4534 alias.id,
4535 );
4536 }
4537 }
4538 }
4539
4540 #[test]
4541 fn embedded_catalog_tier_aliases_resolve_to_active_models() {
4542 for alias in ["frontier", "mid", "small"] {
4546 let (model, _provider) = resolve_tier_model(alias, None)
4547 .unwrap_or_else(|| panic!("tier alias `{alias}` must resolve"));
4548 let entry = model_catalog_entry(&model).unwrap_or_else(|| {
4549 panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
4550 });
4551 assert!(
4552 !entry.deprecated,
4553 "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
4554 entry.deprecation_note
4555 );
4556 }
4557 }
4558
4559 #[test]
4560 fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
4561 let (model, provider) = resolve_model("opus");
4564 assert_eq!(model, "claude-opus-4-8");
4565 assert_eq!(provider.as_deref(), Some("anthropic"));
4566
4567 let opus48 = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
4568 assert!(!opus48.deprecated, "newest Opus must not be deprecated");
4569 let fast = opus48.fast_mode.expect("opus 4.8 advertises fast mode");
4570 assert_eq!(fast.param, "speed");
4571 assert_eq!(fast.value, "fast");
4572 assert_eq!(fast.status.as_deref(), Some("research_preview"));
4573 let fast_pricing = fast.pricing.expect("fast mode carries premium pricing");
4574 let standard = opus48.pricing.expect("opus 4.8 standard pricing");
4575 assert!(
4576 fast_pricing.input_per_mtok > standard.input_per_mtok,
4577 "fast mode must be premium-priced relative to standard"
4578 );
4579 }
4580
4581 #[test]
4582 fn superseded_opus_models_point_at_claude_opus_4_8() {
4583 for model in ["claude-opus-4-7", "claude-opus-4-6"] {
4586 let entry =
4587 model_catalog_entry(model).unwrap_or_else(|| panic!("{model} catalog entry"));
4588 assert!(entry.deprecated, "{model} should be deprecated");
4589 assert_eq!(
4590 entry.superseded_by.as_deref(),
4591 Some("claude-opus-4-8"),
4592 "{model} should be superseded by claude-opus-4-8"
4593 );
4594 }
4595 }
4596
4597 #[test]
4598 fn opus_46_no_longer_advertises_fast_mode() {
4599 let opus46 = model_catalog_entry("claude-opus-4-6").expect("opus 4.6 catalog entry");
4600 assert!(
4601 opus46.fast_mode.is_none(),
4602 "Anthropic removed Opus 4.6 fast mode on 2026-06-29; Harn should not advertise it"
4603 );
4604
4605 let opus47 = model_catalog_entry("claude-opus-4-7").expect("opus 4.7 catalog entry");
4606 assert!(
4607 opus47.fast_mode.is_some(),
4608 "Opus 4.7 still advertises its own fast-mode tier"
4609 );
4610 }
4611
4612 #[test]
4613 fn gpt_5_5_fast_mode_rides_service_tier() {
4614 let entry = model_catalog_entry("gpt-5.5").expect("gpt-5.5 catalog entry");
4617 let fast = entry.fast_mode.expect("gpt-5.5 advertises a fast tier");
4618 assert_eq!(fast.param, "service_tier");
4619 assert_eq!(fast.status.as_deref(), Some("ga"));
4620 }
4621}