1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::{OnceLock, RwLock};
6
// Base configuration, initialised at most once by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
// Path of the external config file that `load_config` merged, recorded only
// when such a file was read and parsed successfully.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
// Optional overlay replaced through `set_runtime_catalog_overlay` and merged on
// top of the loaded config by `effective_config_with_user_overrides`.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    // Per-thread overrides installed by `set_user_overrides`; `effective_config`
    // clones them and merges them last.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
18
/// Top-level providers configuration as deserialized from TOML.
///
/// Every field is `#[serde(default)]`, so an empty document deserializes to the
/// same value as `ProvidersConfig::default()`. Layers are combined with
/// `merge_from`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    #[serde(default)]
    /// Name of the default provider; an overlay replaces it only when it sets one.
    pub default_provider: Option<String>,
    #[serde(default)]
    /// Provider definitions keyed by provider name; merged field-by-field per key.
    pub providers: BTreeMap<String, ProviderDef>,
    #[serde(default)]
    /// Alias entries keyed by alias name; overlay entries replace same-named ones.
    pub aliases: BTreeMap<String, AliasDef>,
    #[serde(default)]
    /// Tool-calling records keyed by string; overlay entries replace same-keyed ones.
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    #[serde(default)]
    /// Model definitions keyed by string; overlay entries replace same-keyed ones.
    pub models: BTreeMap<String, ModelDef>,
    #[serde(default)]
    /// String-to-string defaults; overlay entries replace same-keyed ones.
    pub qc_defaults: BTreeMap<String, String>,
    #[serde(default)]
    /// Provider inference rules; when merging, overlay rules are placed in front
    /// of the existing ones.
    pub inference_rules: Vec<InferenceRule>,
    #[serde(default)]
    /// Tier rules; when merging, overlay rules are placed in front of the
    /// existing ones.
    pub tier_rules: Vec<TierRule>,
    #[serde(default)]
    /// Fallback tier settings; an overlay replaces them only when its `default`
    /// differs from `default_mid()`.
    pub tier_defaults: TierDefaults,
    #[serde(default)]
    /// Per-pattern default parameters; merged key-by-key within each pattern.
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
    #[serde(default)]
    /// Per-role parameters; merged key-by-key within each role.
    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
    #[serde(default)]
    /// Suppression settings; overlay routes are appended without duplicates.
    pub suppress: SuppressDef,
}
46
/// Suppression section of the providers configuration.
#[derive(Debug, Clone, Deserialize, Default, PartialEq, Eq)]
pub struct SuppressDef {
    #[serde(default)]
    /// Route identifiers to suppress; defaults to an empty list when omitted.
    // NOTE(review): the exact route string format is not visible here — confirm
    // against the code that consumes this list.
    pub routes: Vec<String>,
}
68
69impl ProvidersConfig {
70 pub fn is_empty(&self) -> bool {
71 self.default_provider.is_none()
72 && self.providers.is_empty()
73 && self.aliases.is_empty()
74 && self.alias_tool_calling.is_empty()
75 && self.models.is_empty()
76 && self.qc_defaults.is_empty()
77 && self.inference_rules.is_empty()
78 && self.tier_rules.is_empty()
79 && self.model_defaults.is_empty()
80 && self.model_roles.is_empty()
81 && self.suppress.routes.is_empty()
82 && self.tier_defaults.default == default_mid()
83 }
84
85 pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
86 for (name, provider) in &overlay.providers {
87 match self.providers.get_mut(name) {
88 Some(existing) => existing.merge_from(provider),
89 None => {
90 self.providers.insert(name.clone(), provider.clone());
91 }
92 }
93 }
94 self.aliases.extend(overlay.aliases.clone());
95 self.alias_tool_calling
96 .extend(overlay.alias_tool_calling.clone());
97 self.models.extend(overlay.models.clone());
98 self.qc_defaults.extend(overlay.qc_defaults.clone());
99
100 if overlay.default_provider.is_some() {
101 self.default_provider = overlay.default_provider.clone();
102 }
103
104 if !overlay.inference_rules.is_empty() {
105 let mut merged = overlay.inference_rules.clone();
106 merged.extend(self.inference_rules.clone());
107 self.inference_rules = merged;
108 }
109
110 if !overlay.tier_rules.is_empty() {
111 let mut merged = overlay.tier_rules.clone();
112 merged.extend(self.tier_rules.clone());
113 self.tier_rules = merged;
114 }
115
116 if overlay.tier_defaults.default != default_mid() {
117 self.tier_defaults = overlay.tier_defaults.clone();
118 }
119
120 for (pattern, defaults) in &overlay.model_defaults {
121 self.model_defaults
122 .entry(pattern.clone())
123 .or_default()
124 .extend(defaults.clone());
125 }
126
127 for (role, defaults) in &overlay.model_roles {
128 self.model_roles
129 .entry(role.clone())
130 .or_default()
131 .extend(defaults.clone());
132 }
133
134 for route in &overlay.suppress.routes {
135 if !self.suppress.routes.contains(route) {
136 self.suppress.routes.push(route.clone());
137 }
138 }
139 }
140}
141
/// Definition of a single provider entry.
///
/// Deserialization goes through `ProviderDefWire` so that
/// `auth_style_explicit` can record whether `auth_style` was present in the
/// input. Overlays are applied with `ProviderDef::merge_from`.
#[derive(Debug, Clone)]
pub struct ProviderDef {
    pub display_name: Option<String>,
    pub icon: Option<String>,
    pub protocol: Option<String>,
    /// Empty when omitted; an empty overlay value never replaces an existing one.
    pub base_url: String,
    // NOTE(review): presumably names an environment variable overriding
    // `base_url` — confirm against the resolver.
    pub base_url_env: Option<String>,
    /// Falls back to `default_bearer()` ("bearer") when the input omits it.
    pub auth_style: String,
    pub auth_header: Option<String>,
    /// `AuthEnv::None` when omitted.
    pub auth_env: AuthEnv,
    /// Merged key-by-key when overlays are applied.
    pub extra_headers: BTreeMap<String, String>,
    /// Empty when omitted; an empty overlay value never replaces an existing one.
    pub chat_endpoint: String,
    pub completion_endpoint: Option<String>,
    pub command: Option<String>,
    /// Replaced wholesale by a non-empty overlay list.
    pub args: Vec<String>,
    /// Merged key-by-key when overlays are applied.
    pub env: BTreeMap<String, String>,
    pub cwd: Option<String>,
    /// Replaced wholesale by a non-empty overlay list.
    pub mcp_servers: Vec<serde_json::Value>,
    pub healthcheck: Option<HealthcheckDef>,
    pub local_runtime: Option<LocalRuntimeDef>,
    /// Replaced wholesale by a non-empty overlay list.
    pub features: Vec<String>,
    pub fallback: Option<String>,
    pub retry_count: Option<u32>,
    pub retry_delay_ms: Option<u64>,
    pub rpm: Option<u32>,
    pub rate_limits: Option<RateLimitsDef>,
    pub cost_per_1k_in: Option<f64>,
    pub cost_per_1k_out: Option<f64>,
    pub latency_p50_ms: Option<u64>,
    pub performance: Option<ServingPerformanceDef>,
    /// `true` when `auth_style` was present in the deserialized input (or was
    /// taken from an overlay that asserted a style); `false` for defaults.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
192
/// Private deserialization shape for `ProviderDef`.
///
/// Mirrors `ProviderDef` field-for-field, except that `auth_style` is an
/// `Option` so the `Deserialize` impl for `ProviderDef` can tell an omitted
/// value apart from an explicit one. Every field is `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    #[serde(default)]
    // `None` means the input did not specify an auth style.
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    local_runtime: Option<LocalRuntimeDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
    #[serde(default)]
    performance: Option<ServingPerformanceDef>,
}
252
253impl<'de> Deserialize<'de> for ProviderDef {
254 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
255 where
256 D: serde::Deserializer<'de>,
257 {
258 let wire = ProviderDefWire::deserialize(deserializer)?;
259 let auth_style_explicit = wire.auth_style.is_some();
260 Ok(Self {
261 display_name: wire.display_name,
262 icon: wire.icon,
263 protocol: wire.protocol,
264 base_url: wire.base_url,
265 base_url_env: wire.base_url_env,
266 auth_style: wire.auth_style.unwrap_or_else(default_bearer),
267 auth_header: wire.auth_header,
268 auth_env: wire.auth_env,
269 extra_headers: wire.extra_headers,
270 chat_endpoint: wire.chat_endpoint,
271 completion_endpoint: wire.completion_endpoint,
272 command: wire.command,
273 args: wire.args,
274 env: wire.env,
275 cwd: wire.cwd,
276 mcp_servers: wire.mcp_servers,
277 healthcheck: wire.healthcheck,
278 local_runtime: wire.local_runtime,
279 features: wire.features,
280 fallback: wire.fallback,
281 retry_count: wire.retry_count,
282 retry_delay_ms: wire.retry_delay_ms,
283 rpm: wire.rpm,
284 rate_limits: wire.rate_limits,
285 cost_per_1k_in: wire.cost_per_1k_in,
286 cost_per_1k_out: wire.cost_per_1k_out,
287 latency_p50_ms: wire.latency_p50_ms,
288 performance: wire.performance,
289 auth_style_explicit,
290 })
291 }
292}
293
294impl Default for ProviderDef {
295 fn default() -> Self {
296 Self {
297 display_name: None,
298 icon: None,
299 protocol: None,
300 base_url: String::new(),
301 base_url_env: None,
302 auth_style: default_bearer(),
303 auth_header: None,
304 auth_env: AuthEnv::None,
305 extra_headers: BTreeMap::new(),
306 chat_endpoint: String::new(),
307 completion_endpoint: None,
308 command: None,
309 args: Vec::new(),
310 env: BTreeMap::new(),
311 cwd: None,
312 mcp_servers: Vec::new(),
313 healthcheck: None,
314 local_runtime: None,
315 features: Vec::new(),
316 fallback: None,
317 retry_count: None,
318 retry_delay_ms: None,
319 rpm: None,
320 rate_limits: None,
321 cost_per_1k_in: None,
322 cost_per_1k_out: None,
323 latency_p50_ms: None,
324 performance: None,
325 auth_style_explicit: false,
326 }
327 }
328}
329
330impl ProviderDef {
331 fn merge_from(&mut self, overlay: &ProviderDef) {
332 merge_option(&mut self.display_name, &overlay.display_name);
333 merge_option(&mut self.icon, &overlay.icon);
334 merge_option(&mut self.protocol, &overlay.protocol);
335 merge_string(&mut self.base_url, &overlay.base_url);
336 merge_option(&mut self.base_url_env, &overlay.base_url_env);
337 let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
338 if overlay.auth_style_explicit
339 || !overlay_uses_default_auth_style
340 || self.auth_style == default_bearer()
341 {
342 self.auth_style = overlay.auth_style.clone();
343 self.auth_style_explicit |=
344 overlay.auth_style_explicit || !overlay_uses_default_auth_style;
345 }
346 merge_option(&mut self.auth_header, &overlay.auth_header);
347 if !overlay.auth_env.is_none() {
348 self.auth_env = overlay.auth_env.clone();
349 }
350 self.extra_headers.extend(overlay.extra_headers.clone());
351 merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
352 merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
353 merge_option(&mut self.command, &overlay.command);
354 merge_vec(&mut self.args, &overlay.args);
355 self.env.extend(overlay.env.clone());
356 merge_option(&mut self.cwd, &overlay.cwd);
357 merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
358 merge_option(&mut self.healthcheck, &overlay.healthcheck);
359 merge_option(&mut self.local_runtime, &overlay.local_runtime);
360 merge_vec(&mut self.features, &overlay.features);
361 merge_option(&mut self.fallback, &overlay.fallback);
362 merge_option(&mut self.retry_count, &overlay.retry_count);
363 merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
364 merge_option(&mut self.rpm, &overlay.rpm);
365 merge_option(&mut self.rate_limits, &overlay.rate_limits);
366 merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
367 merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
368 merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
369 merge_option(&mut self.performance, &overlay.performance);
370 }
371}
372
/// Overwrites `base` with a clone of `overlay` when `overlay` holds a value;
/// leaves `base` untouched when `overlay` is `None`.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
378
/// Replaces the contents of `base` with `overlay` unless `overlay` is empty.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    *base = String::from(overlay);
}
384
/// Replaces `base` wholesale with a copy of `overlay` unless `overlay` is
/// empty (the two lists are never concatenated).
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    match overlay {
        [] => {}
        items => *base = items.to_vec(),
    }
}
390
/// Auth style used when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
394
/// Value of a provider's `auth_env` setting.
///
/// `#[serde(untagged)]`: the input carries no variant tag, so the variant is
/// chosen from the shape of the value (a string or a list of strings).
// NOTE(review): presumably the strings name environment variables holding
// credentials — confirm against the code that reads this value.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// Nothing configured; this is the `Default`.
    #[default]
    None,
    /// A single string value.
    Single(String),
    /// A list of string values.
    Multiple(Vec<String>),
}
405
406impl AuthEnv {
407 fn is_none(&self) -> bool {
408 matches!(self, AuthEnv::None)
409 }
410}
411
/// Healthcheck settings attached to a provider (`ProviderDef::healthcheck`).
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Required: deserialization fails when this key is missing.
    // NOTE(review): presumably an HTTP method name — confirm against the prober.
    pub method: String,
    #[serde(default)]
    /// Optional; `None` when omitted.
    pub path: Option<String>,
    #[serde(default)]
    /// Optional; `None` when omitted.
    pub url: Option<String>,
    #[serde(default)]
    /// Optional; `None` when omitted.
    pub body: Option<String>,
}
422
/// Local-runtime settings attached to a provider (`ProviderDef::local_runtime`).
///
/// Every field is optional on input (`#[serde(default)]`) and is omitted from
/// serialized output when it is `None` or an empty list.
// NOTE(review): the `*_arg` fields look like command-line flag names for the
// runtime's launch command — confirm against the launcher that consumes them.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalRuntimeDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub prefix_args: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source_env: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_port: Option<u16>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub served_model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ctx_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu_layers_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_k_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_v_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_ram_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub enable_lora_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lora_modules_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_lora_rank_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub default_args: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop: Option<String>,
    // Provenance metadata (free-form strings).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
492
/// Local memory figures attached to a model (`ModelDef::local_memory`).
///
/// Every field is optional on input and is omitted from serialized output when
/// it is `None` or an empty map. `is_empty` reports whether all are unset.
// NOTE(review): units are only implied by the field names (`_gib`,
// `_per_1k_ctx`) — confirm against the estimator that consumes them.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct LocalMemoryDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_context_window: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_cache_gib_per_1k_ctx: Option<f64>,
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    /// Multipliers keyed by string (presumably a cache type name — TODO confirm).
    pub cache_type_multipliers: BTreeMap<String, f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety_margin_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_recommended_context: Option<u64>,
    // Provenance metadata (free-form strings).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
533
534impl LocalMemoryDef {
535 pub fn is_empty(&self) -> bool {
536 self.measured_resident_gib.is_none()
537 && self.measured_context_window.is_none()
538 && self.measured_cache_type.is_none()
539 && self.base_resident_gib.is_none()
540 && self.kv_cache_gib_per_1k_ctx.is_none()
541 && self.cache_type_multipliers.is_empty()
542 && self.default_cache_type.is_none()
543 && self.safety_margin_gib.is_none()
544 && self.max_recommended_context.is_none()
545 && self.source_url.is_none()
546 && self.last_verified.is_none()
547 && self.notes.is_none()
548 }
549}
550
/// An alias entry: maps an alias name (the key in `ProvidersConfig::aliases`)
/// to a model id and provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id the alias resolves to (returned by `resolve_model`).
    pub id: String,
    /// Provider the alias resolves to (returned by `resolve_model`).
    pub provider: String,
    #[serde(default)]
    /// Optional tool format; when absent, `resolve_model_info` falls back to
    /// `default_tool_format_with_config`.
    pub tool_format: Option<String>,
}
562
/// Tool-calling record stored in `ProvidersConfig::alias_tool_calling`.
///
/// Every field is an optional string that defaults to `None` on input and is
/// omitted from serialized output when unset.
// NOTE(review): field names suggest this holds probe results per alias —
// confirm the value vocabulary against the code that writes these records.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasToolCallingDef {
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub native: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub streaming_native: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_mode: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub last_probe_at: Option<String>,
}
584
/// Pricing figures for a model.
// NOTE(review): `_per_mtok` presumably means "per million tokens"; the
// currency is not visible here — confirm.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// Required input-token price.
    pub input_per_mtok: f64,
    /// Required output-token price.
    pub output_per_mtok: f64,
    #[serde(default)]
    /// Optional cache-read price; `None` when omitted.
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    /// Optional cache-write price; `None` when omitted.
    pub cache_write_per_mtok: Option<f64>,
}
594
/// Rate-limit figures, used on both providers and models.
///
/// Every field is optional on input and is omitted from serialized output when
/// unset. `is_empty` reports whether all are unset.
// NOTE(review): the abbreviations presumably read as requests/tokens per
// minute/hour/day (`rpm`, `rph`, `rpd`, `tpm`, `tph`, `tpd`) — confirm.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RateLimitsDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    /// Can be back-filled by `with_rpm_fallback` when unset.
    pub rpm: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rph: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpd: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tph: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpd: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tier: Option<String>,
    // Provenance metadata (free-form strings).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
639
640impl RateLimitsDef {
641 pub fn is_empty(&self) -> bool {
642 self.rpm.is_none()
643 && self.rph.is_none()
644 && self.rpd.is_none()
645 && self.tpm.is_none()
646 && self.tph.is_none()
647 && self.tpd.is_none()
648 && self.input_tpm.is_none()
649 && self.output_tpm.is_none()
650 && self.concurrency.is_none()
651 && self.tier.is_none()
652 && self.source_url.is_none()
653 && self.last_verified.is_none()
654 && self.notes.is_none()
655 }
656
657 pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
658 if self.rpm.is_none() {
659 self.rpm = rpm;
660 }
661 (!self.is_empty()).then_some(self)
662 }
663}
664
/// Serving-performance figures, used on both providers and models.
///
/// Every field is optional on input and is omitted from serialized output when
/// unset. `is_empty` reports whether all are unset.
// NOTE(review): units are only implied by the field names (`_ms`, `_per_sec`,
// `_s`) — confirm against whatever records these measurements.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ServingPerformanceDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub observed_ttft_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens_per_sec: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_answer_s: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sample_size: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
697
698impl ServingPerformanceDef {
699 pub fn is_empty(&self) -> bool {
700 self.observed_ttft_ms.is_none()
701 && self.output_tokens_per_sec.is_none()
702 && self.time_to_answer_s.is_none()
703 && self.source.is_none()
704 && self.source_url.is_none()
705 && self.last_verified.is_none()
706 && self.sample_size.is_none()
707 && self.notes.is_none()
708 }
709}
710
/// Architecture metadata attached to a model (`ModelDef::architecture`).
///
/// Every field is optional on input and is omitted from serialized output when
/// unset. `is_empty` reports whether all are unset.
// NOTE(review): `_b` presumably means "billions of parameters" — confirm.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ModelArchitectureDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moe: Option<bool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quantization: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precision: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokenizer: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub knowledge_cutoff: Option<String>,
    // Provenance metadata (free-form strings).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
}
747
748impl ModelArchitectureDef {
749 pub fn is_empty(&self) -> bool {
750 self.parameter_count_b.is_none()
751 && self.active_parameter_count_b.is_none()
752 && self.moe.is_none()
753 && self.quantization.is_none()
754 && self.precision.is_none()
755 && self.license.is_none()
756 && self.tokenizer.is_none()
757 && self.knowledge_cutoff.is_none()
758 && self.source_url.is_none()
759 && self.last_verified.is_none()
760 }
761}
762
/// Fast-mode settings attached to a model (`ModelDef::fast_mode`).
// NOTE(review): `param`/`value` presumably name a request parameter and the
// value that enables fast mode — confirm against the request builder.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    /// Required.
    pub param: String,
    /// Required.
    pub value: String,
    #[serde(default)]
    /// Optional; `None` when omitted.
    pub beta_header: Option<String>,
    #[serde(default)]
    /// Optional; `None` when omitted.
    pub otps_speedup: Option<f64>,
    #[serde(default)]
    /// Optional; `None` when omitted.
    pub status: Option<String>,
    #[serde(default)]
    /// Optional pricing specific to fast mode; `None` when omitted.
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    /// Optional free-form note.
    pub note: Option<String>,
}
799
/// A model entry stored in `ProvidersConfig::models`.
///
/// `name`, `provider` and `context_window` are required; every other field is
/// `#[serde(default)]` and falls back to `None`, `false`, an empty collection,
/// or `ModelAvailability::default()` when omitted.
// NOTE(review): the meaning of most optional fields is only implied by their
// names here — confirm against the catalog schema before relying on them.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Required.
    pub name: String,
    /// Required.
    pub provider: String,
    /// Required.
    pub context_window: u64,
    #[serde(default)]
    pub logical_model: Option<String>,
    #[serde(default)]
    pub equivalence_group: Option<String>,
    #[serde(default)]
    pub served_variant: Option<String>,
    #[serde(default)]
    pub wire_model: Option<String>,
    #[serde(default)]
    pub api_dialect: Option<String>,
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    pub performance: Option<ServingPerformanceDef>,
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    #[serde(default)]
    pub capabilities: Vec<String>,
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    /// Defaults to `false` when omitted.
    pub deprecated: bool,
    #[serde(default)]
    pub deprecation_note: Option<String>,
    #[serde(default)]
    pub superseded_by: Option<String>,
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    #[serde(default)]
    pub quality_tags: Vec<String>,
    #[serde(default)]
    /// Defaults to `ModelAvailability::Serverless` when omitted.
    pub availability: ModelAvailability,
    #[serde(default)]
    pub tier: Option<String>,
    #[serde(default)]
    pub open_weight: Option<bool>,
    #[serde(default)]
    pub strengths: Vec<String>,
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    #[serde(default)]
    pub family: Option<String>,
    #[serde(default)]
    pub lineage: Option<String>,
    #[serde(default)]
    pub complementary_with: Vec<String>,
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
918
/// Availability class of a model.
///
/// Serialized in `snake_case` (`"serverless"`, `"dedicated"`, `"unknown"`),
/// matching the strings used by `as_str` and `parse`.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// The default when a model does not specify its availability.
    #[default]
    Serverless,
    Dedicated,
    Unknown,
}
936
937impl ModelAvailability {
938 pub fn as_str(self) -> &'static str {
939 match self {
940 Self::Serverless => "serverless",
941 Self::Dedicated => "dedicated",
942 Self::Unknown => "unknown",
943 }
944 }
945
946 pub fn parse(value: &str) -> Option<Self> {
947 match value {
948 "serverless" => Some(Self::Serverless),
949 "dedicated" => Some(Self::Dedicated),
950 "unknown" => Some(Self::Unknown),
951 _ => None,
952 }
953 }
954}
955
/// Result of resolving a model selector, as built by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Resolved model id.
    pub id: String,
    /// Resolved provider name.
    pub provider: String,
    /// The selector itself when it matched an alias entry.
    pub alias: Option<String>,
    /// Tool format after passing through `guard_tool_format`.
    pub tool_format: String,
    /// Tier computed by `model_tier_with_config`.
    pub tier: String,
    /// Family computed by `model_family_with_config`.
    pub family: String,
    /// Lineage computed by `model_lineage_with_config`.
    pub lineage: String,
}
966
/// Inputs for selecting a complementary reviewer model.
// NOTE(review): the selection routine is outside this view; field meanings
// below are taken from the names and types only — confirm against it.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    /// Model that authored the work to be reviewed.
    pub author_model: String,
    /// Optional provider of the author model.
    pub author_provider: Option<String>,
    /// Kind of review being requested.
    pub intent: ComplementaryReviewerIntent,
    /// Optional price multiplier cap.
    pub max_price_multiplier: Option<f64>,
}
974
/// Kind of review requested from a complementary reviewer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    Review,
    Critique,
    PlanReview,
}

impl ComplementaryReviewerIntent {
    /// Parses the exact (case-sensitive) string produced by `as_str`; any
    /// other input yields `None`.
    pub fn parse(value: &str) -> Option<Self> {
        // Search the variants by their canonical name so the two methods
        // cannot drift apart.
        [Self::Review, Self::Critique, Self::PlanReview]
            .iter()
            .copied()
            .find(|candidate| candidate.as_str() == value)
    }

    /// Returns the snake_case string name of the variant.
    pub fn as_str(self) -> &'static str {
        let name = match self {
            Self::Review => "review",
            Self::Critique => "critique",
            Self::PlanReview => "plan_review",
        };
        name
    }
}
1000
/// Serializable outcome of a complementary-reviewer selection.
// NOTE(review): the routine that fills this in is outside this view; field
// meanings below are taken from names and types only — confirm against it.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    /// Intent as a string (see `ComplementaryReviewerIntent::as_str`).
    pub intent: String,
    /// Identity of the authoring model.
    pub author: ComplementaryModelIdentity,
    /// Identity of the chosen reviewer model.
    pub reviewer: ComplementaryModelIdentity,
    /// Whether the selection is a fallback.
    pub fallback: bool,
    /// Optional human-readable fallback explanation.
    pub fallback_reason: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Optional machine-readable fallback code (presumably one of
    /// `ReviewerFallbackCode::as_code` — TODO confirm); omitted from output
    /// when `None`.
    pub fallback_code: Option<String>,
    /// Explanation of the selection.
    pub reason: String,
    /// Optional cost estimate for the reviewer.
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
1018
/// Machine-readable reasons for a reviewer selection falling back.
// NOTE(review): the precise condition behind each variant is decided by the
// selection routine, which is outside this view — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReviewerFallbackCode {
    UnknownAuthorFamily,
    NoDiffFamilyWithinPrice,
    NoDiffFamilyServerless,
    AllDiffFamilyExcluded,
}

impl ReviewerFallbackCode {
    /// Returns the snake_case code string for the variant.
    pub fn as_code(self) -> &'static str {
        let code = match self {
            Self::UnknownAuthorFamily => "unknown_author_family",
            Self::NoDiffFamilyWithinPrice => "no_diff_family_within_price",
            Self::NoDiffFamilyServerless => "no_diff_family_serverless",
            Self::AllDiffFamilyExcluded => "all_diff_family_excluded",
        };
        code
    }
}
1047
/// Serializable identity of a model taking part in a complementary-reviewer
/// selection (used for both author and reviewer).
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    pub id: String,
    pub provider: String,
    pub family: String,
    pub lineage: String,
    pub tier: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Optional pricing; omitted from serialized output when `None`.
    pub pricing: Option<ModelPricing>,
}
1058
/// Serializable cost estimate attached to a reviewer selection.
// NOTE(review): `total_per_mtok` is presumably input + output — the
// computation is outside this view; confirm.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    pub total_per_mtok: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Optional ratio relative to the author model; omitted from serialized
    /// output when `None`.
    pub multiplier_vs_author: Option<f64>,
}
1067
/// A rule mapping a model selector to a provider.
///
/// `provider` is required; the three matcher fields are optional and default
/// to `None`.
// NOTE(review): how `pattern` is interpreted (glob, regex, ...) and the
// precedence between matchers are decided by the inference code outside this
// view — confirm there.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider assigned when the rule matches.
    pub provider: String,
}
1078
/// A rule mapping a model id to a tier.
///
/// `tier` is required; the three matcher fields are optional and default to
/// `None`.
// NOTE(review): how `pattern` is interpreted (glob, regex, ...) and the
// precedence between matchers are decided by the tier code outside this view
// — confirm there.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier assigned when the rule matches.
    pub tier: String,
}
1089
/// Fallback tier settings (`ProvidersConfig::tier_defaults`).
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    #[serde(default = "default_mid")]
    /// Default tier name; `default_mid()` ("mid") when omitted.
    pub default: String,
}
1095
1096impl Default for TierDefaults {
1097 fn default() -> Self {
1098 Self {
1099 default: default_mid(),
1100 }
1101 }
1102}
1103
/// Tier name used when no default tier is configured.
fn default_mid() -> String {
    String::from("mid")
}
1107
1108pub fn load_config() -> &'static ProvidersConfig {
1110 CONFIG.get_or_init(|| {
1111 let mut config = default_config();
1112 let verbose_config_logging = matches!(
1113 std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
1114 Some("1" | "true" | "TRUE" | "yes" | "YES")
1115 ) || matches!(
1116 std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
1117 Some("1" | "true" | "TRUE" | "yes" | "YES")
1118 );
1119 if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
1120 if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
1121 config.merge_from(&overlay);
1122 let _ = CONFIG_PATH.set(path);
1123 return config;
1124 }
1125 }
1126 if should_load_home_config() {
1127 if let Some(home) = dirs_or_home() {
1128 let path = format!("{home}/.config/harn/providers.toml");
1129 if let Some(overlay) = read_external_config(&path, false) {
1130 config.merge_from(&overlay);
1131 let _ = CONFIG_PATH.set(path);
1132 return config;
1133 }
1134 }
1135 }
1136 config
1137 })
1138}
1139
1140fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1141 match std::fs::read_to_string(path) {
1142 Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
1143 Ok(config) => {
1144 if verbose {
1145 eprintln!(
1146 "[llm_config] Loaded {} providers, {} aliases from {}",
1147 config.providers.len(),
1148 config.aliases.len(),
1149 path
1150 );
1151 }
1152 Some(config)
1153 }
1154 Err(error) => {
1155 eprintln!("[llm_config] TOML parse error in {path}: {error}");
1156 None
1157 }
1158 },
1159 Err(error) => {
1160 if verbose {
1161 eprintln!("[llm_config] Cannot read {path}: {error}");
1162 }
1163 None
1164 }
1165 }
1166}
1167
/// Reports whether `load_config` may consult the home-directory config file:
/// `false` in test builds (`cfg!(test)`), `true` otherwise.
fn should_load_home_config() -> bool {
    let building_tests = cfg!(test);
    !building_tests
}
1173
1174pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1177 toml::from_str::<ProvidersConfig>(src)
1178}
1179
1180pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1183 let _ = load_config();
1185 CONFIG_PATH.get().map(std::path::PathBuf::from)
1186}
1187
1188pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1192 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1193}
1194
/// Removes the calling thread's user overrides; shorthand for
/// `set_user_overrides(None)`.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
1199
1200pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1205 *runtime_catalog_overlay()
1206 .write()
1207 .expect("runtime catalog overlay poisoned") = config;
1208}
1209
/// Removes the runtime catalog overlay; shorthand for
/// `set_runtime_catalog_overlay(None)`.
pub fn clear_runtime_catalog_overlay() {
    set_runtime_catalog_overlay(None);
}
1213
1214pub(crate) fn effective_config() -> ProvidersConfig {
1215 let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1216 effective_config_with_user_overrides(user_overrides.as_ref())
1217}
1218
1219pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1237 let mut config = default_config();
1238 if let Some(overlay) = explicit_overlay {
1239 config.merge_from(overlay);
1240 }
1241 config
1242}
1243
1244pub(crate) fn effective_config_with_user_overrides(
1245 user_overrides: Option<&ProvidersConfig>,
1246) -> ProvidersConfig {
1247 let mut merged = load_config().clone();
1248 if let Some(overlay) = runtime_catalog_overlay()
1249 .read()
1250 .expect("runtime catalog overlay poisoned")
1251 .as_ref()
1252 {
1253 merged.merge_from(overlay);
1254 }
1255 if let Some(overlay) = user_overrides {
1256 merged.merge_from(overlay);
1257 }
1258 merged
1259}
1260
1261fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
1262 RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
1263}
1264
1265pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1267 let config = effective_config();
1268 if let Some(a) = config.aliases.get(alias) {
1269 return (a.id.clone(), Some(a.provider.clone()));
1270 }
1271 (normalize_model_id(alias), None)
1272}
1273
/// Strips the first matching provider-selector prefix from `raw`.
///
/// Prefixes are tried in the order listed in `PROVIDER_SELECTOR_PREFIXES`;
/// at most one is removed. Input without a known prefix is returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes removed by [`normalize_model_id`], in match order.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
1291
1292pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1295 let config = effective_config();
1296 if let Some(alias) = config.aliases.get(selector) {
1297 let id = alias.id.clone();
1298 let provider = alias.provider.clone();
1299 let requested = alias
1300 .tool_format
1301 .clone()
1302 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1303 let tool_format = guard_tool_format(&provider, &id, &requested, Some(selector));
1304 return ResolvedModel {
1305 tier: model_tier_with_config(&config, &id),
1306 family: model_family_with_config(&config, &provider, &id),
1307 lineage: model_lineage_with_config(&config, &provider, &id),
1308 id,
1309 provider,
1310 alias: Some(selector.to_string()),
1311 tool_format,
1312 };
1313 }
1314
1315 let id = normalize_model_id(selector);
1316 let inference = infer_provider_with_config(&config, selector);
1317 let source = inference.source;
1318 let provider = inference.provider;
1319 let requested = default_tool_format_with_config(&config, &id, &provider);
1320 let tool_format = guard_tool_format(&provider, &id, &requested, None);
1321 let tier = model_tier_with_config(&config, &id);
1322 let family = model_family_with_inference_source(&config, &provider, &id, source);
1323 let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1324 ResolvedModel {
1325 id,
1326 provider,
1327 alias: None,
1328 tool_format,
1329 tier,
1330 family,
1331 lineage,
1332 }
1333}
1334
1335fn guard_tool_format(provider: &str, model: &str, requested: &str, alias: Option<&str>) -> String {
1342 let decision = crate::llm::capabilities::validate_tool_format(provider, model, requested);
1343 if let Some(reason) = &decision.correction {
1344 tracing::warn!(
1345 target: "harn::llm::tool_format",
1346 alias = alias.unwrap_or(""),
1347 "{reason}"
1348 );
1349 }
1350 decision.effective
1351}
1352
1353pub fn infer_provider(model_id: &str) -> String {
1355 infer_provider_detail(model_id).provider
1356}
1357
1358pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1360 let config = effective_config();
1361 infer_provider_with_config(&config, model_id)
1362}
1363
1364fn infer_provider_with_config(
1365 config: &ProvidersConfig,
1366 model_id: &str,
1367) -> crate::llm::provider::ProviderInference {
1368 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1369 return crate::llm::provider::ProviderInference::builtin("ollama");
1370 }
1371 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1372 return crate::llm::provider::ProviderInference::builtin("huggingface");
1373 }
1374 let normalized_id = normalize_model_id(model_id);
1380 if let Some(model) = config
1381 .models
1382 .get(model_id)
1383 .or_else(|| config.models.get(&normalized_id))
1384 {
1385 return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1386 }
1387 for rule in &config.inference_rules {
1388 if let Some(exact) = &rule.exact {
1389 if model_id == exact {
1390 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1391 }
1392 }
1393 if let Some(pattern) = &rule.pattern {
1394 if glob_match(pattern, model_id) {
1395 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1396 }
1397 }
1398 if let Some(substr) = &rule.contains {
1399 if model_id.contains(substr.as_str()) {
1400 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1401 }
1402 }
1403 }
1404 crate::llm::provider::infer_provider_from_model_id(
1405 model_id,
1406 &default_provider_with_config(config),
1407 )
1408}
1409
1410pub fn default_provider() -> String {
1411 let config = effective_config();
1412 default_provider_with_config(&config)
1413}
1414
1415fn default_provider_with_config(config: &ProvidersConfig) -> String {
1416 std::env::var("HARN_DEFAULT_PROVIDER")
1417 .ok()
1418 .map(|value| value.trim().to_string())
1419 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1420 .or_else(|| {
1421 config
1422 .default_provider
1423 .as_deref()
1424 .map(str::trim)
1425 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1426 .map(str::to_string)
1427 })
1428 .unwrap_or_else(|| auto_select_provider(config))
1429}
1430
/// Provider name returned by `auto_select_provider` when no configured
/// provider has credentials and none is local.
const FALLBACK_PROVIDER: &str = "anthropic";

/// Set once the first auto-selection warning has been emitted; consulted by
/// `warn_auto_provider_once` so that only one such warning is logged per process.
static AUTO_PROVIDER_WARNED: AtomicBool = AtomicBool::new(false);
1439
1440fn provider_has_credentials(def: &ProviderDef) -> bool {
1442 auth_env_names(&def.auth_env)
1443 .iter()
1444 .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty()))
1445}
1446
1447fn provider_is_local(def: &ProviderDef) -> bool {
1450 def.local_runtime.is_some() || matches!(def.auth_env, AuthEnv::None)
1451}
1452
1453fn warn_auto_provider_once(message: &str) {
1455 if !AUTO_PROVIDER_WARNED.swap(true, Ordering::Relaxed) {
1456 crate::events::log_warn("llm_config", message);
1457 }
1458}
1459
1460fn auto_select_provider(config: &ProvidersConfig) -> String {
1466 const PREFERRED: &[&str] = &[
1469 "anthropic",
1470 "openai",
1471 "google",
1472 "azure-openai",
1473 "groq",
1474 "mistral",
1475 "deepseek",
1476 "xai",
1477 "openrouter",
1478 ];
1479 for name in PREFERRED {
1480 if config
1481 .providers
1482 .get(*name)
1483 .is_some_and(provider_has_credentials)
1484 {
1485 if *name != FALLBACK_PROVIDER {
1486 warn_auto_provider_once(&format!(
1487 "no default provider configured; using '{name}' (its API key is set). \
1488 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1489 ));
1490 }
1491 return (*name).to_string();
1492 }
1493 }
1494 for (name, def) in &config.providers {
1495 if provider_has_credentials(def) {
1496 warn_auto_provider_once(&format!(
1497 "no default provider configured; using '{name}' (its API key is set). \
1498 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1499 ));
1500 return name.clone();
1501 }
1502 }
1503 for (name, def) in &config.providers {
1505 if provider_is_local(def) {
1506 warn_auto_provider_once(&format!(
1507 "no provider API keys found; using local provider '{name}'. \
1508 Set an API key + HARN_DEFAULT_PROVIDER to use a cloud provider."
1509 ));
1510 return name.clone();
1511 }
1512 }
1513 warn_auto_provider_once(&format!(
1515 "no LLM provider configured and no API keys detected; defaulting to \
1516 '{FALLBACK_PROVIDER}'. Set ANTHROPIC_API_KEY (or another provider's key plus \
1517 HARN_DEFAULT_PROVIDER), or run a local model with `harn local launch`."
1518 ));
1519 FALLBACK_PROVIDER.to_string()
1520}
1521
1522pub fn model_tier(model_id: &str) -> String {
1524 let config = effective_config();
1525 model_tier_with_config(&config, model_id)
1526}
1527
1528pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1529 if let Some(model) = config.models.get(model_id) {
1531 if let Some(tier) = model.tier.as_deref() {
1532 let trimmed = tier.trim();
1533 if !trimmed.is_empty() {
1534 return trimmed.to_string();
1535 }
1536 }
1537 }
1538 for rule in &config.tier_rules {
1542 if let Some(exact) = &rule.exact {
1543 if model_id == exact {
1544 return rule.tier.clone();
1545 }
1546 }
1547 if let Some(pattern) = &rule.pattern {
1548 if glob_match(pattern, model_id) {
1549 return rule.tier.clone();
1550 }
1551 }
1552 if let Some(substr) = &rule.contains {
1553 if model_id.contains(substr.as_str()) {
1554 return rule.tier.clone();
1555 }
1556 }
1557 }
1558 config.tier_defaults.default.clone()
1559}
1560
1561pub fn model_family(provider: &str, model_id: &str) -> String {
1563 let config = effective_config();
1564 model_family_with_config(&config, provider, model_id)
1565}
1566
1567pub(crate) fn model_family_with_config(
1568 config: &ProvidersConfig,
1569 provider: &str,
1570 model_id: &str,
1571) -> String {
1572 catalog_family_token(config, model_id)
1573 .unwrap_or_else(|| derive_model_family(provider, model_id))
1574}
1575
1576fn model_family_with_inference_source(
1577 config: &ProvidersConfig,
1578 provider: &str,
1579 model_id: &str,
1580 source: crate::llm::provider::ProviderInferenceSource,
1581) -> String {
1582 if let Some(family) = catalog_family_token(config, model_id) {
1583 return family;
1584 }
1585 let id_family = derive_model_family("", model_id);
1586 if id_family != "unknown" {
1587 return id_family;
1588 }
1589 if matches!(
1590 source,
1591 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1592 ) {
1593 return "unknown".to_string();
1594 }
1595 derive_model_family(provider, model_id)
1596}
1597
1598pub fn model_lineage(provider: &str, model_id: &str) -> String {
1600 let config = effective_config();
1601 model_lineage_with_config(&config, provider, model_id)
1602}
1603
1604pub(crate) fn model_lineage_with_config(
1605 config: &ProvidersConfig,
1606 provider: &str,
1607 model_id: &str,
1608) -> String {
1609 catalog_lineage_token(config, model_id)
1610 .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1611}
1612
1613fn model_lineage_with_inference_source(
1614 config: &ProvidersConfig,
1615 provider: &str,
1616 model_id: &str,
1617 source: crate::llm::provider::ProviderInferenceSource,
1618) -> String {
1619 if let Some(lineage) = catalog_lineage_token(config, model_id) {
1620 return lineage;
1621 }
1622 let id_lineage = derive_model_lineage("", model_id);
1623 if id_lineage != "unknown" {
1624 return id_lineage;
1625 }
1626 if matches!(
1627 source,
1628 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1629 ) {
1630 return "unknown".to_string();
1631 }
1632 derive_model_lineage(provider, model_id)
1633}
1634
1635fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1636 config
1637 .models
1638 .get(model_id)
1639 .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1640}
1641
1642fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1643 config
1644 .models
1645 .get(model_id)
1646 .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1647}
1648
/// Canonicalizes a catalog token: trims it, lowercases ASCII letters and
/// replaces `_` with `-`. Missing or blank input yields `None`.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let trimmed = value?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.to_ascii_lowercase().replace('_', "-"))
}
1655
1656fn derive_model_family(provider: &str, model_id: &str) -> String {
1657 let id = model_id.to_ascii_lowercase();
1658 if contains_any(&id, &["claude", "anthropic.claude"]) {
1659 return "anthropic-claude".to_string();
1660 }
1661 if contains_any(&id, &["gemini", "google/gemini"]) {
1662 return "google-gemini".to_string();
1663 }
1664 if contains_any(&id, &["deepseek"]) {
1665 return "deepseek".to_string();
1666 }
1667 if contains_any(&id, &["qwen"]) {
1668 return "qwen".to_string();
1669 }
1670 if contains_any(&id, &["kimi", "moonshot"]) {
1671 return "kimi".to_string();
1672 }
1673 if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1674 return "glm".to_string();
1675 }
1676 if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1677 return "mistral".to_string();
1678 }
1679 if contains_any(&id, &["minimax"]) {
1680 return "minimax".to_string();
1681 }
1682 if contains_any(&id, &["llama"]) {
1683 return "llama".to_string();
1684 }
1685 if contains_any(&id, &["gemma"]) {
1686 return "gemma".to_string();
1687 }
1688 if is_openai_reasoning_model(&id) {
1689 return "openai-reasoning".to_string();
1690 }
1691 if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1692 return "openai-gpt".to_string();
1693 }
1694 match provider {
1695 "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1696 "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1697 "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1698 "deepseek" => "deepseek".to_string(),
1699 "zai" => "glm".to_string(),
1700 "minimax" => "minimax".to_string(),
1701 other if !other.is_empty() => normalize_identifier_token(other),
1702 _ => "unknown".to_string(),
1703 }
1704}
1705
1706fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1707 let id = model_id.to_ascii_lowercase();
1708 if contains_any(&id, &["haiku"]) {
1709 return "claude-haiku".to_string();
1710 }
1711 if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1712 return "claude-opus-adaptive".to_string();
1713 }
1714 if contains_any(&id, &["claude"]) {
1715 return "claude-sonnet-opus".to_string();
1716 }
1717 if contains_any(&id, &["gpt-5"]) {
1718 return "openai-gpt5".to_string();
1719 }
1720 if is_openai_reasoning_model(&id) {
1721 return "openai-reasoning".to_string();
1722 }
1723 if contains_any(&id, &["gpt-", "gpt_"]) {
1724 return "openai-legacy".to_string();
1725 }
1726 if contains_any(&id, &["gemini"]) {
1727 if contains_any(&id, &["flash"]) {
1728 return "gemini-flash".to_string();
1729 }
1730 return "gemini-pro".to_string();
1731 }
1732 if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1733 return "qwen3".to_string();
1734 }
1735 if contains_any(&id, &["gemma4", "gemma-4"]) {
1736 return "gemma4".to_string();
1737 }
1738 let family = derive_model_family(provider, model_id);
1739 if family == "unknown" {
1740 "unknown".to_string()
1741 } else {
1742 family
1743 }
1744}
1745
/// Returns `true` when `haystack` contains at least one of `needles`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1749
/// Returns `true` when `haystack` starts with at least one of `prefixes`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1753
1754fn is_openai_reasoning_model(id: &str) -> bool {
1755 starts_with_any(id, &["o1", "o3", "o4"])
1756 || contains_any(
1757 id,
1758 &[
1759 "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1760 ],
1761 )
1762}
1763
/// Normalizes `value` into a lowercase, dash-separated identifier.
///
/// The input is trimmed and ASCII-lowercased; every run of characters that
/// are not ASCII alphanumerics acts as a single separator, and leading or
/// trailing separators are dropped.
fn normalize_identifier_token(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let mut token = String::with_capacity(lowered.len());
    for segment in lowered.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if segment.is_empty() {
            continue;
        }
        if !token.is_empty() {
            token.push('-');
        }
        token.push_str(segment);
    }
    token
}
1782
1783pub fn provider_config(name: &str) -> Option<ProviderDef> {
1785 effective_config().providers.get(name).cloned()
1786}
1787
1788pub fn provider_protocol(name: &str) -> Option<String> {
1789 provider_config(name).and_then(|def| def.protocol)
1790}
1791
1792pub fn provider_uses_acp(name: &str) -> bool {
1793 provider_protocol(name)
1794 .as_deref()
1795 .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1796}
1797
1798pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1801 let config = effective_config();
1802 let mut params = BTreeMap::new();
1803 for (pattern, defaults) in &config.model_defaults {
1804 if glob_match(pattern, model_id) {
1805 for (k, v) in defaults {
1806 params.insert(k.clone(), v.clone());
1807 }
1808 }
1809 }
1810 params
1811}
1812
1813pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1827 let normalized = normalize_model_role_name(role);
1828 if normalized.is_empty() {
1829 return BTreeMap::new();
1830 }
1831 let config = effective_config();
1832 let mut params = BTreeMap::new();
1833 for key in role_lookup_keys(&normalized) {
1834 extend_model_role_defaults(&config, &key, &mut params);
1835 }
1836 apply_model_role_env_overrides(&normalized, &mut params);
1837 params
1838}
1839
1840fn extend_model_role_defaults(
1841 config: &ProvidersConfig,
1842 role: &str,
1843 params: &mut BTreeMap<String, toml::Value>,
1844) {
1845 for (configured_role, defaults) in &config.model_roles {
1846 if normalize_model_role_name(configured_role) == role {
1847 params.extend(defaults.clone());
1848 }
1849 }
1850 if let Some(defaults) = config.model_roles.get(role) {
1851 params.extend(defaults.clone());
1852 }
1853}
1854
/// Normalizes a role name: trims it, lowercases ASCII letters and turns `-` into `_`.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch.to_ascii_lowercase() })
        .collect()
}
1858
/// Returns the configuration keys consulted for `role`, in application order.
///
/// `merge` and `fast_apply` are aliases of each other: both keys are returned
/// with the requested role last, so its own settings win. Any other role maps
/// to itself only.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let ordered: &[&str] = match role {
        "merge" => &["fast_apply", "merge"],
        "fast_apply" => &["merge", "fast_apply"],
        other => return vec![other.to_string()],
    };
    ordered.iter().map(|key| (*key).to_string()).collect()
}
1868
/// Converts a role name into the token used inside environment variable names.
///
/// ASCII alphanumerics are uppercased; every run of other characters becomes
/// a single `_`, and leading or trailing separators are dropped.
fn role_env_token(role: &str) -> String {
    role.split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|part| !part.is_empty())
        .map(str::to_ascii_uppercase)
        .collect::<Vec<_>>()
        .join("_")
}
1884
1885fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
1886 for alias in role_env_aliases(role) {
1887 apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
1888 apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
1889 apply_model_role_env_var(
1890 &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
1891 "route_policy",
1892 params,
1893 );
1894 apply_model_role_env_var(
1895 &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
1896 "provider",
1897 params,
1898 );
1899 apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
1900 apply_model_role_env_var(
1901 &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
1902 "route_policy",
1903 params,
1904 );
1905 }
1906}
1907
1908fn role_env_aliases(role: &str) -> Vec<String> {
1909 let token = role_env_token(role);
1910 if token.is_empty() {
1911 return Vec::new();
1912 }
1913 if token == "MERGE" {
1914 vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
1915 } else if token == "FAST_APPLY" {
1916 vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
1917 } else {
1918 vec![token]
1919 }
1920}
1921
1922fn apply_model_role_env_var(
1923 env_name: &str,
1924 option_name: &str,
1925 params: &mut BTreeMap<String, toml::Value>,
1926) {
1927 let Ok(value) = std::env::var(env_name) else {
1928 return;
1929 };
1930 let trimmed = value.trim();
1931 if trimmed.is_empty() {
1932 return;
1933 }
1934 params.insert(
1935 option_name.to_string(),
1936 toml::Value::String(trimmed.to_string()),
1937 );
1938}
1939
1940pub fn provider_names() -> Vec<String> {
1942 effective_config().providers.keys().cloned().collect()
1943}
1944
1945pub fn known_model_names() -> Vec<String> {
1947 effective_config().aliases.keys().cloned().collect()
1948}
1949
1950pub fn alias_entries() -> Vec<(String, AliasDef)> {
1951 effective_config().aliases.into_iter().collect()
1952}
1953
1954pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
1955 effective_config().alias_tool_calling.get(alias).cloned()
1956}
1957
1958pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
1960 let config = effective_config();
1961 model_catalog_entries_with_config(&config)
1962}
1963
1964pub(crate) fn model_catalog_entries_with_config(
1965 config: &ProvidersConfig,
1966) -> Vec<(String, ModelDef)> {
1967 sorted_model_entries_with_config(config)
1968 .into_iter()
1969 .map(|(id, model)| {
1970 let provider = model.provider.clone();
1971 (
1972 id.clone(),
1973 with_effective_capability_tags(id, provider, model),
1974 )
1975 })
1976 .collect()
1977}
1978
1979pub(crate) fn sorted_model_entries_with_config(
1980 config: &ProvidersConfig,
1981) -> Vec<(String, ModelDef)> {
1982 let mut entries: Vec<_> = config
1983 .models
1984 .iter()
1985 .map(|(id, model)| (id.clone(), model.clone()))
1986 .collect();
1987 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
1988 model_a
1989 .provider
1990 .cmp(&model_b.provider)
1991 .then_with(|| id_a.cmp(id_b))
1992 });
1993 entries
1994}
1995
1996pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
1997 effective_config()
1998 .models
1999 .get(model_id)
2000 .cloned()
2001 .map(|model| {
2002 let provider = model.provider.clone();
2003 with_effective_capability_tags(model_id.to_string(), provider, model)
2004 })
2005}
2006
2007pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
2008 model_catalog_entry(model_id).and_then(|model| model.rate_limits)
2009}
2010
2011pub fn wire_model_id(model_id: &str) -> String {
2012 model_catalog_entry(model_id)
2013 .and_then(|model| model.wire_model)
2014 .unwrap_or_else(|| model_id.to_string())
2015}
2016
2017pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
2018 provider_config(provider).and_then(|provider| {
2019 provider
2020 .rate_limits
2021 .unwrap_or_default()
2022 .with_rpm_fallback(provider.rpm)
2023 })
2024}
2025
2026pub fn model_equivalence_group(model_id: &str) -> Option<String> {
2027 model_catalog_entry(model_id).and_then(|model| {
2028 model
2029 .equivalence_group
2030 .or(model.logical_model)
2031 .filter(|group| !group.trim().is_empty())
2032 })
2033}
2034
2035pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
2039 let resolved = resolve_model_info(selector);
2040 let Some(group) = model_equivalence_group(&resolved.id) else {
2041 return Vec::new();
2042 };
2043 let config = effective_config();
2044 let Some(source) = config.models.get(&resolved.id) else {
2045 return Vec::new();
2046 };
2047 let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
2048 let source_context = source
2049 .runtime_context_window
2050 .unwrap_or(source.context_window);
2051
2052 sorted_model_entries_with_config(&config)
2053 .into_iter()
2054 .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
2055 .filter(|(_, model)| !model.deprecated)
2056 .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
2057 .filter(|(_, model)| {
2058 model.equivalence_group.as_deref() == Some(group.as_str())
2059 || model.logical_model.as_deref() == Some(group.as_str())
2060 })
2061 .filter(|(id, model)| {
2062 let caps = crate::llm::capabilities::lookup(&model.provider, id);
2063 let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
2064 candidate_context >= source_context
2065 && (!source_caps.native_tools || caps.native_tools)
2066 && (!source_caps.text_tool_wire_format_supported
2067 || caps.text_tool_wire_format_supported)
2068 && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
2069 && source_caps.structured_output_mode == caps.structured_output_mode
2070 })
2071 .map(|(id, model)| {
2072 let provider = model.provider.clone();
2073 (
2074 id.clone(),
2075 with_effective_capability_tags(id, provider, model),
2076 )
2077 })
2078 .collect()
2079}
2080
2081pub fn qc_default_model(provider: &str) -> Option<String> {
2082 std::env::var("BURIN_QC_MODEL")
2083 .ok()
2084 .filter(|value| !value.trim().is_empty())
2085 .or_else(|| {
2086 effective_config()
2087 .qc_defaults
2088 .get(&provider.to_lowercase())
2089 .cloned()
2090 })
2091}
2092
2093pub fn default_model_for_provider(provider: &str) -> String {
2094 if provider_uses_acp(provider) {
2095 return "default".to_string();
2096 }
2097 match provider {
2098 "local" => std::env::var("LOCAL_LLM_MODEL")
2099 .or_else(|_| std::env::var("HARN_LLM_MODEL"))
2100 .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
2101 "mlx" => std::env::var("MLX_MODEL_ID")
2102 .unwrap_or_else(|_| "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit".to_string()),
2103 "openai" => "gpt-4o-mini".to_string(),
2104 "ollama" => "llama3.2".to_string(),
2105 "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
2106 _ => "claude-sonnet-4-6".to_string(),
2107 }
2108}
2109
2110pub fn qc_defaults() -> BTreeMap<String, String> {
2111 effective_config().qc_defaults
2112}
2113
2114pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2115 effective_config()
2116 .models
2117 .get(model_id)
2118 .and_then(|model| model.pricing.clone())
2119}
2120
2121pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2126 effective_config()
2127 .models
2128 .get(model_id)
2129 .and_then(|model| model.fast_mode.as_ref())
2130 .and_then(|fast_mode| fast_mode.pricing.clone())
2131}
2132
2133pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
2134 model_pricing_per_mtok(model_id)
2135 .map(|pricing| {
2136 (
2137 pricing.input_per_mtok / 1000.0,
2138 pricing.output_per_mtok / 1000.0,
2139 )
2140 })
2141 .or_else(|| {
2142 let (input, output, _) = provider_economics(provider);
2143 match (input, output) {
2144 (Some(input), Some(output)) => Some((input, output)),
2145 _ => None,
2146 }
2147 })
2148}
2149
2150pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
2151 match auth_env {
2152 AuthEnv::None => Vec::new(),
2153 AuthEnv::Single(name) => vec![name.clone()],
2154 AuthEnv::Multiple(names) => names.clone(),
2155 }
2156}
2157
2158pub fn provider_key_available(provider: &str) -> bool {
2159 let Some(pdef) = provider_config(provider) else {
2160 return provider == "ollama";
2161 };
2162 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
2163 return true;
2164 }
2165 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
2166 std::env::var(env_name)
2167 .ok()
2168 .is_some_and(|value| !value.trim().is_empty())
2169 })
2170}
2171
2172pub fn available_provider_names() -> Vec<String> {
2173 provider_names()
2174 .into_iter()
2175 .filter(|provider| provider_key_available(provider))
2176 .collect()
2177}
2178
2179pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
2181 provider_config(provider)
2182 .map(|p| p.features.iter().any(|f| f == feature))
2183 .unwrap_or(false)
2184}
2185
2186pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
2190 provider_config(provider)
2191 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
2192 .unwrap_or((None, None, None))
2193}
2194
/// Channel that a tool-format name maps to (see `tool_format_channel`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatChannel {
    /// Selected by the `"native"` tool format.
    Native,
    /// Selected by the `"text"` and `"json"` tool formats.
    Text,
}
2210
2211pub fn tool_format_channel(format: &str) -> Option<ToolFormatChannel> {
2219 match format {
2220 "native" => Some(ToolFormatChannel::Native),
2221 "text" | "json" => Some(ToolFormatChannel::Text),
2222 _ => None,
2223 }
2224}
2225
2226pub fn is_known_tool_format(format: &str) -> bool {
2231 tool_format_channel(format).is_some()
2232}
2233
2234pub fn default_tool_format(model: &str, provider: &str) -> String {
2240 let config = effective_config();
2241 default_tool_format_with_config(&config, model, provider)
2242}
2243
2244fn default_tool_format_with_config(
2245 config: &ProvidersConfig,
2246 model: &str,
2247 provider: &str,
2248) -> String {
2249 for (name, alias) in &config.aliases {
2251 let matches = (alias.id == model && alias.provider == provider) || name == model;
2252 if matches {
2253 if let Some(ref fmt) = alias.tool_format {
2254 return fmt.clone();
2255 }
2256 }
2257 }
2258 let capabilities = crate::llm::capabilities::lookup(provider, model);
2259 if let Some(format) = capabilities.preferred_tool_format.as_deref() {
2260 if is_known_tool_format(format) {
2267 return format.to_string();
2268 }
2269 }
2270 let capability_matrix_native = capabilities.native_tools;
2271 let legacy_provider_native = config
2272 .providers
2273 .get(provider)
2274 .map(|p| p.features.iter().any(|f| f == "native_tools"))
2275 .unwrap_or(false);
2276 if capability_matrix_native || legacy_provider_native {
2277 "native".to_string()
2278 } else {
2279 "json".to_string()
2290 }
2291}
2292
2293fn with_effective_capability_tags(
2294 model_id: String,
2295 provider: String,
2296 mut model: ModelDef,
2297) -> ModelDef {
2298 model.capabilities = effective_model_capability_tags(&provider, &model_id);
2299 model
2300}
2301
2302pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2306 let caps = crate::llm::capabilities::lookup(provider, model_id);
2307 capability_tags_from_capabilities(&caps)
2308}
2309
2310pub(crate) fn capability_tags_from_capabilities(
2311 caps: &crate::llm::capabilities::Capabilities,
2312) -> Vec<String> {
2313 let mut tags = Vec::new();
2314 tags.push("streaming".to_string());
2317 if caps.native_tools || caps.text_tool_wire_format_supported {
2318 tags.push("tools".to_string());
2319 }
2320 if !caps.tool_search.is_empty() {
2321 tags.push("tool_search".to_string());
2322 }
2323 if caps.vision || caps.vision_supported {
2324 tags.push("vision".to_string());
2325 }
2326 if caps.audio {
2327 tags.push("audio".to_string());
2328 }
2329 if caps.pdf {
2330 tags.push("pdf".to_string());
2331 }
2332 if caps.video {
2333 tags.push("video".to_string());
2334 }
2335 if caps.files_api_supported {
2336 tags.push("files".to_string());
2337 }
2338 if caps.prompt_caching {
2339 tags.push("prompt_caching".to_string());
2340 }
2341 if !caps.thinking_modes.is_empty() {
2342 tags.push("thinking".to_string());
2343 }
2344 if caps.interleaved_thinking_supported
2345 || caps
2346 .thinking_modes
2347 .iter()
2348 .any(|mode| mode == "adaptive" || mode == "effort")
2349 {
2350 tags.push("extended_thinking".to_string());
2351 }
2352 if caps.structured_output.is_some() || caps.json_schema.is_some() {
2353 tags.push("structured_output".to_string());
2354 }
2355 tags
2356}
2357
2358pub fn resolve_tier_model(
2360 target: &str,
2361 preferred_provider: Option<&str>,
2362) -> Option<(String, String)> {
2363 let config = effective_config();
2364
2365 let candidate_aliases = if let Some(provider) = preferred_provider {
2366 vec![
2367 format!("{provider}/{target}"),
2368 format!("{provider}:{target}"),
2369 format!("tier/{target}"),
2370 target.to_string(),
2371 ]
2372 } else {
2373 vec![format!("tier/{target}"), target.to_string()]
2374 };
2375
2376 for alias_name in candidate_aliases {
2377 if let Some(alias) = config.aliases.get(&alias_name) {
2378 return Some((alias.id.clone(), alias.provider.clone()));
2379 }
2380 }
2381
2382 None
2383}
2384
2385pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2389 let config = effective_config();
2390 let mut seen = std::collections::BTreeSet::new();
2391 let mut candidates = Vec::new();
2392
2393 for alias in config.aliases.values() {
2394 let pair = (alias.id.clone(), alias.provider.clone());
2395 if seen.contains(&pair) {
2396 continue;
2397 }
2398 if model_tier(&alias.id) == target {
2399 seen.insert(pair.clone());
2400 candidates.push(pair);
2401 }
2402 }
2403
2404 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2405 provider_a
2406 .cmp(provider_b)
2407 .then_with(|| model_a.cmp(model_b))
2408 });
2409 candidates
2410}
2411
2412pub fn all_model_candidates() -> Vec<(String, String)> {
2415 let config = effective_config();
2416 let mut seen = std::collections::BTreeSet::new();
2417 let mut candidates = Vec::new();
2418
2419 for alias in config.aliases.values() {
2420 let pair = (alias.id.clone(), alias.provider.clone());
2421 if seen.insert(pair.clone()) {
2422 candidates.push(pair);
2423 }
2424 }
2425
2426 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2427 provider_a
2428 .cmp(provider_b)
2429 .then_with(|| model_a.cmp(model_b))
2430 });
2431 candidates
2432}
2433
2434pub fn pick_complementary_reviewer(
2435 options: ComplementaryReviewerOptions,
2436) -> ComplementaryReviewerSelection {
2437 let config = effective_config();
2438 let mut author = resolve_model_info(&options.author_model);
2439 if let Some(provider) = options
2440 .author_provider
2441 .as_deref()
2442 .map(str::trim)
2443 .filter(|provider| !provider.is_empty())
2444 {
2445 author.provider = provider.to_string();
2446 author.family = model_family_with_config(&config, &author.provider, &author.id);
2447 author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
2448 author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
2449 }
2450 let author_entry = config.models.get(&author.id);
2451 let author_identity = complementary_identity(
2452 author.id.clone(),
2453 author.provider.clone(),
2454 author.family.clone(),
2455 author.lineage.clone(),
2456 author.tier.clone(),
2457 author_entry.and_then(|model| model.pricing.clone()),
2458 );
2459
2460 let fallback =
2461 |code: ReviewerFallbackCode, fallback_reason: String| ComplementaryReviewerSelection {
2462 intent: options.intent.as_str().to_string(),
2463 reviewer: author_identity.clone(),
2464 estimated_incremental_cost: cost_estimate(
2465 author_identity.pricing.as_ref(),
2466 author_identity.pricing.as_ref(),
2467 ),
2468 author: author_identity.clone(),
2469 fallback: true,
2470 reason: format!(
2471 "using author model {} because {fallback_reason}",
2472 author_identity.id
2473 ),
2474 fallback_reason: Some(fallback_reason),
2475 fallback_code: Some(code.as_code().to_string()),
2476 };
2477
2478 if author_identity.family == "unknown" {
2479 return fallback(
2480 ReviewerFallbackCode::UnknownAuthorFamily,
2481 "author model family is unknown".to_string(),
2482 );
2483 }
2484
2485 let preferred_families = author_entry
2486 .map(|model| model.complementary_with.clone())
2487 .unwrap_or_default();
2488 let author_refs = reviewer_match_refs(&author_identity);
2489 let mut rejected_by_price = 0usize;
2490 let mut diff_family_seen = 0usize;
2491 let mut candidates = Vec::new();
2492
2493 for (id, model) in config.models.iter() {
2494 if id == &author_identity.id && model.provider == author_identity.provider {
2495 continue;
2496 }
2497 if model.deprecated || model.availability != ModelAvailability::Serverless {
2498 continue;
2499 }
2500 let family = model_family_with_config(&config, &model.provider, id);
2501 if family == "unknown" || family == author_identity.family {
2502 continue;
2503 }
2504 diff_family_seen += 1;
2505 let lineage = model_lineage_with_config(&config, &model.provider, id);
2506 let candidate_identity = complementary_identity(
2507 id.clone(),
2508 model.provider.clone(),
2509 family,
2510 lineage,
2511 model_tier_with_config(&config, id),
2512 model.pricing.clone(),
2513 );
2514 if model
2515 .avoid_as_reviewer_for
2516 .iter()
2517 .any(|selector| refs_contain_selector(&author_refs, selector))
2518 {
2519 continue;
2520 }
2521 if exceeds_price_cap(
2522 author_identity.pricing.as_ref(),
2523 candidate_identity.pricing.as_ref(),
2524 options.max_price_multiplier,
2525 ) {
2526 rejected_by_price += 1;
2527 continue;
2528 }
2529 let score = reviewer_score(
2530 &options,
2531 &author_identity,
2532 &candidate_identity,
2533 model,
2534 &preferred_families,
2535 );
2536 candidates.push(ReviewerCandidate {
2537 identity: candidate_identity,
2538 score,
2539 });
2540 }
2541
2542 candidates.sort_by(|left, right| {
2543 right
2544 .score
2545 .partial_cmp(&left.score)
2546 .unwrap_or(std::cmp::Ordering::Equal)
2547 .then_with(|| left.identity.provider.cmp(&right.identity.provider))
2548 .then_with(|| left.identity.id.cmp(&right.identity.id))
2549 });
2550
2551 let Some(best) = candidates.into_iter().next() else {
2552 if rejected_by_price > 0 {
2553 let cap = options.max_price_multiplier.unwrap_or_default();
2554 return fallback(
2555 ReviewerFallbackCode::NoDiffFamilyWithinPrice,
2556 format!("no different-family reviewer satisfied max_price_multiplier {cap}"),
2557 );
2558 }
2559 if diff_family_seen == 0 {
2560 return fallback(
2561 ReviewerFallbackCode::NoDiffFamilyServerless,
2562 "no active serverless different-family reviewer is cataloged".to_string(),
2563 );
2564 }
2565 return fallback(
2566 ReviewerFallbackCode::AllDiffFamilyExcluded,
2567 "all different-family reviewer candidates were excluded".to_string(),
2568 );
2569 };
2570
2571 let estimate = cost_estimate(
2572 best.identity.pricing.as_ref(),
2573 author_identity.pricing.as_ref(),
2574 );
2575 ComplementaryReviewerSelection {
2576 intent: options.intent.as_str().to_string(),
2577 reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
2578 estimated_incremental_cost: estimate,
2579 author: author_identity,
2580 reviewer: best.identity,
2581 fallback: false,
2582 fallback_reason: None,
2583 fallback_code: None,
2584 }
2585}
2586
2587#[derive(Debug, Clone)]
2588struct ReviewerCandidate {
2589 identity: ComplementaryModelIdentity,
2590 score: f64,
2591}
2592
2593fn complementary_identity(
2594 id: String,
2595 provider: String,
2596 family: String,
2597 lineage: String,
2598 tier: String,
2599 pricing: Option<ModelPricing>,
2600) -> ComplementaryModelIdentity {
2601 ComplementaryModelIdentity {
2602 id,
2603 provider,
2604 family,
2605 lineage,
2606 tier,
2607 pricing,
2608 }
2609}
2610
2611fn reviewer_score(
2612 options: &ComplementaryReviewerOptions,
2613 author: &ComplementaryModelIdentity,
2614 candidate: &ComplementaryModelIdentity,
2615 model: &ModelDef,
2616 preferred_families: &[String],
2617) -> f64 {
2618 let candidate_refs = reviewer_match_refs(candidate);
2619 let mut score = 0.0;
2620 if let Some(rank) = preferred_families
2621 .iter()
2622 .position(|selector| refs_contain_selector(&candidate_refs, selector))
2623 {
2624 score += 1_000.0 - rank as f64;
2625 }
2626 if candidate.provider != author.provider {
2627 score += 100.0;
2628 }
2629 score += match tier_distance(&author.tier, &candidate.tier) {
2630 0 => 80.0,
2631 1 => 45.0,
2632 2 => 15.0,
2633 _ => 0.0,
2634 };
2635 for strength in intent_strengths(options.intent) {
2636 if model.strengths.iter().any(|tag| tag == strength) {
2637 score += 8.0;
2638 }
2639 }
2640 if model.capabilities.iter().any(|tag| tag == "tools") {
2641 score += 4.0;
2642 }
2643 if let (Some(author_total), Some(candidate_total)) = (
2644 pricing_total(author.pricing.as_ref()),
2645 pricing_total(candidate.pricing.as_ref()),
2646 ) {
2647 if author_total > 0.0 {
2648 let ratio = candidate_total / author_total;
2649 if ratio <= 1.0 {
2650 score += 20.0;
2651 }
2652 score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2653 }
2654 }
2655 score
2656}
2657
2658fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2659 match intent {
2660 ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2661 ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2662 ComplementaryReviewerIntent::PlanReview => {
2663 &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2664 }
2665 }
2666}
2667
2668fn tier_distance(left: &str, right: &str) -> u8 {
2669 let left = tier_rank(left);
2670 let right = tier_rank(right);
2671 left.abs_diff(right)
2672}
2673
/// Maps a tier name to an ordinal: `small` is 0, `frontier` and `reasoning`
/// are 2, and everything else (including `mid`) is 1.
fn tier_rank(tier: &str) -> u8 {
    match tier {
        "small" => 0,
        "frontier" | "reasoning" => 2,
        // "mid" and any unrecognised tier share the middle rank.
        _ => 1,
    }
}
2682
2683fn exceeds_price_cap(
2684 author_pricing: Option<&ModelPricing>,
2685 candidate_pricing: Option<&ModelPricing>,
2686 max_price_multiplier: Option<f64>,
2687) -> bool {
2688 let Some(max_price_multiplier) = max_price_multiplier else {
2689 return false;
2690 };
2691 let Some(author_total) = pricing_total(author_pricing) else {
2692 return false;
2693 };
2694 let Some(candidate_total) = pricing_total(candidate_pricing) else {
2695 return true;
2696 };
2697 author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2698}
2699
2700fn cost_estimate(
2701 reviewer_pricing: Option<&ModelPricing>,
2702 author_pricing: Option<&ModelPricing>,
2703) -> Option<ComplementaryCostEstimate> {
2704 let reviewer_pricing = reviewer_pricing?;
2705 let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2706 let multiplier_vs_author = pricing_total(author_pricing)
2707 .filter(|author_total| *author_total > 0.0)
2708 .map(|author_total| total_per_mtok / author_total);
2709 Some(ComplementaryCostEstimate {
2710 input_per_mtok: reviewer_pricing.input_per_mtok,
2711 output_per_mtok: reviewer_pricing.output_per_mtok,
2712 total_per_mtok,
2713 multiplier_vs_author,
2714 })
2715}
2716
2717fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2718 pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2719}
2720
2721fn reviewer_reason(
2722 author: &ComplementaryModelIdentity,
2723 reviewer: &ComplementaryModelIdentity,
2724 estimate: Option<&ComplementaryCostEstimate>,
2725) -> String {
2726 let cost = estimate
2727 .and_then(|estimate| estimate.multiplier_vs_author)
2728 .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2729 .unwrap_or_else(|| "price ratio unavailable".to_string());
2730 format!(
2731 "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2732 reviewer.id,
2733 reviewer.provider,
2734 reviewer.family,
2735 author.family,
2736 reviewer.tier,
2737 author.tier,
2738 cost
2739 )
2740}
2741
2742fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2743 BTreeSet::from([
2744 identity.id.to_ascii_lowercase(),
2745 identity.provider.to_ascii_lowercase(),
2746 format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2747 format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2748 identity.family.to_ascii_lowercase(),
2749 identity.lineage.to_ascii_lowercase(),
2750 ])
2751}
2752
2753fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2754 normalized_catalog_token(Some(selector))
2755 .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2756 .is_some_and(|selector| refs.contains(&selector))
2757}
2758
2759use harn_glob::match_name as glob_match;
2762
2763fn dirs_or_home() -> Option<String> {
2764 crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2765}
2766
2767pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2770 if let Some(env_name) = &pdef.base_url_env {
2771 if let Ok(val) = std::env::var(env_name) {
2772 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2774 if !trimmed.is_empty() {
2775 return trimmed.to_string();
2776 }
2777 }
2778 }
2779 pdef.base_url.clone()
2780}
2781
/// Built-in provider catalog: the text of `llm/providers.toml`, embedded at
/// compile time via `include_str!` and parsed by `default_config`.
const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2786
2787fn default_config() -> ProvidersConfig {
2801 parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2802 .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2803}
2804
/// Test helper: layers `overlay` on top of the built-in defaults and returns
/// the merged configuration.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2811
2812#[cfg(test)]
2813mod tests {
2814 use super::*;
2815
    /// Clears any user overrides (via `clear_user_overrides`) so a test does
    /// not see an overlay installed earlier with `set_user_overrides`.
    fn reset_overrides() {
        clear_user_overrides();
    }
2819
2820 #[test]
2821 fn resolve_model_info_guards_bad_native_pin_on_unreliable_route() {
2822 reset_overrides();
2823 let overlay = parse_config_toml(
2830 "[aliases.guard-ds]\nid = \"deepseek/deepseek-v3.2\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2831 )
2832 .expect("overlay parses");
2833 set_user_overrides(Some(overlay));
2834 let resolved = resolve_model_info("guard-ds");
2835 assert_eq!(
2836 resolved.tool_format, "text",
2837 "a native pin on a native_unreliable route must be auto-corrected to text"
2838 );
2839 clear_user_overrides();
2840
2841 let overlay_ok = parse_config_toml(
2843 "[aliases.guard-ds-ok]\nid = \"deepseek/deepseek-v3-base\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2844 )
2845 .expect("overlay parses");
2846 set_user_overrides(Some(overlay_ok));
2847 let resolved_ok = resolve_model_info("guard-ds-ok");
2848 assert_eq!(resolved_ok.tool_format, "native");
2849 clear_user_overrides();
2850 }
2851
2852 #[test]
2853 fn auto_select_prefers_local_provider_without_cloud_credentials() {
2854 let config = parse_config_toml(
2858 "[providers.ollama]\nbase_url = \"http://localhost:11434\"\nchat_endpoint = \"/v1/chat/completions\"\n",
2859 )
2860 .expect("config parses");
2861 assert!(provider_is_local(config.providers.get("ollama").unwrap()));
2862 assert_eq!(auto_select_provider(&config), "ollama");
2863 }
2864
2865 #[test]
2866 fn auto_select_falls_back_to_documented_default_when_empty() {
2867 let config = parse_config_toml("").expect("config parses");
2868 assert_eq!(auto_select_provider(&config), FALLBACK_PROVIDER);
2869 }
2870
2871 #[test]
2872 fn suppress_routes_parse_and_merge_dedupe() {
2873 let mut base =
2874 parse_config_toml("[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\"]\n")
2875 .expect("base parses");
2876 assert!(!base.is_empty(), "a suppress-only overlay is not empty");
2877 let overlay = parse_config_toml(
2878 "[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\", \"ollama:img:tag\"]\n",
2879 )
2880 .expect("overlay parses");
2881 base.merge_from(&overlay);
2882 assert_eq!(
2883 base.suppress.routes,
2884 vec![
2885 "together:Qwen/Qwen3-Coder-Next-FP8".to_string(),
2886 "ollama:img:tag".to_string(),
2887 ],
2888 "merge appends new selectors without duplicating existing ones"
2889 );
2890 }
2891
2892 #[test]
2893 fn test_glob_match_prefix() {
2894 assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
2895 assert!(glob_match("gpt-*", "gpt-4o"));
2896 assert!(!glob_match("claude-*", "gpt-4o"));
2897 }
2898
2899 #[test]
2900 fn test_glob_match_suffix() {
2901 assert!(glob_match("*-latest", "llama3.2-latest"));
2902 assert!(!glob_match("*-latest", "llama3.2"));
2903 }
2904
2905 #[test]
2906 fn test_glob_match_middle() {
2907 assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
2908 assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
2909 }
2910
2911 #[test]
2912 fn test_glob_match_exact() {
2913 assert!(glob_match("gpt-4o", "gpt-4o"));
2914 assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
2915 }
2916
2917 #[test]
2918 fn test_infer_provider_from_defaults() {
2919 let _guard = crate::llm::env_guard();
2920 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2921 unsafe {
2922 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2923 }
2924
2925 assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
2926 assert_eq!(infer_provider("gpt-4o"), "openai");
2927 assert_eq!(infer_provider("o1-preview"), "openai");
2928 assert_eq!(infer_provider("o3-mini"), "openai");
2929 assert_eq!(infer_provider("o4-mini"), "openai");
2930 assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
2931 assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
2932 assert_eq!(infer_provider("llama3.2:latest"), "ollama");
2933 assert_eq!(infer_provider("unknown-model"), "anthropic");
2934
2935 unsafe {
2936 match prev_default_provider {
2937 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2938 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2939 }
2940 }
2941 }
2942
2943 #[test]
2944 fn test_infer_provider_prefix_rules() {
2945 assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
2946 assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
2947 assert_eq!(infer_provider("local:owner/model"), "ollama");
2949 assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
2950 }
2951
2952 #[test]
2953 fn test_openrouter_inference_requires_one_slash() {
2954 let _guard = crate::llm::env_guard();
2955 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2956 unsafe {
2957 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2958 }
2959
2960 assert_eq!(infer_provider("org/model"), "openrouter");
2961 assert_eq!(infer_provider("org/team/model"), "anthropic");
2962
2963 unsafe {
2964 match prev_default_provider {
2965 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2966 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2967 }
2968 }
2969 }
2970
2971 #[test]
2972 fn test_cerebras_inference_beats_openrouter_slash_fallback() {
2973 let _guard = crate::llm::env_guard();
2974 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2975 unsafe {
2976 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2977 }
2978
2979 assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
2980 assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
2981 assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
2982
2983 unsafe {
2984 match prev_default_provider {
2985 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2986 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2987 }
2988 }
2989 }
2990
2991 #[test]
2992 fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
2993 let _guard = crate::llm::env_guard();
2998 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2999 unsafe {
3000 std::env::remove_var("HARN_DEFAULT_PROVIDER");
3001 }
3002
3003 for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
3004 assert_eq!(
3005 infer_provider(model),
3006 "cerebras",
3007 "{model} should route to its catalog provider"
3008 );
3009 let resolved = resolve_model_info(model);
3010 assert_eq!(resolved.id, model);
3011 assert_eq!(resolved.provider, "cerebras");
3012 }
3013
3014 unsafe {
3015 match prev_default_provider {
3016 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3017 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3018 }
3019 }
3020 }
3021
3022 #[test]
3023 fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
3024 reset_overrides();
3025
3026 assert_eq!(
3027 wire_model_id("groq/openai/gpt-oss-120b"),
3028 "openai/gpt-oss-120b"
3029 );
3030 assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");
3031
3032 let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
3033 let ids = equivalents
3034 .iter()
3035 .map(|(id, _)| id.as_str())
3036 .collect::<Vec<_>>();
3037
3038 assert!(
3039 ids.contains(&"groq/openai/gpt-oss-120b"),
3040 "Cerebras GPT-OSS should surface the Groq serving variant"
3041 );
3042 assert!(
3043 !ids.contains(&"gpt-oss-120b"),
3044 "equivalence results should not include the source row"
3045 );
3046 assert!(equivalents.iter().all(|(_, model)| {
3047 model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
3048 }));
3049 }
3050
3051 #[test]
3052 fn fireworks_gpt_oss_route_has_real_context_window() {
3053 reset_overrides();
3060
3061 let entry = model_catalog_entry("accounts/fireworks/models/gpt-oss-120b")
3062 .expect("Fireworks gpt-oss-120b must be in the model catalog");
3063 assert_eq!(entry.context_window, 131_072);
3064 assert_eq!(entry.provider, "fireworks");
3065 assert_eq!(
3066 entry.equivalence_group.as_deref(),
3067 Some("openai-gpt-oss-120b"),
3068 );
3069 }
3070
3071 #[test]
3072 fn test_user_catalog_overlay_re_homes_model_provider() {
3073 reset_overrides();
3077 let mut overlay = ProvidersConfig::default();
3078 overlay.models.insert(
3079 "gpt-4o".to_string(),
3080 ModelDef {
3081 name: "GPT-4o via OpenRouter".to_string(),
3082 provider: "openrouter".to_string(),
3083 context_window: 128_000,
3084 logical_model: None,
3085 equivalence_group: None,
3086 served_variant: None,
3087 wire_model: None,
3088 api_dialect: None,
3089 rate_limits: None,
3090 performance: None,
3091 architecture: None,
3092 local_memory: None,
3093 runtime_context_window: None,
3094 stream_timeout: None,
3095 capabilities: Vec::new(),
3096 pricing: None,
3097 deprecated: false,
3098 deprecation_note: None,
3099 superseded_by: None,
3100 fast_mode: None,
3101 quality_tags: Vec::new(),
3102 availability: ModelAvailability::default(),
3103 tier: None,
3104 open_weight: None,
3105 strengths: Vec::new(),
3106 benchmarks: std::collections::BTreeMap::new(),
3107 family: None,
3108 lineage: None,
3109 complementary_with: Vec::new(),
3110 avoid_as_reviewer_for: Vec::new(),
3111 },
3112 );
3113 set_user_overrides(Some(overlay));
3114
3115 assert_eq!(infer_provider("gpt-4o"), "openrouter");
3116
3117 reset_overrides();
3118 }
3119
3120 #[test]
3121 fn test_resolve_model_info_normalizes_provider_prefixes() {
3122 let local = resolve_model_info("local:gemma-4-e4b-it");
3123 assert_eq!(local.id, "gemma-4-e4b-it");
3124 assert_eq!(local.provider, "ollama");
3125
3126 let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
3127 assert_eq!(ollama.id, "qwen3:30b-a3b");
3128 assert_eq!(ollama.provider, "ollama");
3129
3130 let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
3131 assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
3132 assert_eq!(hf.provider, "huggingface");
3133
3134 let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
3135 assert_eq!(cerebras.id, "gpt-oss-120b");
3136 assert_eq!(cerebras.provider, "cerebras");
3137
3138 let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
3139 assert_eq!(cerebras_glm.id, "zai-glm-4.7");
3140 assert_eq!(cerebras_glm.provider, "cerebras");
3141 }
3142
3143 #[test]
3144 fn test_model_tier_from_defaults() {
3145 assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
3149 assert_eq!(model_tier("gpt-4o"), "frontier");
3150 assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
3151 assert_eq!(model_tier("deepseek-v4-flash"), "mid");
3152 assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
3153 assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
3154 assert_eq!(model_tier("glm-5.1"), "frontier");
3155 assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
3157 }
3158
3159 #[test]
3160 fn test_model_family_preserves_underlying_hosted_lineage() {
3161 assert_eq!(
3162 model_family("openrouter", "anthropic/claude-sonnet-4-6"),
3163 "anthropic-claude"
3164 );
3165 assert_eq!(
3166 model_family("openrouter", "google/gemini-2.5-flash"),
3167 "google-gemini"
3168 );
3169 assert_eq!(
3170 model_family("openrouter", "openai/o3-mini"),
3171 "openai-reasoning"
3172 );
3173 assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
3174 assert_eq!(
3175 model_lineage("openrouter", "openai/o3-mini"),
3176 "openai-reasoning"
3177 );
3178 assert_eq!(
3179 model_lineage("anthropic", "claude-opus-4-8"),
3180 "claude-opus-adaptive"
3181 );
3182 assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
3183 }
3184
3185 #[test]
3186 fn test_complementary_reviewer_uses_different_family() {
3187 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3188 author_model: "claude-sonnet-4-6".to_string(),
3189 author_provider: None,
3190 intent: ComplementaryReviewerIntent::PlanReview,
3191 max_price_multiplier: Some(3.0),
3192 });
3193
3194 assert!(!selection.fallback, "{selection:?}");
3195 assert_eq!(selection.author.family, "anthropic-claude");
3196 assert_ne!(selection.reviewer.family, selection.author.family);
3197 assert_eq!(selection.reviewer.tier, "frontier");
3198 assert!(selection.estimated_incremental_cost.is_some());
3199 assert_eq!(selection.fallback_code, None, "{selection:?}");
3202 }
3203
3204 #[test]
3205 fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
3206 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3207 author_model: "gpt-4o-mini".to_string(),
3208 author_provider: Some("openai".to_string()),
3209 intent: ComplementaryReviewerIntent::Review,
3210 max_price_multiplier: Some(0.01),
3211 });
3212
3213 assert!(selection.fallback, "{selection:?}");
3214 assert_eq!(selection.reviewer.id, "gpt-4o-mini");
3215 assert_eq!(selection.reviewer.family, selection.author.family);
3216 assert!(selection
3217 .fallback_reason
3218 .as_deref()
3219 .is_some_and(|reason| reason.contains("max_price_multiplier")));
3220 assert_eq!(
3224 selection.fallback_code.as_deref(),
3225 Some(ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code()),
3226 "{selection:?}"
3227 );
3228 assert_eq!(
3229 ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3230 "no_diff_family_within_price"
3231 );
3232 }
3233
3234 #[test]
3235 fn test_reviewer_fallback_codes_are_stable_strings() {
3236 assert_eq!(
3239 ReviewerFallbackCode::UnknownAuthorFamily.as_code(),
3240 "unknown_author_family"
3241 );
3242 assert_eq!(
3243 ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3244 "no_diff_family_within_price"
3245 );
3246 assert_eq!(
3247 ReviewerFallbackCode::NoDiffFamilyServerless.as_code(),
3248 "no_diff_family_serverless"
3249 );
3250 assert_eq!(
3251 ReviewerFallbackCode::AllDiffFamilyExcluded.as_code(),
3252 "all_diff_family_excluded"
3253 );
3254 }
3255
3256 #[test]
3257 fn test_resolve_model_unknown_alias() {
3258 let (id, provider) = resolve_model("gpt-4o");
3259 assert_eq!(id, "gpt-4o");
3260 assert!(provider.is_none());
3261 }
3262
3263 #[test]
3264 fn test_provider_names() {
3265 let names = provider_names();
3266 assert!(names.len() >= 7);
3267 assert!(names.contains(&"anthropic".to_string()));
3268 assert!(names.contains(&"together".to_string()));
3269 assert!(names.contains(&"local".to_string()));
3270 assert!(names.contains(&"mlx".to_string()));
3271 assert!(names.contains(&"openai".to_string()));
3272 assert!(names.contains(&"ollama".to_string()));
3273 assert!(names.contains(&"bedrock".to_string()));
3274 assert!(names.contains(&"azure_openai".to_string()));
3275 assert!(names.contains(&"vertex".to_string()));
3276 }
3277
3278 #[test]
3279 fn global_provider_file_is_an_overlay_on_builtin_defaults() {
3280 let mut overlay = ProvidersConfig {
3281 default_provider: Some("ollama".to_string()),
3282 ..Default::default()
3283 };
3284 overlay.aliases.insert(
3285 "quickstart".to_string(),
3286 AliasDef {
3287 id: "llama3.2".to_string(),
3288 provider: "ollama".to_string(),
3289 tool_format: None,
3290 },
3291 );
3292
3293 let merged = merge_global_config(overlay);
3294
3295 assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
3296 assert!(merged.providers.contains_key("anthropic"));
3297 assert!(merged.providers.contains_key("ollama"));
3298 assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
3299 }
3300
3301 #[test]
3302 fn partial_provider_overlay_preserves_builtin_provider_metadata() {
3303 let overlay = parse_config_toml(
3304 r#"
3305 [providers.ollama]
3306 base_url = "http://localhost:11435"
3307 extra_headers = { "x-local" = "1" }
3308 "#,
3309 )
3310 .expect("provider overlay parses");
3311
3312 let merged = merge_global_config(overlay);
3313 let ollama = merged
3314 .providers
3315 .get("ollama")
3316 .expect("ollama remains configured");
3317
3318 assert_eq!(ollama.base_url, "http://localhost:11435");
3319 assert_eq!(ollama.auth_style, "none");
3320 assert_eq!(ollama.chat_endpoint, "/api/chat");
3321 assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
3322 assert_eq!(ollama.cost_per_1k_in, Some(0.0));
3323 assert_eq!(ollama.cost_per_1k_out, Some(0.0));
3324 assert_eq!(
3325 ollama
3326 .healthcheck
3327 .as_ref()
3328 .and_then(|healthcheck| healthcheck.path.as_deref()),
3329 Some("/api/tags")
3330 );
3331 assert_eq!(
3332 ollama.extra_headers.get("x-local").map(String::as_str),
3333 Some("1")
3334 );
3335 }
3336
3337 #[test]
3338 fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
3339 let overlay = parse_config_toml(
3340 r#"
3341 [providers.ollama]
3342 auth_style = "bearer"
3343 auth_env = "OLLAMA_API_KEY"
3344 "#,
3345 )
3346 .expect("provider overlay parses");
3347
3348 let merged = merge_global_config(overlay);
3349 let ollama = merged
3350 .providers
3351 .get("ollama")
3352 .expect("ollama remains configured");
3353
3354 assert_eq!(ollama.auth_style, "bearer");
3355 assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
3356 assert_eq!(ollama.chat_endpoint, "/api/chat");
3357 }
3358
3359 #[test]
3360 fn test_resolve_tier_model_default_aliases() {
3361 let (model, provider) = resolve_tier_model("frontier", None)
3366 .expect("frontier alias must resolve from the embedded catalog");
3367 assert_eq!(provider, "anthropic");
3368 assert!(
3369 model_catalog_entry(&model)
3370 .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
3371 "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
3372 );
3373
3374 let (model, provider) = resolve_tier_model("small", None)
3375 .expect("small alias must resolve from the embedded catalog");
3376 assert!(
3377 [
3378 "openrouter",
3379 "huggingface",
3380 "local",
3381 "llamacpp",
3382 "mlx",
3383 "ollama"
3384 ]
3385 .contains(&provider.as_str()),
3386 "small tier should resolve to an open-weight provider (got {provider} / {model})"
3387 );
3388
3389 let (model, provider) = resolve_tier_model("mid", None)
3390 .expect("mid alias must resolve from the embedded catalog");
3391 assert_eq!(provider, "openrouter");
3392 assert_eq!(model, "qwen/qwen3.6-flash");
3393 }
3394
3395 #[test]
3396 fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
3397 let (model, provider) = resolve_tier_model("mid", Some("openai"))
3401 .expect("mid tier scoped to openai must resolve");
3402 assert_eq!(provider, "openai");
3403 assert_eq!(model, "gpt-5.4-mini");
3404 assert!(
3405 model_catalog_entry(&model).is_some(),
3406 "mid/openai alias must point at a registered model (got {model})"
3407 );
3408 }
3409
3410 #[test]
3411 fn test_provider_config_anthropic() {
3412 let pdef = provider_config("anthropic").unwrap();
3413 assert_eq!(pdef.auth_style, "header");
3414 assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
3415 }
3416
3417 #[test]
3418 fn test_provider_config_mlx() {
3419 let pdef = provider_config("mlx").unwrap();
3420 assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
3421 assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
3422 assert_eq!(
3423 pdef.healthcheck.unwrap().path.as_deref(),
3424 Some("/v1/models")
3425 );
3426
3427 let (model, provider) = resolve_model("mlx-qwen36-27b");
3428 assert_eq!(model, "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit");
3429 assert_eq!(provider.as_deref(), Some("mlx"));
3430 }
3431
3432 #[test]
3433 fn test_enterprise_provider_defaults_and_inference() {
3434 let bedrock = provider_config("bedrock").unwrap();
3435 assert_eq!(bedrock.auth_style, "aws_sigv4");
3436 assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
3437 assert_eq!(
3438 infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
3439 "bedrock"
3440 );
3441 assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
3442
3443 let azure = provider_config("azure_openai").unwrap();
3444 assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
3445 assert_eq!(
3446 auth_env_names(&azure.auth_env),
3447 vec![
3448 "AZURE_OPENAI_API_KEY".to_string(),
3449 "AZURE_OPENAI_AD_TOKEN".to_string(),
3450 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
3451 ]
3452 );
3453
3454 let vertex = provider_config("vertex").unwrap();
3455 assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
3456 assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
3457 }
3458
/// With `HARN_DEFAULT_PROVIDER=openai`, an unrecognised model id must infer to
/// `openai` and report `DefaultFallback` as the inference source.
///
/// The previous value of the variable is restored by a drop guard, so the
/// override cannot leak into later tests even if the call under test panics.
#[test]
fn test_default_provider_env_override_for_unknown_model() {
    /// Puts `HARN_DEFAULT_PROVIDER` back to the captured value when dropped.
    struct RestoreDefaultProvider(Option<String>);

    impl Drop for RestoreDefaultProvider {
        fn drop(&mut self) {
            // SAFETY: NOTE(review): relies on `crate::llm::env_guard()` keeping
            // other env-touching tests out while this runs — confirm. The
            // restore guard is declared after `_guard`, so it drops first.
            unsafe {
                match self.0.take() {
                    Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                    None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
                }
            }
        }
    }

    let _guard = crate::llm::env_guard();
    let _restore = RestoreDefaultProvider(std::env::var("HARN_DEFAULT_PROVIDER").ok());
    // SAFETY: same assumption as the restore guard above (`_guard` is held).
    unsafe {
        std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
    }

    let inference = infer_provider_detail("unknown-model");

    assert_eq!(inference.provider, "openai");
    assert_eq!(
        inference.source,
        crate::llm::provider::ProviderInferenceSource::DefaultFallback
    );
}
3482
/// With `HARN_DEFAULT_PROVIDER=ollama`, an unrecognised model id picks up the
/// `ollama` provider but keeps `unknown` family/lineage, while a
/// `deepseek-*` id still reports the `deepseek` family and lineage.
///
/// The previous value of the variable is restored by a drop guard, so the
/// override cannot leak into later tests even if a call under test panics.
#[test]
fn test_unknown_model_family_ignores_default_provider_fallback() {
    /// Puts `HARN_DEFAULT_PROVIDER` back to the captured value when dropped.
    struct RestoreDefaultProvider(Option<String>);

    impl Drop for RestoreDefaultProvider {
        fn drop(&mut self) {
            // SAFETY: NOTE(review): relies on `crate::llm::env_guard()` keeping
            // other env-touching tests out while this runs — confirm. The
            // restore guard is declared after `_guard`, so it drops first.
            unsafe {
                match self.0.take() {
                    Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                    None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
                }
            }
        }
    }

    let _guard = crate::llm::env_guard();
    let _restore = RestoreDefaultProvider(std::env::var("HARN_DEFAULT_PROVIDER").ok());
    // SAFETY: same assumption as the restore guard above (`_guard` is held).
    unsafe {
        std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
    }

    let unknown = resolve_model_info("mystery-model-xyz");
    let known_family = resolve_model_info("deepseek-mystery-model");

    assert_eq!(unknown.provider, "ollama");
    assert_eq!(unknown.family, "unknown");
    assert_eq!(unknown.lineage, "unknown");
    assert_eq!(known_family.family, "deepseek");
    assert_eq!(known_family.lineage, "deepseek");
}
3507
/// A provider that only sets `base_url` (all other fields at their defaults)
/// must resolve to that literal URL.
#[test]
fn test_resolve_base_url_no_env() {
    let provider = ProviderDef {
        base_url: String::from("https://example.com"),
        ..Default::default()
    };
    let resolved = resolve_base_url(&provider);
    assert_eq!(resolved, "https://example.com");
}
3516
/// Sanity-checks the default config: providers and inference rules are
/// present, the default tier is `mid`, and at least four models carry the
/// `frontier` tier tag.
#[test]
fn test_default_config_roundtrip() {
    let config = default_config();
    assert!(!config.providers.is_empty());
    assert!(!config.inference_rules.is_empty());
    assert_eq!(config.tier_defaults.default, "mid");

    let frontiers = config
        .models
        .values()
        .filter(|model| matches!(model.tier.as_deref(), Some("frontier")))
        .count();
    assert!(
        frontiers >= 4,
        "expected at least 4 frontier-tagged models, got {frontiers}"
    );
}
3536
/// Checks context window and vision capability for two local Ollama catalog
/// rows: `devstral-small-2:24b` (no vision) and `gemma4:26b` (vision).
#[test]
fn test_local_ollama_catalog_metadata() {
    reset_overrides();

    let devstral_entry =
        model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
    assert_eq!(devstral_entry.context_window, 262_144);
    assert!(devstral_entry
        .capabilities
        .iter()
        .all(|capability| capability != "vision"));

    let gemma_entry = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
    assert_eq!(gemma_entry.context_window, 262_144);
    assert!(gemma_entry
        .capabilities
        .iter()
        .any(|capability| capability == "vision"));
}
3550
/// For each listed Gemma 4 id, the `capabilities` written in the default
/// config must equal what `effective_model_capability_tags` derives for the
/// same provider/model pair.
#[test]
fn local_gemma4_source_tags_match_structured_capability_tags() {
    reset_overrides();
    let config = default_config();
    let gemma4_ids = [
        "gemma-4-e2b-it",
        "gemma-4-e4b-it",
        "gemma-4-12b-it",
        "gemma-4-26b-a4b-it",
        "gemma-4-31b-it",
    ];
    for id in gemma4_ids {
        let Some(source) = config.models.get(id) else {
            panic!("{id} should be in the embedded catalog")
        };
        let derived = effective_model_capability_tags(&source.provider, id);
        assert_eq!(
            source.capabilities, derived,
            "{}/{} source capabilities must match derived capability_tags",
            source.provider, id
        );
    }
}
3574
/// A `Capabilities` value with every structured flag switched on must map to
/// the full, ordered list of capability tags.
#[test]
fn capability_tags_include_structured_capability_flags() {
    let all_flags_on = crate::llm::capabilities::Capabilities {
        native_tools: true,
        tool_search: vec![String::from("web")],
        vision_supported: true,
        audio: true,
        pdf: true,
        video: true,
        files_api_supported: true,
        prompt_caching: true,
        thinking_modes: vec![String::from("enabled")],
        structured_output: Some(String::from("native")),
        ..Default::default()
    };

    // Expected order is part of the assertion, not just membership.
    let expected_tags = vec![
        "streaming",
        "tools",
        "tool_search",
        "vision",
        "audio",
        "pdf",
        "video",
        "files",
        "prompt_caching",
        "thinking",
        "structured_output",
    ];

    assert_eq!(
        capability_tags_from_capabilities(&all_flags_on),
        expected_tags
    );
}
3608
/// Merging an overlay into the default config must take the overlay's
/// `default_provider`, add its new provider, and keep the built-in ones.
#[test]
fn test_external_config_overlays_default_catalog() {
    let mut config = default_config();

    let custom_provider = ProviderDef {
        base_url: String::from("https://llm.example.test/v1"),
        chat_endpoint: String::from("/chat/completions"),
        ..Default::default()
    };
    let overlay = ProvidersConfig {
        default_provider: Some(String::from("ollama")),
        providers: [(String::from("custom"), custom_provider)]
            .into_iter()
            .collect(),
        ..Default::default()
    };

    config.merge_from(&overlay);

    assert_eq!(config.default_provider.as_deref(), Some("ollama"));
    for provider_name in ["custom", "anthropic", "ollama"] {
        assert!(config.providers.contains_key(provider_name));
    }
}
3632
/// `model_params` is expected to return nothing for `claude-sonnet-4-20250514`.
#[test]
fn test_model_params_empty() {
    assert!(model_params("claude-sonnet-4-20250514").is_empty());
}
3638
/// Installing a user overlay with a new provider and alias must make both
/// visible through `resolve_model`, `provider_names`, and `provider_config`.
#[test]
fn test_user_overrides_add_provider_and_alias() {
    reset_overrides();

    let acme_provider = ProviderDef {
        base_url: String::from("https://llm.acme.test/v1"),
        chat_endpoint: String::from("/chat/completions"),
        ..Default::default()
    };
    let acme_alias = AliasDef {
        id: String::from("acme/model-fast"),
        provider: String::from("acme"),
        tool_format: Some(String::from("native")),
    };
    let overlay = ProvidersConfig {
        providers: [(String::from("acme"), acme_provider)]
            .into_iter()
            .collect(),
        aliases: [(String::from("acme-fast"), acme_alias)]
            .into_iter()
            .collect(),
        ..Default::default()
    };
    set_user_overrides(Some(overlay));

    let (resolved_model, resolved_provider) = resolve_model("acme-fast");
    assert_eq!(resolved_model, "acme/model-fast");
    assert_eq!(resolved_provider.as_deref(), Some("acme"));
    assert!(provider_names().contains(&String::from("acme")));
    let acme_base_url = provider_config("acme").map(|def| def.base_url);
    assert_eq!(acme_base_url, Some(String::from("https://llm.acme.test/v1")));

    reset_overrides();
}
3672
/// Table of `(model, provider, expected tool format)` rows checked against
/// `default_tool_format`, in the listed order.
#[test]
fn test_default_tool_format_uses_capability_matrix() {
    reset_overrides();

    let expectations = [
        ("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp", "native"),
        ("devstral-small-2:24b", "ollama", "json"),
        ("gemma-4-26b-a4b-it", "local", "native"),
        ("deepseek/deepseek-v3.2", "openrouter", "text"),
        ("qwen/qwen3-coder-flash", "openrouter", "text"),
        ("qwen/qwen3.6-flash", "openrouter", "native"),
        ("z-ai/glm-5.2", "openrouter", "text"),
        ("openai/gpt-oss-120b", "openrouter", "text"),
        ("accounts/fireworks/models/gpt-oss-120b", "fireworks", "text"),
        ("gpt-oss-120b", "cerebras", "native"),
        ("openai/gpt-oss-120b", "deepinfra", "text"),
        ("openai/gpt-oss-120b", "groq", "native"),
    ];

    for (model_id, provider_name, expected_format) in expectations {
        assert_eq!(
            default_tool_format(model_id, provider_name),
            expected_format
        );
    }
}
3728
/// An unrecognised model on `ollama` must default to the `json` tool format.
#[test]
fn test_default_tool_format_unpinned_text_channel_is_json() {
    reset_overrides();

    let unpinned_format = default_tool_format("mystery-model-xyz", "ollama");
    assert_eq!(unpinned_format, "json");
}
3740
/// Claude-family ids must default to `native` tool calling on every listed
/// route, an alias without `tool_format` must inherit that default, an alias
/// with an explicit `tool_format` must win, and an unrelated unknown model on
/// `openrouter` must still default to `json`.
#[test]
fn test_claude_family_defaults_native_without_host_pin() {
    reset_overrides();

    let claude_routes = [
        ("claude-sonnet-4-6", "anthropic"),
        ("claude-sonnet-5", "anthropic"),
        ("anthropic/claude-nova-1", "anthropic"),
        ("anthropic/claude-sonnet-4.6", "openrouter"),
        ("anthropic/claude-sonnet-5", "openrouter"),
        ("anthropic/claude-opus-4-5-20251101", "openrouter"),
        ("anthropic/claude-sonnet-next", "openrouter"),
        ("anthropic/claude-nova-1", "openrouter"),
        ("anthropic.claude-sonnet-4-6", "bedrock"),
    ];
    for (model, provider) in claude_routes {
        assert_eq!(
            default_tool_format(model, provider),
            "native",
            "{provider}:{model} must default native without a host pin"
        );
    }

    // Parses a TOML snippet and installs it as the active user overlay.
    let install_overlay = |toml: &str| {
        set_user_overrides(Some(parse_config_toml(toml).expect("overlay parses")));
    };

    // Alias with no `tool_format`: falls through to the family default.
    install_overlay(concat!(
        "[aliases.probe-sonnet]\n",
        "id = \"claude-sonnet-4-6\"\n",
        "provider = \"anthropic\"\n",
    ));
    let unpinned = resolve_model_info("probe-sonnet");
    assert_eq!(unpinned.provider, "anthropic");
    assert_eq!(
        unpinned.tool_format, "native",
        "an unpinned claude alias must inherit the family-level native default"
    );
    clear_user_overrides();

    // Alias with an explicit `tool_format`: the pin takes precedence.
    install_overlay(concat!(
        "[aliases.probe-sonnet-json]\n",
        "id = \"claude-sonnet-4-6\"\n",
        "provider = \"anthropic\"\n",
        "tool_format = \"json\"\n",
    ));
    let pinned = resolve_model_info("probe-sonnet-json");
    assert_eq!(
        pinned.tool_format, "json",
        "an explicit host pin must win over the claude family default"
    );
    clear_user_overrides();

    let unrelated_format = default_tool_format("mystery-model-xyz", "openrouter");
    assert_eq!(unrelated_format, "json");
}
3807
/// Installing a user overlay with one model row and one `qc_defaults` entry
/// must surface through the catalog lookup, the per-1k pricing helper, and
/// the QC default lookup.
#[test]
fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
    reset_overrides();

    let acme_pricing = ModelPricing {
        input_per_mtok: 1.25,
        output_per_mtok: 2.5,
        cache_read_per_mtok: Some(0.25),
        cache_write_per_mtok: None,
    };
    let acme_fast = ModelDef {
        // Fields the assertions below depend on.
        name: String::from("Acme Fast"),
        provider: String::from("acme"),
        context_window: 65_536,
        stream_timeout: Some(42.0),
        capabilities: vec![String::from("tools"), String::from("streaming")],
        pricing: Some(acme_pricing),
        // Everything else is left unset / empty.
        logical_model: None,
        equivalence_group: None,
        served_variant: None,
        wire_model: None,
        api_dialect: None,
        rate_limits: None,
        performance: None,
        architecture: None,
        local_memory: None,
        runtime_context_window: None,
        deprecated: false,
        deprecation_note: None,
        superseded_by: None,
        fast_mode: None,
        quality_tags: Vec::new(),
        availability: ModelAvailability::default(),
        tier: None,
        open_weight: None,
        strengths: Vec::new(),
        benchmarks: std::collections::BTreeMap::new(),
        family: None,
        lineage: None,
        complementary_with: Vec::new(),
        avoid_as_reviewer_for: Vec::new(),
    };
    let overlay = ProvidersConfig {
        models: [(String::from("acme/model-fast"), acme_fast)]
            .into_iter()
            .collect(),
        qc_defaults: [(String::from("acme"), String::from("acme/model-cheap"))]
            .into_iter()
            .collect(),
        ..Default::default()
    };
    set_user_overrides(Some(overlay));

    let catalog_row = model_catalog_entry("acme/model-fast").expect("catalog entry");
    assert_eq!(catalog_row.context_window, 65_536);
    // The expected order differs from the order supplied in the overlay.
    assert_eq!(
        catalog_row.capabilities,
        vec!["streaming".to_string(), "tools".to_string()]
    );
    let input_rate = catalog_row.pricing.as_ref().map(|rates| rates.input_per_mtok);
    assert_eq!(input_rate, Some(1.25));
    assert_eq!(
        pricing_per_1k_for("acme", "acme/model-fast"),
        Some((0.00125, 0.0025))
    );
    assert_eq!(
        qc_default_model("acme").as_deref(),
        Some("acme/model-cheap")
    );

    reset_overrides();
}
3878
/// A user-supplied `internal-*` pattern rule must route matching model ids to
/// the provider named in the rule.
#[test]
fn test_user_overrides_prepend_inference_rules() {
    reset_overrides();

    let internal_rule = InferenceRule {
        pattern: Some(String::from("internal-*")),
        contains: None,
        exact: None,
        provider: String::from("openai"),
    };
    let overlay = ProvidersConfig {
        inference_rules: vec![internal_rule],
        ..Default::default()
    };
    set_user_overrides(Some(overlay));

    let inferred = infer_provider("internal-foo");
    assert_eq!(inferred, "openai");

    reset_overrides();
}
3895
/// Floor checks on the default config: minimum counts of providers, models,
/// and aliases, plus `anthropic` as the default provider.
#[test]
fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
    let config = default_config();

    let provider_count = config.providers.len();
    let model_count = config.models.len();
    let alias_count = config.aliases.len();

    assert!(
        provider_count >= 10,
        "expected >=10 providers in embedded catalog, got {}",
        provider_count
    );
    assert!(
        model_count >= 20,
        "expected >=20 models in embedded catalog, got {}",
        model_count
    );
    assert!(
        alias_count >= 15,
        "expected >=15 aliases in embedded catalog, got {}",
        alias_count
    );
    assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
}
3922
/// Every model flagged `deprecated` must carry a `deprecation_note` that is
/// not missing, empty, or whitespace-only.
#[test]
fn embedded_catalog_every_deprecated_model_has_a_note() {
    let config = default_config();
    let offenders: Vec<&str> = config
        .models
        .iter()
        .filter_map(|(id, model)| {
            let note_missing = match model.deprecation_note.as_deref() {
                Some(note) => note.trim().is_empty(),
                None => true,
            };
            (model.deprecated && note_missing).then_some(id.as_str())
        })
        .collect();
    assert!(
        offenders.is_empty(),
        "deprecated models missing a deprecation_note: {offenders:?}"
    );
}
3945
/// The two current Cerebras rows must be serverless and not deprecated, while
/// `llama-3.3-70b` must be a dedicated, deprecated Cerebras row.
#[test]
fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
    let config = default_config();

    ["gpt-oss-120b", "zai-glm-4.7"].into_iter().for_each(|id| {
        let public_row = config.models.get(id).expect("current public Cerebras row");
        assert_eq!(public_row.provider, "cerebras");
        assert_eq!(public_row.availability, ModelAvailability::Serverless);
        assert!(!public_row.deprecated);
    });

    let legacy_row = config
        .models
        .get("llama-3.3-70b")
        .expect("legacy Cerebras row");
    assert_eq!(legacy_row.provider, "cerebras");
    assert_eq!(legacy_row.availability, ModelAvailability::Dedicated);
    assert!(legacy_row.deprecated);
}
3964
/// Guards the OpenRouter `openai/gpt-oss-120b` row: it must be open weight,
/// list strengths without `vision`, and every non-deprecated member of the
/// `openai-gpt-oss-120b` equivalence group must share a single tier.
#[test]
fn embedded_openrouter_gpt_oss_120b_has_no_fragment_bleed() {
    let config = default_config();

    let gpt_oss = config
        .models
        .get("openai/gpt-oss-120b")
        .expect("openrouter gpt-oss-120b row");
    assert_eq!(gpt_oss.provider, "openrouter");
    assert_eq!(
        gpt_oss.open_weight,
        Some(true),
        "gpt-oss-120b is Apache-2.0 open weight, not the bled-in open_weight=false"
    );
    assert!(
        gpt_oss.strengths.iter().all(|strength| strength != "vision"),
        "gpt-oss-120b is text-only; the bled-in `vision` strength must be gone: {:?}",
        gpt_oss.strengths
    );
    assert!(
        !gpt_oss.strengths.is_empty(),
        "gpt-oss-120b must carry its own strengths, not None"
    );

    // Distinct tiers across the live members of the equivalence group.
    let group_tiers: std::collections::BTreeSet<_> = config
        .models
        .values()
        .filter(|entry| entry.equivalence_group.as_deref() == Some("openai-gpt-oss-120b"))
        .filter(|entry| !entry.deprecated)
        .map(|entry| entry.tier.clone())
        .collect();
    assert_eq!(
        group_tiers.len(),
        1,
        "openai-gpt-oss-120b group must share one tier, got {group_tiers:?}"
    );
}
4011
/// Every model row's `provider` must be a key of the config's `providers`
/// map; offending `(model id, provider)` pairs are reported on failure.
#[test]
fn embedded_catalog_every_model_targets_a_registered_provider() {
    let config = default_config();

    let mut orphans: Vec<(&str, &str)> = Vec::new();
    for (id, model) in &config.models {
        let provider_name = model.provider.as_str();
        if !config.providers.contains_key(provider_name) {
            orphans.push((id.as_str(), provider_name));
        }
    }

    assert!(
        orphans.is_empty(),
        "models reference unknown providers: {orphans:?}"
    );
}
4028
/// Every alias's `provider` must be a key of the config's `providers` map;
/// offending `(alias name, provider)` pairs are reported on failure.
#[test]
fn embedded_catalog_every_alias_targets_a_registered_provider() {
    let config = default_config();

    let mut orphans: Vec<(&str, &str)> = Vec::new();
    for (name, alias) in &config.aliases {
        let provider_name = alias.provider.as_str();
        if !config.providers.contains_key(provider_name) {
            orphans.push((name.as_str(), provider_name));
        }
    }

    assert!(
        orphans.is_empty(),
        "aliases reference unknown providers: {orphans:?}"
    );
}
4045
/// Every `qc_defaults` value must be a key of the config's `models` map;
/// offending `(provider, model id)` pairs are reported on failure.
#[test]
fn embedded_catalog_every_qc_default_targets_a_known_model() {
    let config = default_config();

    let mut orphans: Vec<(&str, &str)> = Vec::new();
    for (provider, model_id) in &config.qc_defaults {
        if !config.models.contains_key(model_id.as_str()) {
            orphans.push((provider.as_str(), model_id.as_str()));
        }
    }

    assert!(
        orphans.is_empty(),
        "qc_defaults reference unknown models: {orphans:?}"
    );
}
4060
/// For every model that declares pricing, the input/output rates and any
/// cache read/write rates must be non-negative. Models without pricing are
/// skipped.
#[test]
fn embedded_catalog_pricing_rates_are_non_negative() {
    let config = default_config();
    let priced_models = config
        .models
        .iter()
        .filter_map(|(id, model)| model.pricing.as_ref().map(|pricing| (id, pricing)));

    for (id, pricing) in priced_models {
        assert!(
            pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0,
            "{id}: negative pricing — in={} out={}",
            pricing.input_per_mtok,
            pricing.output_per_mtok
        );
        if let Some(rate) = pricing.cache_read_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
        }
        if let Some(rate) = pricing.cache_write_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
        }
    }
}
4082
/// `ModelAvailability::parse` must accept the three known spellings, reject
/// `provisioned`, and round-trip each variant through `as_str`.
#[test]
fn model_availability_parses_known_strings() {
    let known_spellings = [
        ("serverless", ModelAvailability::Serverless),
        ("dedicated", ModelAvailability::Dedicated),
        ("unknown", ModelAvailability::Unknown),
    ];
    for (text, variant) in known_spellings {
        assert_eq!(ModelAvailability::parse(text), Some(variant));
    }

    assert_eq!(ModelAvailability::parse("provisioned"), None);

    // Round trip: parse(as_str(v)) == Some(v) for every variant.
    let every_variant = [
        ModelAvailability::Serverless,
        ModelAvailability::Dedicated,
        ModelAvailability::Unknown,
    ];
    for variant in every_variant {
        assert_eq!(ModelAvailability::parse(variant.as_str()), Some(variant));
    }
}
4106
/// The `Qwen/Qwen3-Coder-Next-FP8` row must belong to `together` and be
/// marked `Dedicated`.
#[test]
fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
    let config = default_config();
    let together_row = config
        .models
        .get("Qwen/Qwen3-Coder-Next-FP8")
        .expect("Together Qwen3 Coder Next FP8 is cataloged");

    assert_eq!(together_row.provider, "together");
    assert_eq!(together_row.availability, ModelAvailability::Dedicated);
}
4117
/// None of the listed tier-style aliases may point at a `(provider, model)`
/// pair whose catalog row is marked `Dedicated`.
#[test]
fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
    // Alias names treated as tier aliases by this check.
    const TIER_ALIASES: [&str; 9] = [
        "frontier",
        "mid",
        "small",
        "tier/frontier",
        "tier/mid",
        "tier/small",
        "sonnet",
        "opus",
        "haiku",
    ];

    let config = default_config();
    let dedicated: std::collections::BTreeSet<(&str, &str)> = config
        .models
        .iter()
        .filter(|(_, model)| model.availability == ModelAvailability::Dedicated)
        .map(|(id, model)| (model.provider.as_str(), id.as_str()))
        .collect();

    let tier_aliases = config
        .aliases
        .iter()
        .filter(|(name, _)| TIER_ALIASES.contains(&name.as_str()));
    for (name, alias) in tier_aliases {
        let target = (alias.provider.as_str(), alias.id.as_str());
        assert!(
            !dedicated.contains(&target),
            "tier alias `{name}` targets dedicated-only route `{}/{}`",
            alias.provider,
            alias.id,
        );
    }
}
4152
/// Each of `frontier`, `mid`, and `small` must resolve via
/// `resolve_tier_model` to a model that has a catalog entry and is not
/// deprecated.
#[test]
fn embedded_catalog_tier_aliases_resolve_to_active_models() {
    for alias in ["frontier", "mid", "small"] {
        let Some((model, _provider)) = resolve_tier_model(alias, None) else {
            panic!("tier alias `{alias}` must resolve")
        };
        let Some(entry) = model_catalog_entry(&model) else {
            panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry")
        };
        assert!(
            !entry.deprecated,
            "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
            entry.deprecation_note
        );
    }
}
4171
/// The `opus` alias must resolve to `claude-opus-4-8` on `anthropic`, and that
/// catalog row must be live and advertise a fast mode whose input price
/// exceeds the standard input price.
#[test]
fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
    let (resolved_model, resolved_provider) = resolve_model("opus");
    assert_eq!(resolved_model, "claude-opus-4-8");
    assert_eq!(resolved_provider.as_deref(), Some("anthropic"));

    let catalog_row = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
    assert!(!catalog_row.deprecated, "newest Opus must not be deprecated");

    let fast_mode = catalog_row
        .fast_mode
        .expect("opus 4.8 advertises fast mode");
    assert_eq!(fast_mode.param, "speed");
    assert_eq!(fast_mode.value, "fast");
    assert_eq!(fast_mode.status.as_deref(), Some("research_preview"));

    let premium_pricing = fast_mode
        .pricing
        .expect("fast mode carries premium pricing");
    let standard_pricing = catalog_row.pricing.expect("opus 4.8 standard pricing");
    assert!(
        premium_pricing.input_per_mtok > standard_pricing.input_per_mtok,
        "fast mode must be premium-priced relative to standard"
    );
}
4193
/// `claude-opus-4-7` and `claude-opus-4-6` must both be deprecated and name
/// `claude-opus-4-8` as their successor.
#[test]
fn superseded_opus_models_point_at_claude_opus_4_8() {
    for model in ["claude-opus-4-7", "claude-opus-4-6"] {
        let Some(entry) = model_catalog_entry(model) else {
            panic!("{model} catalog entry")
        };
        assert!(entry.deprecated, "{model} should be deprecated");
        assert_eq!(
            entry.superseded_by.as_deref(),
            Some("claude-opus-4-8"),
            "{model} should be superseded by claude-opus-4-8"
        );
    }
}
4209
/// The `claude-opus-4-6` row must have no `fast_mode`, while the
/// `claude-opus-4-7` row must still have one.
#[test]
fn opus_46_no_longer_advertises_fast_mode() {
    let opus_4_6_row = model_catalog_entry("claude-opus-4-6").expect("opus 4.6 catalog entry");
    let has_fast_mode_4_6 = opus_4_6_row.fast_mode.is_some();
    assert!(
        !has_fast_mode_4_6,
        "Anthropic removed Opus 4.6 fast mode on 2026-06-29; Harn should not advertise it"
    );

    let opus_4_7_row = model_catalog_entry("claude-opus-4-7").expect("opus 4.7 catalog entry");
    let has_fast_mode_4_7 = opus_4_7_row.fast_mode.is_some();
    assert!(
        has_fast_mode_4_7,
        "Opus 4.7 still advertises its own fast-mode tier"
    );
}
4224
/// The `gpt-5.5` row must advertise a fast mode driven by the `service_tier`
/// parameter with status `ga`.
#[test]
fn gpt_5_5_fast_mode_rides_service_tier() {
    let fast_tier = model_catalog_entry("gpt-5.5")
        .expect("gpt-5.5 catalog entry")
        .fast_mode
        .expect("gpt-5.5 advertises a fast tier");

    assert_eq!(fast_tier.param, "service_tier");
    assert_eq!(fast_tier.status.as_deref(), Some("ga"));
}
4234}