1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::{OnceLock, RwLock};
6
/// Process-wide base configuration; initialised at most once by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file that `load_config` merged into `CONFIG`.
/// Only set when such a file was read and parsed successfully.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
/// Optional process-wide overlay that `effective_config_with_user_overrides`
/// merges on top of `CONFIG`.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    // Per-thread overrides; merged last when the effective config is built.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
18
/// Top-level provider catalog configuration.
///
/// Every section is `#[serde(default)]`, so a document may omit any of them.
/// Instances are layered on top of each other with
/// [`ProvidersConfig::merge_from`].
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Name of the provider to use by default, if one is configured.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions keyed by provider name.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias name -> concrete model id / provider pair.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Tool-calling metadata keyed by name (presumably the alias name — confirm).
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model catalog entries keyed by name (presumably the model id — confirm).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// String-to-string defaults; semantics are defined by the consumers.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered rules mapping a model selector to a provider.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered rules mapping a model selector to a tier.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Fallback tier settings; defaults to `default_mid()`.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Per-pattern tables of default parameters (raw TOML values).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Per-role tables of parameters (raw TOML values).
    #[serde(default)]
    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Suppression lists (currently only `routes`).
    #[serde(default)]
    pub suppress: SuppressDef,
}
46
/// Suppression section of the configuration.
#[derive(Debug, Clone, Deserialize, Default, PartialEq, Eq)]
pub struct SuppressDef {
    /// Route identifiers to suppress; the exact string format is defined by
    /// the consumer of this list (not visible here).
    #[serde(default)]
    pub routes: Vec<String>,
}
68
69impl ProvidersConfig {
70 pub fn is_empty(&self) -> bool {
71 self.default_provider.is_none()
72 && self.providers.is_empty()
73 && self.aliases.is_empty()
74 && self.alias_tool_calling.is_empty()
75 && self.models.is_empty()
76 && self.qc_defaults.is_empty()
77 && self.inference_rules.is_empty()
78 && self.tier_rules.is_empty()
79 && self.model_defaults.is_empty()
80 && self.model_roles.is_empty()
81 && self.suppress.routes.is_empty()
82 && self.tier_defaults.default == default_mid()
83 }
84
85 pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
86 for (name, provider) in &overlay.providers {
87 match self.providers.get_mut(name) {
88 Some(existing) => existing.merge_from(provider),
89 None => {
90 self.providers.insert(name.clone(), provider.clone());
91 }
92 }
93 }
94 self.aliases.extend(overlay.aliases.clone());
95 self.alias_tool_calling
96 .extend(overlay.alias_tool_calling.clone());
97 self.models.extend(overlay.models.clone());
98 self.qc_defaults.extend(overlay.qc_defaults.clone());
99
100 if overlay.default_provider.is_some() {
101 self.default_provider = overlay.default_provider.clone();
102 }
103
104 if !overlay.inference_rules.is_empty() {
105 let mut merged = overlay.inference_rules.clone();
106 merged.extend(self.inference_rules.clone());
107 self.inference_rules = merged;
108 }
109
110 if !overlay.tier_rules.is_empty() {
111 let mut merged = overlay.tier_rules.clone();
112 merged.extend(self.tier_rules.clone());
113 self.tier_rules = merged;
114 }
115
116 if overlay.tier_defaults.default != default_mid() {
117 self.tier_defaults = overlay.tier_defaults.clone();
118 }
119
120 for (pattern, defaults) in &overlay.model_defaults {
121 self.model_defaults
122 .entry(pattern.clone())
123 .or_default()
124 .extend(defaults.clone());
125 }
126
127 for (role, defaults) in &overlay.model_roles {
128 self.model_roles
129 .entry(role.clone())
130 .or_default()
131 .extend(defaults.clone());
132 }
133
134 for route in &overlay.suppress.routes {
135 if !self.suppress.routes.contains(route) {
136 self.suppress.routes.push(route.clone());
137 }
138 }
139 }
140}
141
/// In-memory definition of a single provider.
///
/// Deserialization goes through `ProviderDefWire` (see the manual
/// `Deserialize` impl) so that `auth_style_explicit` can record whether
/// `auth_style` was actually present in the input.
#[derive(Debug, Clone)]
pub struct ProviderDef {
    pub display_name: Option<String>,
    pub icon: Option<String>,
    pub protocol: Option<String>,
    /// Empty string means "not set" for merging purposes (see `merge_string`).
    pub base_url: String,
    /// Presumably the name of an environment variable overriding `base_url` —
    /// confirm against the consumer.
    pub base_url_env: Option<String>,
    /// Defaults to `default_bearer()` when absent from the input.
    pub auth_style: String,
    pub auth_header: Option<String>,
    pub auth_env: AuthEnv,
    pub extra_headers: BTreeMap<String, String>,
    /// Empty string means "not set" for merging purposes (see `merge_string`).
    pub chat_endpoint: String,
    pub completion_endpoint: Option<String>,
    pub command: Option<String>,
    pub args: Vec<String>,
    pub env: BTreeMap<String, String>,
    pub cwd: Option<String>,
    pub mcp_servers: Vec<serde_json::Value>,
    pub healthcheck: Option<HealthcheckDef>,
    pub local_runtime: Option<LocalRuntimeDef>,
    pub features: Vec<String>,
    pub fallback: Option<String>,
    pub retry_count: Option<u32>,
    pub retry_delay_ms: Option<u64>,
    pub rpm: Option<u32>,
    pub rate_limits: Option<RateLimitsDef>,
    pub cost_per_1k_in: Option<f64>,
    pub cost_per_1k_out: Option<f64>,
    pub latency_p50_ms: Option<u64>,
    pub performance: Option<ServingPerformanceDef>,
    /// `true` when `auth_style` was given explicitly rather than defaulted;
    /// consulted by `ProviderDef::merge_from`.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
192
/// Serde-facing mirror of `ProviderDef`.
///
/// Identical field set except that `auth_style` is optional here (so its
/// absence is observable) and there is no `auth_style_explicit` flag.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    // `None` when the input omits `auth_style`; the `Deserialize` impl for
    // `ProviderDef` substitutes `default_bearer()` in that case.
    #[serde(default)]
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    local_runtime: Option<LocalRuntimeDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
    #[serde(default)]
    performance: Option<ServingPerformanceDef>,
}
252
253impl<'de> Deserialize<'de> for ProviderDef {
254 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
255 where
256 D: serde::Deserializer<'de>,
257 {
258 let wire = ProviderDefWire::deserialize(deserializer)?;
259 let auth_style_explicit = wire.auth_style.is_some();
260 Ok(Self {
261 display_name: wire.display_name,
262 icon: wire.icon,
263 protocol: wire.protocol,
264 base_url: wire.base_url,
265 base_url_env: wire.base_url_env,
266 auth_style: wire.auth_style.unwrap_or_else(default_bearer),
267 auth_header: wire.auth_header,
268 auth_env: wire.auth_env,
269 extra_headers: wire.extra_headers,
270 chat_endpoint: wire.chat_endpoint,
271 completion_endpoint: wire.completion_endpoint,
272 command: wire.command,
273 args: wire.args,
274 env: wire.env,
275 cwd: wire.cwd,
276 mcp_servers: wire.mcp_servers,
277 healthcheck: wire.healthcheck,
278 local_runtime: wire.local_runtime,
279 features: wire.features,
280 fallback: wire.fallback,
281 retry_count: wire.retry_count,
282 retry_delay_ms: wire.retry_delay_ms,
283 rpm: wire.rpm,
284 rate_limits: wire.rate_limits,
285 cost_per_1k_in: wire.cost_per_1k_in,
286 cost_per_1k_out: wire.cost_per_1k_out,
287 latency_p50_ms: wire.latency_p50_ms,
288 performance: wire.performance,
289 auth_style_explicit,
290 })
291 }
292}
293
294impl Default for ProviderDef {
295 fn default() -> Self {
296 Self {
297 display_name: None,
298 icon: None,
299 protocol: None,
300 base_url: String::new(),
301 base_url_env: None,
302 auth_style: default_bearer(),
303 auth_header: None,
304 auth_env: AuthEnv::None,
305 extra_headers: BTreeMap::new(),
306 chat_endpoint: String::new(),
307 completion_endpoint: None,
308 command: None,
309 args: Vec::new(),
310 env: BTreeMap::new(),
311 cwd: None,
312 mcp_servers: Vec::new(),
313 healthcheck: None,
314 local_runtime: None,
315 features: Vec::new(),
316 fallback: None,
317 retry_count: None,
318 retry_delay_ms: None,
319 rpm: None,
320 rate_limits: None,
321 cost_per_1k_in: None,
322 cost_per_1k_out: None,
323 latency_p50_ms: None,
324 performance: None,
325 auth_style_explicit: false,
326 }
327 }
328}
329
330impl ProviderDef {
331 fn merge_from(&mut self, overlay: &ProviderDef) {
332 merge_option(&mut self.display_name, &overlay.display_name);
333 merge_option(&mut self.icon, &overlay.icon);
334 merge_option(&mut self.protocol, &overlay.protocol);
335 merge_string(&mut self.base_url, &overlay.base_url);
336 merge_option(&mut self.base_url_env, &overlay.base_url_env);
337 let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
338 if overlay.auth_style_explicit
339 || !overlay_uses_default_auth_style
340 || self.auth_style == default_bearer()
341 {
342 self.auth_style = overlay.auth_style.clone();
343 self.auth_style_explicit |=
344 overlay.auth_style_explicit || !overlay_uses_default_auth_style;
345 }
346 merge_option(&mut self.auth_header, &overlay.auth_header);
347 if !overlay.auth_env.is_none() {
348 self.auth_env = overlay.auth_env.clone();
349 }
350 self.extra_headers.extend(overlay.extra_headers.clone());
351 merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
352 merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
353 merge_option(&mut self.command, &overlay.command);
354 merge_vec(&mut self.args, &overlay.args);
355 self.env.extend(overlay.env.clone());
356 merge_option(&mut self.cwd, &overlay.cwd);
357 merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
358 merge_option(&mut self.healthcheck, &overlay.healthcheck);
359 merge_option(&mut self.local_runtime, &overlay.local_runtime);
360 merge_vec(&mut self.features, &overlay.features);
361 merge_option(&mut self.fallback, &overlay.fallback);
362 merge_option(&mut self.retry_count, &overlay.retry_count);
363 merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
364 merge_option(&mut self.rpm, &overlay.rpm);
365 merge_option(&mut self.rate_limits, &overlay.rate_limits);
366 merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
367 merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
368 merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
369 merge_option(&mut self.performance, &overlay.performance);
370 }
371}
372
/// Overwrites `base` with a clone of `overlay`'s value when `overlay` is
/// `Some`; a `None` overlay leaves `base` untouched.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
378
/// Replaces the contents of `base` with `overlay` unless `overlay` is empty,
/// in which case `base` is left as it is.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.push_str(overlay);
}
384
/// Replaces the contents of `base` with clones of `overlay`'s elements unless
/// `overlay` is empty, in which case `base` is left as it is.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.extend_from_slice(overlay);
}
390
/// The `auth_style` used when a provider does not specify one.
fn default_bearer() -> String {
    String::from("bearer")
}
394
/// Value of a provider's `auth_env` setting.
///
/// Untagged for serde: a string becomes `Single`, a list of strings becomes
/// `Multiple`. The strings are presumably environment variable names —
/// confirm against the consumer.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// Nothing configured; also the `Default`.
    #[default]
    None,
    /// A single string value.
    Single(String),
    /// Several string values, in the order given.
    Multiple(Vec<String>),
}
405
406impl AuthEnv {
407 fn is_none(&self) -> bool {
408 matches!(self, AuthEnv::None)
409 }
410}
411
/// Healthcheck settings for a provider.
///
/// `method` is required; `path`, `url` and `body` are optional. How `path`
/// and `url` interact is decided by the consumer (not visible here).
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// Request method, stored as provided.
    pub method: String,
    #[serde(default)]
    pub path: Option<String>,
    #[serde(default)]
    pub url: Option<String>,
    #[serde(default)]
    pub body: Option<String>,
}
422
/// Description of a locally launched runtime attached to a provider.
///
/// All fields are optional on input and omitted from serialized output when
/// unset or empty. The `*_arg` fields presumably hold command-line flag names
/// for the runtime binary — confirm against the launcher code.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalRuntimeDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source_env: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_port: Option<u16>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub served_model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ctx_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu_layers_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_k_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_v_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_ram_arg: Option<String>,
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub default_args: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
479
/// Memory-footprint metadata for a locally served model.
///
/// All fields are optional on input and omitted from serialized output when
/// unset or empty. Units are implied by the field names (`_gib`, `_per_1k_ctx`);
/// how the values are combined is up to the consumer (not visible here).
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct LocalMemoryDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_context_window: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_cache_gib_per_1k_ctx: Option<f64>,
    /// Multipliers keyed by name (presumably cache type — confirm).
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub cache_type_multipliers: BTreeMap<String, f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety_margin_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_recommended_context: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
520
521impl LocalMemoryDef {
522 pub fn is_empty(&self) -> bool {
523 self.measured_resident_gib.is_none()
524 && self.measured_context_window.is_none()
525 && self.measured_cache_type.is_none()
526 && self.base_resident_gib.is_none()
527 && self.kv_cache_gib_per_1k_ctx.is_none()
528 && self.cache_type_multipliers.is_empty()
529 && self.default_cache_type.is_none()
530 && self.safety_margin_gib.is_none()
531 && self.max_recommended_context.is_none()
532 && self.source_url.is_none()
533 && self.last_verified.is_none()
534 && self.notes.is_none()
535 }
536}
537
/// Target of a model alias: the concrete model id and the provider serving it.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Concrete model id the alias resolves to.
    pub id: String,
    /// Provider name the alias resolves to.
    pub provider: String,
    /// Optional tool-format override. When absent, `resolve_model_info` falls
    /// back to `default_tool_format_with_config`.
    #[serde(default)]
    pub tool_format: Option<String>,
}
549
/// Tool-calling metadata stored under `ProvidersConfig::alias_tool_calling`.
///
/// Every field is an optional free-form string and is omitted from serialized
/// output when `None`. The value vocabularies are defined by the producer of
/// this data (not visible here).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasToolCallingDef {
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub native: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub streaming_native: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_mode: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub last_probe_at: Option<String>,
}
571
/// Pricing for a model. The `_per_mtok` suffix presumably means "per million
/// tokens" — confirm; the currency is not stated in this file.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// Required input price.
    pub input_per_mtok: f64,
    /// Required output price.
    pub output_per_mtok: f64,
    /// Optional cache-read price.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Optional cache-write price.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
581
/// Rate-limit metadata for a provider or model.
///
/// All fields are optional on input and omitted from serialized output when
/// `None`. The abbreviations presumably read as requests/tokens per
/// minute/hour/day (`rpm`, `rph`, `rpd`, `tpm`, `tph`, `tpd`) — confirm
/// against the upstream documentation referenced by `source_url`.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RateLimitsDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpm: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rph: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpd: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tph: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpd: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tier: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
626
627impl RateLimitsDef {
628 pub fn is_empty(&self) -> bool {
629 self.rpm.is_none()
630 && self.rph.is_none()
631 && self.rpd.is_none()
632 && self.tpm.is_none()
633 && self.tph.is_none()
634 && self.tpd.is_none()
635 && self.input_tpm.is_none()
636 && self.output_tpm.is_none()
637 && self.concurrency.is_none()
638 && self.tier.is_none()
639 && self.source_url.is_none()
640 && self.last_verified.is_none()
641 && self.notes.is_none()
642 }
643
644 pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
645 if self.rpm.is_none() {
646 self.rpm = rpm;
647 }
648 (!self.is_empty()).then_some(self)
649 }
650}
651
/// Serving-performance metadata for a provider or model.
///
/// All fields are optional on input and omitted from serialized output when
/// `None`. Units follow the field-name suffixes (`_ms`, `_per_sec`, `_s`).
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ServingPerformanceDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub observed_ttft_ms: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens_per_sec: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_answer_s: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sample_size: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
684
685impl ServingPerformanceDef {
686 pub fn is_empty(&self) -> bool {
687 self.observed_ttft_ms.is_none()
688 && self.output_tokens_per_sec.is_none()
689 && self.time_to_answer_s.is_none()
690 && self.source.is_none()
691 && self.source_url.is_none()
692 && self.last_verified.is_none()
693 && self.sample_size.is_none()
694 && self.notes.is_none()
695 }
696}
697
/// Architecture metadata for a model.
///
/// All fields are optional on input and omitted from serialized output when
/// `None`. The `_b` suffix on the parameter counts presumably means billions
/// — confirm.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ModelArchitectureDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moe: Option<bool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quantization: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precision: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokenizer: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub knowledge_cutoff: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
}
734
735impl ModelArchitectureDef {
736 pub fn is_empty(&self) -> bool {
737 self.parameter_count_b.is_none()
738 && self.active_parameter_count_b.is_none()
739 && self.moe.is_none()
740 && self.quantization.is_none()
741 && self.precision.is_none()
742 && self.license.is_none()
743 && self.tokenizer.is_none()
744 && self.knowledge_cutoff.is_none()
745 && self.source_url.is_none()
746 && self.last_verified.is_none()
747 }
748}
749
/// Description of a model's optional "fast mode".
///
/// `param` and `value` are required; presumably they name the request
/// parameter and the value that enables the mode — confirm against the
/// request-building code.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    pub param: String,
    pub value: String,
    #[serde(default)]
    pub beta_header: Option<String>,
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    #[serde(default)]
    pub status: Option<String>,
    /// Optional pricing attached to this mode.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    pub note: Option<String>,
}
786
/// A model catalog entry.
///
/// `name`, `provider` and `context_window` are required; every other field is
/// `#[serde(default)]` and may be omitted from the input.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    pub name: String,
    pub provider: String,
    pub context_window: u64,
    #[serde(default)]
    pub logical_model: Option<String>,
    #[serde(default)]
    pub equivalence_group: Option<String>,
    #[serde(default)]
    pub served_variant: Option<String>,
    #[serde(default)]
    pub wire_model: Option<String>,
    #[serde(default)]
    pub api_dialect: Option<String>,
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    pub performance: Option<ServingPerformanceDef>,
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    #[serde(default)]
    pub capabilities: Vec<String>,
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    /// Defaults to `false` when omitted.
    #[serde(default)]
    pub deprecated: bool,
    #[serde(default)]
    pub deprecation_note: Option<String>,
    #[serde(default)]
    pub superseded_by: Option<String>,
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Defaults to `ModelAvailability::Serverless` when omitted.
    #[serde(default)]
    pub availability: ModelAvailability,
    #[serde(default)]
    pub tier: Option<String>,
    #[serde(default)]
    pub open_weight: Option<bool>,
    #[serde(default)]
    pub strengths: Vec<String>,
    /// Numeric scores keyed by name (presumably benchmark name — confirm).
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    #[serde(default)]
    pub family: Option<String>,
    #[serde(default)]
    pub lineage: Option<String>,
    #[serde(default)]
    pub complementary_with: Vec<String>,
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
905
/// How a model is made available. Serialized in `snake_case`
/// (`"serverless"`, `"dedicated"`, `"unknown"`).
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// The default when the field is omitted.
    #[default]
    Serverless,
    Dedicated,
    Unknown,
}
923
924impl ModelAvailability {
925 pub fn as_str(self) -> &'static str {
926 match self {
927 Self::Serverless => "serverless",
928 Self::Dedicated => "dedicated",
929 Self::Unknown => "unknown",
930 }
931 }
932
933 pub fn parse(value: &str) -> Option<Self> {
934 match value {
935 "serverless" => Some(Self::Serverless),
936 "dedicated" => Some(Self::Dedicated),
937 "unknown" => Some(Self::Unknown),
938 _ => None,
939 }
940 }
941}
942
/// Outcome of resolving a model selector, as built by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Concrete model id (alias target, or the normalized selector).
    pub id: String,
    /// Provider name (alias target, or the inferred provider).
    pub provider: String,
    /// The selector itself when it matched a configured alias, else `None`.
    pub alias: Option<String>,
    pub tool_format: String,
    pub tier: String,
    pub family: String,
    pub lineage: String,
}
953
/// Inputs for selecting a complementary reviewer model. The selection logic
/// that consumes these options is not part of this section of the file.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    pub author_model: String,
    pub author_provider: Option<String>,
    pub intent: ComplementaryReviewerIntent,
    /// Optional price cap; presumably relative to the author model's price —
    /// confirm against the selection code.
    pub max_price_multiplier: Option<f64>,
}
961
/// Purpose for which a complementary reviewer is requested.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    Review,
    Critique,
    PlanReview,
}

impl ComplementaryReviewerIntent {
    /// Parses the exact snake_case name produced by [`Self::as_str`]; any
    /// other input (including different casing) yields `None`.
    pub fn parse(value: &str) -> Option<Self> {
        [Self::Review, Self::Critique, Self::PlanReview]
            .iter()
            .copied()
            .find(|candidate| candidate.as_str() == value)
    }

    /// Returns the snake_case name of the variant.
    pub fn as_str(self) -> &'static str {
        match self {
            ComplementaryReviewerIntent::Review => "review",
            ComplementaryReviewerIntent::Critique => "critique",
            ComplementaryReviewerIntent::PlanReview => "plan_review",
        }
    }
}
987
/// Serializable result of a complementary-reviewer selection.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    /// Intent as a string (presumably `ComplementaryReviewerIntent::as_str`
    /// output — confirm).
    pub intent: String,
    pub author: ComplementaryModelIdentity,
    pub reviewer: ComplementaryModelIdentity,
    pub fallback: bool,
    pub fallback_reason: Option<String>,
    /// Omitted from serialized output when `None`; presumably carries a
    /// `ReviewerFallbackCode::as_code` value — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback_code: Option<String>,
    pub reason: String,
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
1005
/// Machine-readable codes for reviewer-selection fallbacks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReviewerFallbackCode {
    UnknownAuthorFamily,
    NoDiffFamilyWithinPrice,
    NoDiffFamilyServerless,
    AllDiffFamilyExcluded,
}

impl ReviewerFallbackCode {
    /// Returns the snake_case string for this code.
    pub fn as_code(self) -> &'static str {
        let code = match self {
            ReviewerFallbackCode::UnknownAuthorFamily => "unknown_author_family",
            ReviewerFallbackCode::NoDiffFamilyWithinPrice => "no_diff_family_within_price",
            ReviewerFallbackCode::NoDiffFamilyServerless => "no_diff_family_serverless",
            ReviewerFallbackCode::AllDiffFamilyExcluded => "all_diff_family_excluded",
        };
        code
    }
}
1034
/// Identity of a model (author or reviewer) within a
/// `ComplementaryReviewerSelection`.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    pub id: String,
    pub provider: String,
    pub family: String,
    pub lineage: String,
    pub tier: String,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,
}
1045
/// Cost estimate attached to a reviewer selection. How the figures are
/// computed is up to the selection code (not visible here).
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    pub total_per_mtok: f64,
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multiplier_vs_author: Option<f64>,
}
1054
/// Rule that maps a model selector to a provider.
///
/// `pattern`, `contains` and `exact` are optional matchers; how they are
/// evaluated (and the syntax of `pattern`) is decided by the matching code,
/// which is not part of this section of the file.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    /// Required: provider name associated with this rule.
    pub provider: String,
}
1065
/// Rule that maps a model selector to a tier.
///
/// `pattern`, `contains` and `exact` are optional matchers; how they are
/// evaluated (and the syntax of `pattern`) is decided by the matching code,
/// which is not part of this section of the file.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    /// Required: tier name associated with this rule.
    pub tier: String,
}
1076
/// Fallback tier settings.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Tier name; falls back to `default_mid()` when omitted from the input.
    #[serde(default = "default_mid")]
    pub default: String,
}
1082
1083impl Default for TierDefaults {
1084 fn default() -> Self {
1085 Self {
1086 default: default_mid(),
1087 }
1088 }
1089}
1090
/// The tier name used when none is configured.
fn default_mid() -> String {
    String::from("mid")
}
1094
1095pub fn load_config() -> &'static ProvidersConfig {
1097 CONFIG.get_or_init(|| {
1098 let mut config = default_config();
1099 let verbose_config_logging = matches!(
1100 std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
1101 Some("1" | "true" | "TRUE" | "yes" | "YES")
1102 ) || matches!(
1103 std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
1104 Some("1" | "true" | "TRUE" | "yes" | "YES")
1105 );
1106 if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
1107 if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
1108 config.merge_from(&overlay);
1109 let _ = CONFIG_PATH.set(path);
1110 return config;
1111 }
1112 }
1113 if should_load_home_config() {
1114 if let Some(home) = dirs_or_home() {
1115 let path = format!("{home}/.config/harn/providers.toml");
1116 if let Some(overlay) = read_external_config(&path, false) {
1117 config.merge_from(&overlay);
1118 let _ = CONFIG_PATH.set(path);
1119 return config;
1120 }
1121 }
1122 }
1123 config
1124 })
1125}
1126
1127fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1128 match std::fs::read_to_string(path) {
1129 Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
1130 Ok(config) => {
1131 if verbose {
1132 eprintln!(
1133 "[llm_config] Loaded {} providers, {} aliases from {}",
1134 config.providers.len(),
1135 config.aliases.len(),
1136 path
1137 );
1138 }
1139 Some(config)
1140 }
1141 Err(error) => {
1142 eprintln!("[llm_config] TOML parse error in {path}: {error}");
1143 None
1144 }
1145 },
1146 Err(error) => {
1147 if verbose {
1148 eprintln!("[llm_config] Cannot read {path}: {error}");
1149 }
1150 None
1151 }
1152 }
1153}
1154
/// Whether `load_config` may consult the home-directory config file: always
/// in normal builds, never when compiled with `cfg(test)`.
fn should_load_home_config() -> bool {
    let compiled_for_tests = cfg!(test);
    !compiled_for_tests
}
1160
1161pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1164 toml::from_str::<ProvidersConfig>(src)
1165}
1166
1167pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1170 let _ = load_config();
1172 CONFIG_PATH.get().map(std::path::PathBuf::from)
1173}
1174
1175pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1179 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1180}
1181
1182pub fn clear_user_overrides() {
1184 set_user_overrides(None);
1185}
1186
1187pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1192 *runtime_catalog_overlay()
1193 .write()
1194 .expect("runtime catalog overlay poisoned") = config;
1195}
1196
1197pub fn clear_runtime_catalog_overlay() {
1198 set_runtime_catalog_overlay(None);
1199}
1200
1201pub(crate) fn effective_config() -> ProvidersConfig {
1202 let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1203 effective_config_with_user_overrides(user_overrides.as_ref())
1204}
1205
1206pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1224 let mut config = default_config();
1225 if let Some(overlay) = explicit_overlay {
1226 config.merge_from(overlay);
1227 }
1228 config
1229}
1230
1231pub(crate) fn effective_config_with_user_overrides(
1232 user_overrides: Option<&ProvidersConfig>,
1233) -> ProvidersConfig {
1234 let mut merged = load_config().clone();
1235 if let Some(overlay) = runtime_catalog_overlay()
1236 .read()
1237 .expect("runtime catalog overlay poisoned")
1238 .as_ref()
1239 {
1240 merged.merge_from(overlay);
1241 }
1242 if let Some(overlay) = user_overrides {
1243 merged.merge_from(overlay);
1244 }
1245 merged
1246}
1247
1248fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
1249 RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
1250}
1251
1252pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1254 let config = effective_config();
1255 if let Some(a) = config.aliases.get(alias) {
1256 return (a.id.clone(), Some(a.provider.clone()));
1257 }
1258 (normalize_model_id(alias), None)
1259}
1260
/// Strips the first matching entry of [`PROVIDER_SELECTOR_PREFIXES`] from the
/// front of `raw`; input without such a prefix is returned unchanged.
///
/// Only one prefix is removed, and prefixes are tried in table order.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(*prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Leading provider selectors that [`normalize_model_id`] removes.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
1278
1279pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1282 let config = effective_config();
1283 if let Some(alias) = config.aliases.get(selector) {
1284 let id = alias.id.clone();
1285 let provider = alias.provider.clone();
1286 let requested = alias
1287 .tool_format
1288 .clone()
1289 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1290 let tool_format = guard_tool_format(&provider, &id, &requested, Some(selector));
1291 return ResolvedModel {
1292 tier: model_tier_with_config(&config, &id),
1293 family: model_family_with_config(&config, &provider, &id),
1294 lineage: model_lineage_with_config(&config, &provider, &id),
1295 id,
1296 provider,
1297 alias: Some(selector.to_string()),
1298 tool_format,
1299 };
1300 }
1301
1302 let id = normalize_model_id(selector);
1303 let inference = infer_provider_with_config(&config, selector);
1304 let source = inference.source;
1305 let provider = inference.provider;
1306 let requested = default_tool_format_with_config(&config, &id, &provider);
1307 let tool_format = guard_tool_format(&provider, &id, &requested, None);
1308 let tier = model_tier_with_config(&config, &id);
1309 let family = model_family_with_inference_source(&config, &provider, &id, source);
1310 let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1311 ResolvedModel {
1312 id,
1313 provider,
1314 alias: None,
1315 tool_format,
1316 tier,
1317 family,
1318 lineage,
1319 }
1320}
1321
1322fn guard_tool_format(provider: &str, model: &str, requested: &str, alias: Option<&str>) -> String {
1329 let decision = crate::llm::capabilities::validate_tool_format(provider, model, requested);
1330 if let Some(reason) = &decision.correction {
1331 tracing::warn!(
1332 target: "harn::llm::tool_format",
1333 alias = alias.unwrap_or(""),
1334 "{reason}"
1335 );
1336 }
1337 decision.effective
1338}
1339
1340pub fn infer_provider(model_id: &str) -> String {
1342 infer_provider_detail(model_id).provider
1343}
1344
1345pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1347 let config = effective_config();
1348 infer_provider_with_config(&config, model_id)
1349}
1350
1351fn infer_provider_with_config(
1352 config: &ProvidersConfig,
1353 model_id: &str,
1354) -> crate::llm::provider::ProviderInference {
1355 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1356 return crate::llm::provider::ProviderInference::builtin("ollama");
1357 }
1358 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1359 return crate::llm::provider::ProviderInference::builtin("huggingface");
1360 }
1361 let normalized_id = normalize_model_id(model_id);
1367 if let Some(model) = config
1368 .models
1369 .get(model_id)
1370 .or_else(|| config.models.get(&normalized_id))
1371 {
1372 return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1373 }
1374 for rule in &config.inference_rules {
1375 if let Some(exact) = &rule.exact {
1376 if model_id == exact {
1377 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1378 }
1379 }
1380 if let Some(pattern) = &rule.pattern {
1381 if glob_match(pattern, model_id) {
1382 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1383 }
1384 }
1385 if let Some(substr) = &rule.contains {
1386 if model_id.contains(substr.as_str()) {
1387 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1388 }
1389 }
1390 }
1391 crate::llm::provider::infer_provider_from_model_id(
1392 model_id,
1393 &default_provider_with_config(config),
1394 )
1395}
1396
1397pub fn default_provider() -> String {
1398 let config = effective_config();
1399 default_provider_with_config(&config)
1400}
1401
1402fn default_provider_with_config(config: &ProvidersConfig) -> String {
1403 std::env::var("HARN_DEFAULT_PROVIDER")
1404 .ok()
1405 .map(|value| value.trim().to_string())
1406 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1407 .or_else(|| {
1408 config
1409 .default_provider
1410 .as_deref()
1411 .map(str::trim)
1412 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1413 .map(str::to_string)
1414 })
1415 .unwrap_or_else(|| auto_select_provider(config))
1416}
1417
/// Provider name `auto_select_provider` returns when no configured provider qualifies.
const FALLBACK_PROVIDER: &str = "anthropic";

/// Latch set by `warn_auto_provider_once` so its warning is logged at most once per process.
static AUTO_PROVIDER_WARNED: AtomicBool = AtomicBool::new(false);
1426
1427fn provider_has_credentials(def: &ProviderDef) -> bool {
1429 auth_env_names(&def.auth_env)
1430 .iter()
1431 .any(|name| std::env::var(name).is_ok_and(|value| !value.trim().is_empty()))
1432}
1433
1434fn provider_is_local(def: &ProviderDef) -> bool {
1437 def.local_runtime.is_some() || matches!(def.auth_env, AuthEnv::None)
1438}
1439
1440fn warn_auto_provider_once(message: &str) {
1442 if !AUTO_PROVIDER_WARNED.swap(true, Ordering::Relaxed) {
1443 crate::events::log_warn("llm_config", message);
1444 }
1445}
1446
1447fn auto_select_provider(config: &ProvidersConfig) -> String {
1453 const PREFERRED: &[&str] = &[
1456 "anthropic",
1457 "openai",
1458 "google",
1459 "azure-openai",
1460 "groq",
1461 "mistral",
1462 "deepseek",
1463 "xai",
1464 "openrouter",
1465 ];
1466 for name in PREFERRED {
1467 if config
1468 .providers
1469 .get(*name)
1470 .is_some_and(provider_has_credentials)
1471 {
1472 if *name != FALLBACK_PROVIDER {
1473 warn_auto_provider_once(&format!(
1474 "no default provider configured; using '{name}' (its API key is set). \
1475 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1476 ));
1477 }
1478 return (*name).to_string();
1479 }
1480 }
1481 for (name, def) in &config.providers {
1482 if provider_has_credentials(def) {
1483 warn_auto_provider_once(&format!(
1484 "no default provider configured; using '{name}' (its API key is set). \
1485 Set HARN_DEFAULT_PROVIDER or `default_provider` to silence this."
1486 ));
1487 return name.clone();
1488 }
1489 }
1490 for (name, def) in &config.providers {
1492 if provider_is_local(def) {
1493 warn_auto_provider_once(&format!(
1494 "no provider API keys found; using local provider '{name}'. \
1495 Set an API key + HARN_DEFAULT_PROVIDER to use a cloud provider."
1496 ));
1497 return name.clone();
1498 }
1499 }
1500 warn_auto_provider_once(&format!(
1502 "no LLM provider configured and no API keys detected; defaulting to \
1503 '{FALLBACK_PROVIDER}'. Set ANTHROPIC_API_KEY (or another provider's key plus \
1504 HARN_DEFAULT_PROVIDER), or run a local model with `harn local launch`."
1505 ));
1506 FALLBACK_PROVIDER.to_string()
1507}
1508
1509pub fn model_tier(model_id: &str) -> String {
1511 let config = effective_config();
1512 model_tier_with_config(&config, model_id)
1513}
1514
1515pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1516 if let Some(model) = config.models.get(model_id) {
1518 if let Some(tier) = model.tier.as_deref() {
1519 let trimmed = tier.trim();
1520 if !trimmed.is_empty() {
1521 return trimmed.to_string();
1522 }
1523 }
1524 }
1525 for rule in &config.tier_rules {
1529 if let Some(exact) = &rule.exact {
1530 if model_id == exact {
1531 return rule.tier.clone();
1532 }
1533 }
1534 if let Some(pattern) = &rule.pattern {
1535 if glob_match(pattern, model_id) {
1536 return rule.tier.clone();
1537 }
1538 }
1539 if let Some(substr) = &rule.contains {
1540 if model_id.contains(substr.as_str()) {
1541 return rule.tier.clone();
1542 }
1543 }
1544 }
1545 config.tier_defaults.default.clone()
1546}
1547
1548pub fn model_family(provider: &str, model_id: &str) -> String {
1550 let config = effective_config();
1551 model_family_with_config(&config, provider, model_id)
1552}
1553
1554pub(crate) fn model_family_with_config(
1555 config: &ProvidersConfig,
1556 provider: &str,
1557 model_id: &str,
1558) -> String {
1559 catalog_family_token(config, model_id)
1560 .unwrap_or_else(|| derive_model_family(provider, model_id))
1561}
1562
1563fn model_family_with_inference_source(
1564 config: &ProvidersConfig,
1565 provider: &str,
1566 model_id: &str,
1567 source: crate::llm::provider::ProviderInferenceSource,
1568) -> String {
1569 if let Some(family) = catalog_family_token(config, model_id) {
1570 return family;
1571 }
1572 let id_family = derive_model_family("", model_id);
1573 if id_family != "unknown" {
1574 return id_family;
1575 }
1576 if matches!(
1577 source,
1578 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1579 ) {
1580 return "unknown".to_string();
1581 }
1582 derive_model_family(provider, model_id)
1583}
1584
1585pub fn model_lineage(provider: &str, model_id: &str) -> String {
1587 let config = effective_config();
1588 model_lineage_with_config(&config, provider, model_id)
1589}
1590
1591pub(crate) fn model_lineage_with_config(
1592 config: &ProvidersConfig,
1593 provider: &str,
1594 model_id: &str,
1595) -> String {
1596 catalog_lineage_token(config, model_id)
1597 .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1598}
1599
1600fn model_lineage_with_inference_source(
1601 config: &ProvidersConfig,
1602 provider: &str,
1603 model_id: &str,
1604 source: crate::llm::provider::ProviderInferenceSource,
1605) -> String {
1606 if let Some(lineage) = catalog_lineage_token(config, model_id) {
1607 return lineage;
1608 }
1609 let id_lineage = derive_model_lineage("", model_id);
1610 if id_lineage != "unknown" {
1611 return id_lineage;
1612 }
1613 if matches!(
1614 source,
1615 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1616 ) {
1617 return "unknown".to_string();
1618 }
1619 derive_model_lineage(provider, model_id)
1620}
1621
1622fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1623 config
1624 .models
1625 .get(model_id)
1626 .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1627}
1628
1629fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1630 config
1631 .models
1632 .get(model_id)
1633 .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1634}
1635
/// Normalizes an optional catalog token: trims it, lowercases ASCII letters and
/// replaces `_` with `-`. Missing or blank input yields `None`.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let trimmed = value?.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(trimmed.to_ascii_lowercase().replace('_', "-"))
}
1642
1643fn derive_model_family(provider: &str, model_id: &str) -> String {
1644 let id = model_id.to_ascii_lowercase();
1645 if contains_any(&id, &["claude", "anthropic.claude"]) {
1646 return "anthropic-claude".to_string();
1647 }
1648 if contains_any(&id, &["gemini", "google/gemini"]) {
1649 return "google-gemini".to_string();
1650 }
1651 if contains_any(&id, &["deepseek"]) {
1652 return "deepseek".to_string();
1653 }
1654 if contains_any(&id, &["qwen"]) {
1655 return "qwen".to_string();
1656 }
1657 if contains_any(&id, &["kimi", "moonshot"]) {
1658 return "kimi".to_string();
1659 }
1660 if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1661 return "glm".to_string();
1662 }
1663 if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1664 return "mistral".to_string();
1665 }
1666 if contains_any(&id, &["minimax"]) {
1667 return "minimax".to_string();
1668 }
1669 if contains_any(&id, &["llama"]) {
1670 return "llama".to_string();
1671 }
1672 if contains_any(&id, &["gemma"]) {
1673 return "gemma".to_string();
1674 }
1675 if is_openai_reasoning_model(&id) {
1676 return "openai-reasoning".to_string();
1677 }
1678 if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1679 return "openai-gpt".to_string();
1680 }
1681 match provider {
1682 "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1683 "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1684 "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1685 "deepseek" => "deepseek".to_string(),
1686 "zai" => "glm".to_string(),
1687 "minimax" => "minimax".to_string(),
1688 other if !other.is_empty() => normalize_identifier_token(other),
1689 _ => "unknown".to_string(),
1690 }
1691}
1692
1693fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1694 let id = model_id.to_ascii_lowercase();
1695 if contains_any(&id, &["haiku"]) {
1696 return "claude-haiku".to_string();
1697 }
1698 if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1699 return "claude-opus-adaptive".to_string();
1700 }
1701 if contains_any(&id, &["claude"]) {
1702 return "claude-sonnet-opus".to_string();
1703 }
1704 if contains_any(&id, &["gpt-5"]) {
1705 return "openai-gpt5".to_string();
1706 }
1707 if is_openai_reasoning_model(&id) {
1708 return "openai-reasoning".to_string();
1709 }
1710 if contains_any(&id, &["gpt-", "gpt_"]) {
1711 return "openai-legacy".to_string();
1712 }
1713 if contains_any(&id, &["gemini"]) {
1714 if contains_any(&id, &["flash"]) {
1715 return "gemini-flash".to_string();
1716 }
1717 return "gemini-pro".to_string();
1718 }
1719 if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1720 return "qwen3".to_string();
1721 }
1722 if contains_any(&id, &["gemma4", "gemma-4"]) {
1723 return "gemma4".to_string();
1724 }
1725 let family = derive_model_family(provider, model_id);
1726 if family == "unknown" {
1727 "unknown".to_string()
1728 } else {
1729 family
1730 }
1731}
1732
/// Returns `true` when `haystack` contains at least one of `needles`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1736
/// Returns `true` when `haystack` starts with at least one of `prefixes`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1740
1741fn is_openai_reasoning_model(id: &str) -> bool {
1742 starts_with_any(id, &["o1", "o3", "o4"])
1743 || contains_any(
1744 id,
1745 &[
1746 "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1747 ],
1748 )
1749}
1750
/// Turns `value` into a lowercase, dash-separated token.
///
/// ASCII letters are lowercased; every run of characters that are not ASCII
/// alphanumerics collapses into a single `-`, and no `-` is emitted at the
/// start or end.
fn normalize_identifier_token(value: &str) -> String {
    let mut token = String::with_capacity(value.len());
    // Set when a separator run has been seen since the last kept character.
    let mut pending_dash = false;
    for ch in value.trim().chars() {
        let ch = ch.to_ascii_lowercase();
        if ch.is_ascii_alphanumeric() {
            if pending_dash && !token.is_empty() {
                token.push('-');
            }
            pending_dash = false;
            token.push(ch);
        } else {
            pending_dash = true;
        }
    }
    token
}
1769
1770pub fn provider_config(name: &str) -> Option<ProviderDef> {
1772 effective_config().providers.get(name).cloned()
1773}
1774
1775pub fn provider_protocol(name: &str) -> Option<String> {
1776 provider_config(name).and_then(|def| def.protocol)
1777}
1778
1779pub fn provider_uses_acp(name: &str) -> bool {
1780 provider_protocol(name)
1781 .as_deref()
1782 .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1783}
1784
1785pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1788 let config = effective_config();
1789 let mut params = BTreeMap::new();
1790 for (pattern, defaults) in &config.model_defaults {
1791 if glob_match(pattern, model_id) {
1792 for (k, v) in defaults {
1793 params.insert(k.clone(), v.clone());
1794 }
1795 }
1796 }
1797 params
1798}
1799
1800pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1814 let normalized = normalize_model_role_name(role);
1815 if normalized.is_empty() {
1816 return BTreeMap::new();
1817 }
1818 let config = effective_config();
1819 let mut params = BTreeMap::new();
1820 for key in role_lookup_keys(&normalized) {
1821 extend_model_role_defaults(&config, &key, &mut params);
1822 }
1823 apply_model_role_env_overrides(&normalized, &mut params);
1824 params
1825}
1826
1827fn extend_model_role_defaults(
1828 config: &ProvidersConfig,
1829 role: &str,
1830 params: &mut BTreeMap<String, toml::Value>,
1831) {
1832 for (configured_role, defaults) in &config.model_roles {
1833 if normalize_model_role_name(configured_role) == role {
1834 params.extend(defaults.clone());
1835 }
1836 }
1837 if let Some(defaults) = config.model_roles.get(role) {
1838 params.extend(defaults.clone());
1839 }
1840}
1841
/// Normalizes a role name: trims it, lowercases ASCII letters and replaces
/// `-` with `_`.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch.to_ascii_lowercase() })
        .collect()
}
1845
/// Returns the config keys to consult for `role`, in application order.
///
/// `merge` and `fast_apply` each also consult the other, with the requested
/// role last; every other role maps to itself only.
fn role_lookup_keys(role: &str) -> Vec<String> {
    match role {
        "merge" => vec!["fast_apply".to_owned(), "merge".to_owned()],
        "fast_apply" => vec!["merge".to_owned(), "fast_apply".to_owned()],
        other => vec![other.to_owned()],
    }
}
1855
/// Converts a role name into an environment-variable token.
///
/// ASCII alphanumerics are uppercased; every run of other characters collapses
/// into a single `_`, and no `_` is emitted at the start or end.
fn role_env_token(role: &str) -> String {
    let mut token = String::with_capacity(role.len());
    // Set when a separator run has been seen since the last kept character.
    let mut pending_gap = false;
    for ch in role.chars() {
        if ch.is_ascii_alphanumeric() {
            if pending_gap && !token.is_empty() {
                token.push('_');
            }
            pending_gap = false;
            token.push(ch.to_ascii_uppercase());
        } else {
            pending_gap = true;
        }
    }
    token
}
1871
1872fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
1873 for alias in role_env_aliases(role) {
1874 apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
1875 apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
1876 apply_model_role_env_var(
1877 &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
1878 "route_policy",
1879 params,
1880 );
1881 apply_model_role_env_var(
1882 &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
1883 "provider",
1884 params,
1885 );
1886 apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
1887 apply_model_role_env_var(
1888 &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
1889 "route_policy",
1890 params,
1891 );
1892 }
1893}
1894
1895fn role_env_aliases(role: &str) -> Vec<String> {
1896 let token = role_env_token(role);
1897 if token.is_empty() {
1898 return Vec::new();
1899 }
1900 if token == "MERGE" {
1901 vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
1902 } else if token == "FAST_APPLY" {
1903 vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
1904 } else {
1905 vec![token]
1906 }
1907}
1908
1909fn apply_model_role_env_var(
1910 env_name: &str,
1911 option_name: &str,
1912 params: &mut BTreeMap<String, toml::Value>,
1913) {
1914 let Ok(value) = std::env::var(env_name) else {
1915 return;
1916 };
1917 let trimmed = value.trim();
1918 if trimmed.is_empty() {
1919 return;
1920 }
1921 params.insert(
1922 option_name.to_string(),
1923 toml::Value::String(trimmed.to_string()),
1924 );
1925}
1926
1927pub fn provider_names() -> Vec<String> {
1929 effective_config().providers.keys().cloned().collect()
1930}
1931
1932pub fn known_model_names() -> Vec<String> {
1934 effective_config().aliases.keys().cloned().collect()
1935}
1936
1937pub fn alias_entries() -> Vec<(String, AliasDef)> {
1938 effective_config().aliases.into_iter().collect()
1939}
1940
1941pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
1942 effective_config().alias_tool_calling.get(alias).cloned()
1943}
1944
1945pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
1947 let config = effective_config();
1948 model_catalog_entries_with_config(&config)
1949}
1950
1951pub(crate) fn model_catalog_entries_with_config(
1952 config: &ProvidersConfig,
1953) -> Vec<(String, ModelDef)> {
1954 sorted_model_entries_with_config(config)
1955 .into_iter()
1956 .map(|(id, model)| {
1957 let provider = model.provider.clone();
1958 (
1959 id.clone(),
1960 with_effective_capability_tags(id, provider, model),
1961 )
1962 })
1963 .collect()
1964}
1965
1966pub(crate) fn sorted_model_entries_with_config(
1967 config: &ProvidersConfig,
1968) -> Vec<(String, ModelDef)> {
1969 let mut entries: Vec<_> = config
1970 .models
1971 .iter()
1972 .map(|(id, model)| (id.clone(), model.clone()))
1973 .collect();
1974 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
1975 model_a
1976 .provider
1977 .cmp(&model_b.provider)
1978 .then_with(|| id_a.cmp(id_b))
1979 });
1980 entries
1981}
1982
1983pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
1984 effective_config()
1985 .models
1986 .get(model_id)
1987 .cloned()
1988 .map(|model| {
1989 let provider = model.provider.clone();
1990 with_effective_capability_tags(model_id.to_string(), provider, model)
1991 })
1992}
1993
1994pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
1995 model_catalog_entry(model_id).and_then(|model| model.rate_limits)
1996}
1997
1998pub fn wire_model_id(model_id: &str) -> String {
1999 model_catalog_entry(model_id)
2000 .and_then(|model| model.wire_model)
2001 .unwrap_or_else(|| model_id.to_string())
2002}
2003
2004pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
2005 provider_config(provider).and_then(|provider| {
2006 provider
2007 .rate_limits
2008 .unwrap_or_default()
2009 .with_rpm_fallback(provider.rpm)
2010 })
2011}
2012
2013pub fn model_equivalence_group(model_id: &str) -> Option<String> {
2014 model_catalog_entry(model_id).and_then(|model| {
2015 model
2016 .equivalence_group
2017 .or(model.logical_model)
2018 .filter(|group| !group.trim().is_empty())
2019 })
2020}
2021
2022pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
2026 let resolved = resolve_model_info(selector);
2027 let Some(group) = model_equivalence_group(&resolved.id) else {
2028 return Vec::new();
2029 };
2030 let config = effective_config();
2031 let Some(source) = config.models.get(&resolved.id) else {
2032 return Vec::new();
2033 };
2034 let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
2035 let source_context = source
2036 .runtime_context_window
2037 .unwrap_or(source.context_window);
2038
2039 sorted_model_entries_with_config(&config)
2040 .into_iter()
2041 .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
2042 .filter(|(_, model)| !model.deprecated)
2043 .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
2044 .filter(|(_, model)| {
2045 model.equivalence_group.as_deref() == Some(group.as_str())
2046 || model.logical_model.as_deref() == Some(group.as_str())
2047 })
2048 .filter(|(id, model)| {
2049 let caps = crate::llm::capabilities::lookup(&model.provider, id);
2050 let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
2051 candidate_context >= source_context
2052 && (!source_caps.native_tools || caps.native_tools)
2053 && (!source_caps.text_tool_wire_format_supported
2054 || caps.text_tool_wire_format_supported)
2055 && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
2056 && source_caps.structured_output_mode == caps.structured_output_mode
2057 })
2058 .map(|(id, model)| {
2059 let provider = model.provider.clone();
2060 (
2061 id.clone(),
2062 with_effective_capability_tags(id, provider, model),
2063 )
2064 })
2065 .collect()
2066}
2067
2068pub fn qc_default_model(provider: &str) -> Option<String> {
2069 std::env::var("BURIN_QC_MODEL")
2070 .ok()
2071 .filter(|value| !value.trim().is_empty())
2072 .or_else(|| {
2073 effective_config()
2074 .qc_defaults
2075 .get(&provider.to_lowercase())
2076 .cloned()
2077 })
2078}
2079
2080pub fn default_model_for_provider(provider: &str) -> String {
2081 if provider_uses_acp(provider) {
2082 return "default".to_string();
2083 }
2084 match provider {
2085 "local" => std::env::var("LOCAL_LLM_MODEL")
2086 .or_else(|_| std::env::var("HARN_LLM_MODEL"))
2087 .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
2088 "mlx" => std::env::var("MLX_MODEL_ID")
2089 .unwrap_or_else(|_| "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit".to_string()),
2090 "openai" => "gpt-4o-mini".to_string(),
2091 "ollama" => "llama3.2".to_string(),
2092 "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
2093 _ => "claude-sonnet-4-6".to_string(),
2094 }
2095}
2096
2097pub fn qc_defaults() -> BTreeMap<String, String> {
2098 effective_config().qc_defaults
2099}
2100
2101pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2102 effective_config()
2103 .models
2104 .get(model_id)
2105 .and_then(|model| model.pricing.clone())
2106}
2107
2108pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
2113 effective_config()
2114 .models
2115 .get(model_id)
2116 .and_then(|model| model.fast_mode.as_ref())
2117 .and_then(|fast_mode| fast_mode.pricing.clone())
2118}
2119
2120pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
2121 model_pricing_per_mtok(model_id)
2122 .map(|pricing| {
2123 (
2124 pricing.input_per_mtok / 1000.0,
2125 pricing.output_per_mtok / 1000.0,
2126 )
2127 })
2128 .or_else(|| {
2129 let (input, output, _) = provider_economics(provider);
2130 match (input, output) {
2131 (Some(input), Some(output)) => Some((input, output)),
2132 _ => None,
2133 }
2134 })
2135}
2136
2137pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
2138 match auth_env {
2139 AuthEnv::None => Vec::new(),
2140 AuthEnv::Single(name) => vec![name.clone()],
2141 AuthEnv::Multiple(names) => names.clone(),
2142 }
2143}
2144
2145pub fn provider_key_available(provider: &str) -> bool {
2146 let Some(pdef) = provider_config(provider) else {
2147 return provider == "ollama";
2148 };
2149 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
2150 return true;
2151 }
2152 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
2153 std::env::var(env_name)
2154 .ok()
2155 .is_some_and(|value| !value.trim().is_empty())
2156 })
2157}
2158
2159pub fn available_provider_names() -> Vec<String> {
2160 provider_names()
2161 .into_iter()
2162 .filter(|provider| provider_key_available(provider))
2163 .collect()
2164}
2165
2166pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
2168 provider_config(provider)
2169 .map(|p| p.features.iter().any(|f| f == feature))
2170 .unwrap_or(false)
2171}
2172
2173pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
2177 provider_config(provider)
2178 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
2179 .unwrap_or((None, None, None))
2180}
2181
/// Classification of a tool-format name, as produced by [`tool_format_channel`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolFormatChannel {
    /// Selected by the `"native"` format.
    Native,
    /// Selected by the `"text"` and `"json"` formats.
    Text,
}

/// Maps a tool-format name to its channel; unrecognized names (the comparison
/// is case-sensitive) yield `None`.
pub fn tool_format_channel(format: &str) -> Option<ToolFormatChannel> {
    if format == "native" {
        Some(ToolFormatChannel::Native)
    } else if format == "text" || format == "json" {
        Some(ToolFormatChannel::Text)
    } else {
        None
    }
}
2212
2213pub fn is_known_tool_format(format: &str) -> bool {
2218 tool_format_channel(format).is_some()
2219}
2220
2221pub fn default_tool_format(model: &str, provider: &str) -> String {
2227 let config = effective_config();
2228 default_tool_format_with_config(&config, model, provider)
2229}
2230
2231fn default_tool_format_with_config(
2232 config: &ProvidersConfig,
2233 model: &str,
2234 provider: &str,
2235) -> String {
2236 for (name, alias) in &config.aliases {
2238 let matches = (alias.id == model && alias.provider == provider) || name == model;
2239 if matches {
2240 if let Some(ref fmt) = alias.tool_format {
2241 return fmt.clone();
2242 }
2243 }
2244 }
2245 let capabilities = crate::llm::capabilities::lookup(provider, model);
2246 if let Some(format) = capabilities.preferred_tool_format.as_deref() {
2247 if is_known_tool_format(format) {
2254 return format.to_string();
2255 }
2256 }
2257 let capability_matrix_native = capabilities.native_tools;
2258 let legacy_provider_native = config
2259 .providers
2260 .get(provider)
2261 .map(|p| p.features.iter().any(|f| f == "native_tools"))
2262 .unwrap_or(false);
2263 if capability_matrix_native || legacy_provider_native {
2264 "native".to_string()
2265 } else {
2266 "json".to_string()
2277 }
2278}
2279
2280fn with_effective_capability_tags(
2281 model_id: String,
2282 provider: String,
2283 mut model: ModelDef,
2284) -> ModelDef {
2285 model.capabilities = effective_model_capability_tags(&provider, &model_id);
2286 model
2287}
2288
2289pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2293 let caps = crate::llm::capabilities::lookup(provider, model_id);
2294 capability_tags_from_capabilities(&caps)
2295}
2296
2297pub(crate) fn capability_tags_from_capabilities(
2298 caps: &crate::llm::capabilities::Capabilities,
2299) -> Vec<String> {
2300 let mut tags = Vec::new();
2301 tags.push("streaming".to_string());
2304 if caps.native_tools || caps.text_tool_wire_format_supported {
2305 tags.push("tools".to_string());
2306 }
2307 if !caps.tool_search.is_empty() {
2308 tags.push("tool_search".to_string());
2309 }
2310 if caps.vision || caps.vision_supported {
2311 tags.push("vision".to_string());
2312 }
2313 if caps.audio {
2314 tags.push("audio".to_string());
2315 }
2316 if caps.pdf {
2317 tags.push("pdf".to_string());
2318 }
2319 if caps.video {
2320 tags.push("video".to_string());
2321 }
2322 if caps.files_api_supported {
2323 tags.push("files".to_string());
2324 }
2325 if caps.prompt_caching {
2326 tags.push("prompt_caching".to_string());
2327 }
2328 if !caps.thinking_modes.is_empty() {
2329 tags.push("thinking".to_string());
2330 }
2331 if caps.interleaved_thinking_supported
2332 || caps
2333 .thinking_modes
2334 .iter()
2335 .any(|mode| mode == "adaptive" || mode == "effort")
2336 {
2337 tags.push("extended_thinking".to_string());
2338 }
2339 if caps.structured_output.is_some() || caps.json_schema.is_some() {
2340 tags.push("structured_output".to_string());
2341 }
2342 tags
2343}
2344
2345pub fn resolve_tier_model(
2347 target: &str,
2348 preferred_provider: Option<&str>,
2349) -> Option<(String, String)> {
2350 let config = effective_config();
2351
2352 if let Some(alias) = config.aliases.get(target) {
2353 return Some((alias.id.clone(), alias.provider.clone()));
2354 }
2355
2356 let candidate_aliases = if let Some(provider) = preferred_provider {
2357 vec![
2358 format!("{provider}/{target}"),
2359 format!("{provider}:{target}"),
2360 format!("tier/{target}"),
2361 target.to_string(),
2362 ]
2363 } else {
2364 vec![format!("tier/{target}"), target.to_string()]
2365 };
2366
2367 for alias_name in candidate_aliases {
2368 if let Some(alias) = config.aliases.get(&alias_name) {
2369 return Some((alias.id.clone(), alias.provider.clone()));
2370 }
2371 }
2372
2373 None
2374}
2375
2376pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2380 let config = effective_config();
2381 let mut seen = std::collections::BTreeSet::new();
2382 let mut candidates = Vec::new();
2383
2384 for alias in config.aliases.values() {
2385 let pair = (alias.id.clone(), alias.provider.clone());
2386 if seen.contains(&pair) {
2387 continue;
2388 }
2389 if model_tier(&alias.id) == target {
2390 seen.insert(pair.clone());
2391 candidates.push(pair);
2392 }
2393 }
2394
2395 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2396 provider_a
2397 .cmp(provider_b)
2398 .then_with(|| model_a.cmp(model_b))
2399 });
2400 candidates
2401}
2402
2403pub fn all_model_candidates() -> Vec<(String, String)> {
2406 let config = effective_config();
2407 let mut seen = std::collections::BTreeSet::new();
2408 let mut candidates = Vec::new();
2409
2410 for alias in config.aliases.values() {
2411 let pair = (alias.id.clone(), alias.provider.clone());
2412 if seen.insert(pair.clone()) {
2413 candidates.push(pair);
2414 }
2415 }
2416
2417 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2418 provider_a
2419 .cmp(provider_b)
2420 .then_with(|| model_a.cmp(model_b))
2421 });
2422 candidates
2423}
2424
2425pub fn pick_complementary_reviewer(
2426 options: ComplementaryReviewerOptions,
2427) -> ComplementaryReviewerSelection {
2428 let config = effective_config();
2429 let mut author = resolve_model_info(&options.author_model);
2430 if let Some(provider) = options
2431 .author_provider
2432 .as_deref()
2433 .map(str::trim)
2434 .filter(|provider| !provider.is_empty())
2435 {
2436 author.provider = provider.to_string();
2437 author.family = model_family_with_config(&config, &author.provider, &author.id);
2438 author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
2439 author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
2440 }
2441 let author_entry = config.models.get(&author.id);
2442 let author_identity = complementary_identity(
2443 author.id.clone(),
2444 author.provider.clone(),
2445 author.family.clone(),
2446 author.lineage.clone(),
2447 author.tier.clone(),
2448 author_entry.and_then(|model| model.pricing.clone()),
2449 );
2450
2451 let fallback =
2452 |code: ReviewerFallbackCode, fallback_reason: String| ComplementaryReviewerSelection {
2453 intent: options.intent.as_str().to_string(),
2454 reviewer: author_identity.clone(),
2455 estimated_incremental_cost: cost_estimate(
2456 author_identity.pricing.as_ref(),
2457 author_identity.pricing.as_ref(),
2458 ),
2459 author: author_identity.clone(),
2460 fallback: true,
2461 reason: format!(
2462 "using author model {} because {fallback_reason}",
2463 author_identity.id
2464 ),
2465 fallback_reason: Some(fallback_reason),
2466 fallback_code: Some(code.as_code().to_string()),
2467 };
2468
2469 if author_identity.family == "unknown" {
2470 return fallback(
2471 ReviewerFallbackCode::UnknownAuthorFamily,
2472 "author model family is unknown".to_string(),
2473 );
2474 }
2475
2476 let preferred_families = author_entry
2477 .map(|model| model.complementary_with.clone())
2478 .unwrap_or_default();
2479 let author_refs = reviewer_match_refs(&author_identity);
2480 let mut rejected_by_price = 0usize;
2481 let mut diff_family_seen = 0usize;
2482 let mut candidates = Vec::new();
2483
2484 for (id, model) in config.models.iter() {
2485 if id == &author_identity.id && model.provider == author_identity.provider {
2486 continue;
2487 }
2488 if model.deprecated || model.availability != ModelAvailability::Serverless {
2489 continue;
2490 }
2491 let family = model_family_with_config(&config, &model.provider, id);
2492 if family == "unknown" || family == author_identity.family {
2493 continue;
2494 }
2495 diff_family_seen += 1;
2496 let lineage = model_lineage_with_config(&config, &model.provider, id);
2497 let candidate_identity = complementary_identity(
2498 id.clone(),
2499 model.provider.clone(),
2500 family,
2501 lineage,
2502 model_tier_with_config(&config, id),
2503 model.pricing.clone(),
2504 );
2505 if model
2506 .avoid_as_reviewer_for
2507 .iter()
2508 .any(|selector| refs_contain_selector(&author_refs, selector))
2509 {
2510 continue;
2511 }
2512 if exceeds_price_cap(
2513 author_identity.pricing.as_ref(),
2514 candidate_identity.pricing.as_ref(),
2515 options.max_price_multiplier,
2516 ) {
2517 rejected_by_price += 1;
2518 continue;
2519 }
2520 let score = reviewer_score(
2521 &options,
2522 &author_identity,
2523 &candidate_identity,
2524 model,
2525 &preferred_families,
2526 );
2527 candidates.push(ReviewerCandidate {
2528 identity: candidate_identity,
2529 score,
2530 });
2531 }
2532
2533 candidates.sort_by(|left, right| {
2534 right
2535 .score
2536 .partial_cmp(&left.score)
2537 .unwrap_or(std::cmp::Ordering::Equal)
2538 .then_with(|| left.identity.provider.cmp(&right.identity.provider))
2539 .then_with(|| left.identity.id.cmp(&right.identity.id))
2540 });
2541
2542 let Some(best) = candidates.into_iter().next() else {
2543 if rejected_by_price > 0 {
2544 let cap = options.max_price_multiplier.unwrap_or_default();
2545 return fallback(
2546 ReviewerFallbackCode::NoDiffFamilyWithinPrice,
2547 format!("no different-family reviewer satisfied max_price_multiplier {cap}"),
2548 );
2549 }
2550 if diff_family_seen == 0 {
2551 return fallback(
2552 ReviewerFallbackCode::NoDiffFamilyServerless,
2553 "no active serverless different-family reviewer is cataloged".to_string(),
2554 );
2555 }
2556 return fallback(
2557 ReviewerFallbackCode::AllDiffFamilyExcluded,
2558 "all different-family reviewer candidates were excluded".to_string(),
2559 );
2560 };
2561
2562 let estimate = cost_estimate(
2563 best.identity.pricing.as_ref(),
2564 author_identity.pricing.as_ref(),
2565 );
2566 ComplementaryReviewerSelection {
2567 intent: options.intent.as_str().to_string(),
2568 reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
2569 estimated_incremental_cost: estimate,
2570 author: author_identity,
2571 reviewer: best.identity,
2572 fallback: false,
2573 fallback_reason: None,
2574 fallback_code: None,
2575 }
2576}
2577
2578#[derive(Debug, Clone)]
2579struct ReviewerCandidate {
2580 identity: ComplementaryModelIdentity,
2581 score: f64,
2582}
2583
2584fn complementary_identity(
2585 id: String,
2586 provider: String,
2587 family: String,
2588 lineage: String,
2589 tier: String,
2590 pricing: Option<ModelPricing>,
2591) -> ComplementaryModelIdentity {
2592 ComplementaryModelIdentity {
2593 id,
2594 provider,
2595 family,
2596 lineage,
2597 tier,
2598 pricing,
2599 }
2600}
2601
2602fn reviewer_score(
2603 options: &ComplementaryReviewerOptions,
2604 author: &ComplementaryModelIdentity,
2605 candidate: &ComplementaryModelIdentity,
2606 model: &ModelDef,
2607 preferred_families: &[String],
2608) -> f64 {
2609 let candidate_refs = reviewer_match_refs(candidate);
2610 let mut score = 0.0;
2611 if let Some(rank) = preferred_families
2612 .iter()
2613 .position(|selector| refs_contain_selector(&candidate_refs, selector))
2614 {
2615 score += 1_000.0 - rank as f64;
2616 }
2617 if candidate.provider != author.provider {
2618 score += 100.0;
2619 }
2620 score += match tier_distance(&author.tier, &candidate.tier) {
2621 0 => 80.0,
2622 1 => 45.0,
2623 2 => 15.0,
2624 _ => 0.0,
2625 };
2626 for strength in intent_strengths(options.intent) {
2627 if model.strengths.iter().any(|tag| tag == strength) {
2628 score += 8.0;
2629 }
2630 }
2631 if model.capabilities.iter().any(|tag| tag == "tools") {
2632 score += 4.0;
2633 }
2634 if let (Some(author_total), Some(candidate_total)) = (
2635 pricing_total(author.pricing.as_ref()),
2636 pricing_total(candidate.pricing.as_ref()),
2637 ) {
2638 if author_total > 0.0 {
2639 let ratio = candidate_total / author_total;
2640 if ratio <= 1.0 {
2641 score += 20.0;
2642 }
2643 score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2644 }
2645 }
2646 score
2647}
2648
2649fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2650 match intent {
2651 ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2652 ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2653 ComplementaryReviewerIntent::PlanReview => {
2654 &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2655 }
2656 }
2657}
2658
2659fn tier_distance(left: &str, right: &str) -> u8 {
2660 let left = tier_rank(left);
2661 let right = tier_rank(right);
2662 left.abs_diff(right)
2663}
2664
/// Maps a tier name to its rank: `"small"` is 0, `"frontier"` and `"reasoning"` are 2, and
/// everything else, including `"mid"` and unrecognized names, is 1.
fn tier_rank(tier: &str) -> u8 {
    match tier {
        "small" => 0,
        "frontier" | "reasoning" => 2,
        // "mid" and any unknown tier share the middle rank.
        _ => 1,
    }
}
2673
2674fn exceeds_price_cap(
2675 author_pricing: Option<&ModelPricing>,
2676 candidate_pricing: Option<&ModelPricing>,
2677 max_price_multiplier: Option<f64>,
2678) -> bool {
2679 let Some(max_price_multiplier) = max_price_multiplier else {
2680 return false;
2681 };
2682 let Some(author_total) = pricing_total(author_pricing) else {
2683 return false;
2684 };
2685 let Some(candidate_total) = pricing_total(candidate_pricing) else {
2686 return true;
2687 };
2688 author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2689}
2690
2691fn cost_estimate(
2692 reviewer_pricing: Option<&ModelPricing>,
2693 author_pricing: Option<&ModelPricing>,
2694) -> Option<ComplementaryCostEstimate> {
2695 let reviewer_pricing = reviewer_pricing?;
2696 let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2697 let multiplier_vs_author = pricing_total(author_pricing)
2698 .filter(|author_total| *author_total > 0.0)
2699 .map(|author_total| total_per_mtok / author_total);
2700 Some(ComplementaryCostEstimate {
2701 input_per_mtok: reviewer_pricing.input_per_mtok,
2702 output_per_mtok: reviewer_pricing.output_per_mtok,
2703 total_per_mtok,
2704 multiplier_vs_author,
2705 })
2706}
2707
2708fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2709 pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2710}
2711
2712fn reviewer_reason(
2713 author: &ComplementaryModelIdentity,
2714 reviewer: &ComplementaryModelIdentity,
2715 estimate: Option<&ComplementaryCostEstimate>,
2716) -> String {
2717 let cost = estimate
2718 .and_then(|estimate| estimate.multiplier_vs_author)
2719 .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2720 .unwrap_or_else(|| "price ratio unavailable".to_string());
2721 format!(
2722 "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2723 reviewer.id,
2724 reviewer.provider,
2725 reviewer.family,
2726 author.family,
2727 reviewer.tier,
2728 author.tier,
2729 cost
2730 )
2731}
2732
2733fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2734 BTreeSet::from([
2735 identity.id.to_ascii_lowercase(),
2736 identity.provider.to_ascii_lowercase(),
2737 format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2738 format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2739 identity.family.to_ascii_lowercase(),
2740 identity.lineage.to_ascii_lowercase(),
2741 ])
2742}
2743
2744fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2745 normalized_catalog_token(Some(selector))
2746 .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2747 .is_some_and(|selector| refs.contains(&selector))
2748}
2749
2750use harn_glob::match_name as glob_match;
2753
2754fn dirs_or_home() -> Option<String> {
2755 crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2756}
2757
2758pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2761 if let Some(env_name) = &pdef.base_url_env {
2762 if let Ok(val) = std::env::var(env_name) {
2763 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2765 if !trimmed.is_empty() {
2766 return trimmed.to_string();
2767 }
2768 }
2769 }
2770 pdef.base_url.clone()
2771}
2772
2773const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2777
2778fn default_config() -> ProvidersConfig {
2792 parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2793 .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2794}
2795
/// Test helper: layers `overlay` on top of the built-in defaults via `merge_from` and returns
/// the merged configuration.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2802
2803#[cfg(test)]
2804mod tests {
2805 use super::*;
2806
2807 fn reset_overrides() {
2808 clear_user_overrides();
2809 }
2810
2811 #[test]
2812 fn resolve_model_info_guards_bad_native_pin_on_unreliable_route() {
2813 reset_overrides();
2814 let overlay = parse_config_toml(
2821 "[aliases.guard-ds]\nid = \"deepseek/deepseek-v3.2\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2822 )
2823 .expect("overlay parses");
2824 set_user_overrides(Some(overlay));
2825 let resolved = resolve_model_info("guard-ds");
2826 assert_eq!(
2827 resolved.tool_format, "text",
2828 "a native pin on a native_unreliable route must be auto-corrected to text"
2829 );
2830 clear_user_overrides();
2831
2832 let overlay_ok = parse_config_toml(
2834 "[aliases.guard-ds-ok]\nid = \"deepseek/deepseek-v3-base\"\nprovider = \"openrouter\"\ntool_format = \"native\"\n",
2835 )
2836 .expect("overlay parses");
2837 set_user_overrides(Some(overlay_ok));
2838 let resolved_ok = resolve_model_info("guard-ds-ok");
2839 assert_eq!(resolved_ok.tool_format, "native");
2840 clear_user_overrides();
2841 }
2842
2843 #[test]
2844 fn auto_select_prefers_local_provider_without_cloud_credentials() {
2845 let config = parse_config_toml(
2849 "[providers.ollama]\nbase_url = \"http://localhost:11434\"\nchat_endpoint = \"/v1/chat/completions\"\n",
2850 )
2851 .expect("config parses");
2852 assert!(provider_is_local(config.providers.get("ollama").unwrap()));
2853 assert_eq!(auto_select_provider(&config), "ollama");
2854 }
2855
2856 #[test]
2857 fn auto_select_falls_back_to_documented_default_when_empty() {
2858 let config = parse_config_toml("").expect("config parses");
2859 assert_eq!(auto_select_provider(&config), FALLBACK_PROVIDER);
2860 }
2861
2862 #[test]
2863 fn suppress_routes_parse_and_merge_dedupe() {
2864 let mut base =
2865 parse_config_toml("[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\"]\n")
2866 .expect("base parses");
2867 assert!(!base.is_empty(), "a suppress-only overlay is not empty");
2868 let overlay = parse_config_toml(
2869 "[suppress]\nroutes = [\"together:Qwen/Qwen3-Coder-Next-FP8\", \"ollama:img:tag\"]\n",
2870 )
2871 .expect("overlay parses");
2872 base.merge_from(&overlay);
2873 assert_eq!(
2874 base.suppress.routes,
2875 vec![
2876 "together:Qwen/Qwen3-Coder-Next-FP8".to_string(),
2877 "ollama:img:tag".to_string(),
2878 ],
2879 "merge appends new selectors without duplicating existing ones"
2880 );
2881 }
2882
2883 #[test]
2884 fn test_glob_match_prefix() {
2885 assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
2886 assert!(glob_match("gpt-*", "gpt-4o"));
2887 assert!(!glob_match("claude-*", "gpt-4o"));
2888 }
2889
2890 #[test]
2891 fn test_glob_match_suffix() {
2892 assert!(glob_match("*-latest", "llama3.2-latest"));
2893 assert!(!glob_match("*-latest", "llama3.2"));
2894 }
2895
2896 #[test]
2897 fn test_glob_match_middle() {
2898 assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
2899 assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
2900 }
2901
2902 #[test]
2903 fn test_glob_match_exact() {
2904 assert!(glob_match("gpt-4o", "gpt-4o"));
2905 assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
2906 }
2907
2908 #[test]
2909 fn test_infer_provider_from_defaults() {
2910 let _guard = crate::llm::env_guard();
2911 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2912 unsafe {
2913 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2914 }
2915
2916 assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
2917 assert_eq!(infer_provider("gpt-4o"), "openai");
2918 assert_eq!(infer_provider("o1-preview"), "openai");
2919 assert_eq!(infer_provider("o3-mini"), "openai");
2920 assert_eq!(infer_provider("o4-mini"), "openai");
2921 assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
2922 assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
2923 assert_eq!(infer_provider("llama3.2:latest"), "ollama");
2924 assert_eq!(infer_provider("unknown-model"), "anthropic");
2925
2926 unsafe {
2927 match prev_default_provider {
2928 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2929 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2930 }
2931 }
2932 }
2933
2934 #[test]
2935 fn test_infer_provider_prefix_rules() {
2936 assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
2937 assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
2938 assert_eq!(infer_provider("local:owner/model"), "ollama");
2940 assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
2941 }
2942
2943 #[test]
2944 fn test_openrouter_inference_requires_one_slash() {
2945 let _guard = crate::llm::env_guard();
2946 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2947 unsafe {
2948 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2949 }
2950
2951 assert_eq!(infer_provider("org/model"), "openrouter");
2952 assert_eq!(infer_provider("org/team/model"), "anthropic");
2953
2954 unsafe {
2955 match prev_default_provider {
2956 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2957 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2958 }
2959 }
2960 }
2961
2962 #[test]
2963 fn test_cerebras_inference_beats_openrouter_slash_fallback() {
2964 let _guard = crate::llm::env_guard();
2965 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2966 unsafe {
2967 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2968 }
2969
2970 assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
2971 assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
2972 assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
2973
2974 unsafe {
2975 match prev_default_provider {
2976 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2977 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2978 }
2979 }
2980 }
2981
2982 #[test]
2983 fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
2984 let _guard = crate::llm::env_guard();
2989 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2990 unsafe {
2991 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2992 }
2993
2994 for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
2995 assert_eq!(
2996 infer_provider(model),
2997 "cerebras",
2998 "{model} should route to its catalog provider"
2999 );
3000 let resolved = resolve_model_info(model);
3001 assert_eq!(resolved.id, model);
3002 assert_eq!(resolved.provider, "cerebras");
3003 }
3004
3005 unsafe {
3006 match prev_default_provider {
3007 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3008 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3009 }
3010 }
3011 }
3012
3013 #[test]
3014 fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
3015 reset_overrides();
3016
3017 assert_eq!(
3018 wire_model_id("groq/openai/gpt-oss-120b"),
3019 "openai/gpt-oss-120b"
3020 );
3021 assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");
3022
3023 let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
3024 let ids = equivalents
3025 .iter()
3026 .map(|(id, _)| id.as_str())
3027 .collect::<Vec<_>>();
3028
3029 assert!(
3030 ids.contains(&"groq/openai/gpt-oss-120b"),
3031 "Cerebras GPT-OSS should surface the Groq serving variant"
3032 );
3033 assert!(
3034 !ids.contains(&"gpt-oss-120b"),
3035 "equivalence results should not include the source row"
3036 );
3037 assert!(equivalents.iter().all(|(_, model)| {
3038 model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
3039 }));
3040 }
3041
3042 #[test]
3043 fn fireworks_gpt_oss_route_has_real_context_window() {
3044 reset_overrides();
3051
3052 let entry = model_catalog_entry("accounts/fireworks/models/gpt-oss-120b")
3053 .expect("Fireworks gpt-oss-120b must be in the model catalog");
3054 assert_eq!(entry.context_window, 131_072);
3055 assert_eq!(entry.provider, "fireworks");
3056 assert_eq!(
3057 entry.equivalence_group.as_deref(),
3058 Some("openai-gpt-oss-120b"),
3059 );
3060 }
3061
3062 #[test]
3063 fn test_user_catalog_overlay_re_homes_model_provider() {
3064 reset_overrides();
3068 let mut overlay = ProvidersConfig::default();
3069 overlay.models.insert(
3070 "gpt-4o".to_string(),
3071 ModelDef {
3072 name: "GPT-4o via OpenRouter".to_string(),
3073 provider: "openrouter".to_string(),
3074 context_window: 128_000,
3075 logical_model: None,
3076 equivalence_group: None,
3077 served_variant: None,
3078 wire_model: None,
3079 api_dialect: None,
3080 rate_limits: None,
3081 performance: None,
3082 architecture: None,
3083 local_memory: None,
3084 runtime_context_window: None,
3085 stream_timeout: None,
3086 capabilities: Vec::new(),
3087 pricing: None,
3088 deprecated: false,
3089 deprecation_note: None,
3090 superseded_by: None,
3091 fast_mode: None,
3092 quality_tags: Vec::new(),
3093 availability: ModelAvailability::default(),
3094 tier: None,
3095 open_weight: None,
3096 strengths: Vec::new(),
3097 benchmarks: std::collections::BTreeMap::new(),
3098 family: None,
3099 lineage: None,
3100 complementary_with: Vec::new(),
3101 avoid_as_reviewer_for: Vec::new(),
3102 },
3103 );
3104 set_user_overrides(Some(overlay));
3105
3106 assert_eq!(infer_provider("gpt-4o"), "openrouter");
3107
3108 reset_overrides();
3109 }
3110
3111 #[test]
3112 fn test_resolve_model_info_normalizes_provider_prefixes() {
3113 let local = resolve_model_info("local:gemma-4-e4b-it");
3114 assert_eq!(local.id, "gemma-4-e4b-it");
3115 assert_eq!(local.provider, "ollama");
3116
3117 let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
3118 assert_eq!(ollama.id, "qwen3:30b-a3b");
3119 assert_eq!(ollama.provider, "ollama");
3120
3121 let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
3122 assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
3123 assert_eq!(hf.provider, "huggingface");
3124
3125 let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
3126 assert_eq!(cerebras.id, "gpt-oss-120b");
3127 assert_eq!(cerebras.provider, "cerebras");
3128
3129 let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
3130 assert_eq!(cerebras_glm.id, "zai-glm-4.7");
3131 assert_eq!(cerebras_glm.provider, "cerebras");
3132 }
3133
3134 #[test]
3135 fn test_model_tier_from_defaults() {
3136 assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
3140 assert_eq!(model_tier("gpt-4o"), "frontier");
3141 assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
3142 assert_eq!(model_tier("deepseek-v4-flash"), "mid");
3143 assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
3144 assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
3145 assert_eq!(model_tier("glm-5.1"), "frontier");
3146 assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
3148 }
3149
3150 #[test]
3151 fn test_model_family_preserves_underlying_hosted_lineage() {
3152 assert_eq!(
3153 model_family("openrouter", "anthropic/claude-sonnet-4-6"),
3154 "anthropic-claude"
3155 );
3156 assert_eq!(
3157 model_family("openrouter", "google/gemini-2.5-flash"),
3158 "google-gemini"
3159 );
3160 assert_eq!(
3161 model_family("openrouter", "openai/o3-mini"),
3162 "openai-reasoning"
3163 );
3164 assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
3165 assert_eq!(
3166 model_lineage("openrouter", "openai/o3-mini"),
3167 "openai-reasoning"
3168 );
3169 assert_eq!(
3170 model_lineage("anthropic", "claude-opus-4-8"),
3171 "claude-opus-adaptive"
3172 );
3173 assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
3174 }
3175
3176 #[test]
3177 fn test_complementary_reviewer_uses_different_family() {
3178 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3179 author_model: "claude-sonnet-4-6".to_string(),
3180 author_provider: None,
3181 intent: ComplementaryReviewerIntent::PlanReview,
3182 max_price_multiplier: Some(3.0),
3183 });
3184
3185 assert!(!selection.fallback, "{selection:?}");
3186 assert_eq!(selection.author.family, "anthropic-claude");
3187 assert_ne!(selection.reviewer.family, selection.author.family);
3188 assert_eq!(selection.reviewer.tier, "frontier");
3189 assert!(selection.estimated_incremental_cost.is_some());
3190 assert_eq!(selection.fallback_code, None, "{selection:?}");
3193 }
3194
3195 #[test]
3196 fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
3197 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
3198 author_model: "gpt-4o-mini".to_string(),
3199 author_provider: Some("openai".to_string()),
3200 intent: ComplementaryReviewerIntent::Review,
3201 max_price_multiplier: Some(0.01),
3202 });
3203
3204 assert!(selection.fallback, "{selection:?}");
3205 assert_eq!(selection.reviewer.id, "gpt-4o-mini");
3206 assert_eq!(selection.reviewer.family, selection.author.family);
3207 assert!(selection
3208 .fallback_reason
3209 .as_deref()
3210 .is_some_and(|reason| reason.contains("max_price_multiplier")));
3211 assert_eq!(
3215 selection.fallback_code.as_deref(),
3216 Some(ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code()),
3217 "{selection:?}"
3218 );
3219 assert_eq!(
3220 ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3221 "no_diff_family_within_price"
3222 );
3223 }
3224
3225 #[test]
3226 fn test_reviewer_fallback_codes_are_stable_strings() {
3227 assert_eq!(
3230 ReviewerFallbackCode::UnknownAuthorFamily.as_code(),
3231 "unknown_author_family"
3232 );
3233 assert_eq!(
3234 ReviewerFallbackCode::NoDiffFamilyWithinPrice.as_code(),
3235 "no_diff_family_within_price"
3236 );
3237 assert_eq!(
3238 ReviewerFallbackCode::NoDiffFamilyServerless.as_code(),
3239 "no_diff_family_serverless"
3240 );
3241 assert_eq!(
3242 ReviewerFallbackCode::AllDiffFamilyExcluded.as_code(),
3243 "all_diff_family_excluded"
3244 );
3245 }
3246
3247 #[test]
3248 fn test_resolve_model_unknown_alias() {
3249 let (id, provider) = resolve_model("gpt-4o");
3250 assert_eq!(id, "gpt-4o");
3251 assert!(provider.is_none());
3252 }
3253
3254 #[test]
3255 fn test_provider_names() {
3256 let names = provider_names();
3257 assert!(names.len() >= 7);
3258 assert!(names.contains(&"anthropic".to_string()));
3259 assert!(names.contains(&"together".to_string()));
3260 assert!(names.contains(&"local".to_string()));
3261 assert!(names.contains(&"mlx".to_string()));
3262 assert!(names.contains(&"openai".to_string()));
3263 assert!(names.contains(&"ollama".to_string()));
3264 assert!(names.contains(&"bedrock".to_string()));
3265 assert!(names.contains(&"azure_openai".to_string()));
3266 assert!(names.contains(&"vertex".to_string()));
3267 }
3268
3269 #[test]
3270 fn global_provider_file_is_an_overlay_on_builtin_defaults() {
3271 let mut overlay = ProvidersConfig {
3272 default_provider: Some("ollama".to_string()),
3273 ..Default::default()
3274 };
3275 overlay.aliases.insert(
3276 "quickstart".to_string(),
3277 AliasDef {
3278 id: "llama3.2".to_string(),
3279 provider: "ollama".to_string(),
3280 tool_format: None,
3281 },
3282 );
3283
3284 let merged = merge_global_config(overlay);
3285
3286 assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
3287 assert!(merged.providers.contains_key("anthropic"));
3288 assert!(merged.providers.contains_key("ollama"));
3289 assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
3290 }
3291
3292 #[test]
3293 fn partial_provider_overlay_preserves_builtin_provider_metadata() {
3294 let overlay = parse_config_toml(
3295 r#"
3296 [providers.ollama]
3297 base_url = "http://localhost:11435"
3298 extra_headers = { "x-local" = "1" }
3299 "#,
3300 )
3301 .expect("provider overlay parses");
3302
3303 let merged = merge_global_config(overlay);
3304 let ollama = merged
3305 .providers
3306 .get("ollama")
3307 .expect("ollama remains configured");
3308
3309 assert_eq!(ollama.base_url, "http://localhost:11435");
3310 assert_eq!(ollama.auth_style, "none");
3311 assert_eq!(ollama.chat_endpoint, "/api/chat");
3312 assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
3313 assert_eq!(ollama.cost_per_1k_in, Some(0.0));
3314 assert_eq!(ollama.cost_per_1k_out, Some(0.0));
3315 assert_eq!(
3316 ollama
3317 .healthcheck
3318 .as_ref()
3319 .and_then(|healthcheck| healthcheck.path.as_deref()),
3320 Some("/api/tags")
3321 );
3322 assert_eq!(
3323 ollama.extra_headers.get("x-local").map(String::as_str),
3324 Some("1")
3325 );
3326 }
3327
3328 #[test]
3329 fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
3330 let overlay = parse_config_toml(
3331 r#"
3332 [providers.ollama]
3333 auth_style = "bearer"
3334 auth_env = "OLLAMA_API_KEY"
3335 "#,
3336 )
3337 .expect("provider overlay parses");
3338
3339 let merged = merge_global_config(overlay);
3340 let ollama = merged
3341 .providers
3342 .get("ollama")
3343 .expect("ollama remains configured");
3344
3345 assert_eq!(ollama.auth_style, "bearer");
3346 assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
3347 assert_eq!(ollama.chat_endpoint, "/api/chat");
3348 }
3349
3350 #[test]
3351 fn test_resolve_tier_model_default_aliases() {
3352 let (model, provider) = resolve_tier_model("frontier", None)
3357 .expect("frontier alias must resolve from the embedded catalog");
3358 assert_eq!(provider, "anthropic");
3359 assert!(
3360 model_catalog_entry(&model)
3361 .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
3362 "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
3363 );
3364
3365 let (model, provider) = resolve_tier_model("small", None)
3366 .expect("small alias must resolve from the embedded catalog");
3367 assert!(
3368 [
3369 "openrouter",
3370 "huggingface",
3371 "local",
3372 "llamacpp",
3373 "mlx",
3374 "ollama"
3375 ]
3376 .contains(&provider.as_str()),
3377 "small tier should resolve to an open-weight provider (got {provider} / {model})"
3378 );
3379 }
3380
3381 #[test]
3382 fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
3383 let (model, provider) = resolve_tier_model("mid", Some("openai"))
3387 .expect("mid tier scoped to openai must resolve");
3388 assert_eq!(provider, "openai");
3389 assert!(
3390 model_catalog_entry(&model).is_some(),
3391 "mid/openai alias must point at a registered model (got {model})"
3392 );
3393 }
3394
3395 #[test]
3396 fn test_provider_config_anthropic() {
3397 let pdef = provider_config("anthropic").unwrap();
3398 assert_eq!(pdef.auth_style, "header");
3399 assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
3400 }
3401
3402 #[test]
3403 fn test_provider_config_mlx() {
3404 let pdef = provider_config("mlx").unwrap();
3405 assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
3406 assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
3407 assert_eq!(
3408 pdef.healthcheck.unwrap().path.as_deref(),
3409 Some("/v1/models")
3410 );
3411
3412 let (model, provider) = resolve_model("mlx-qwen36-27b");
3413 assert_eq!(model, "unsloth/Qwen3.6-35B-A3B-UD-MLX-4bit");
3414 assert_eq!(provider.as_deref(), Some("mlx"));
3415 }
3416
3417 #[test]
3418 fn test_enterprise_provider_defaults_and_inference() {
3419 let bedrock = provider_config("bedrock").unwrap();
3420 assert_eq!(bedrock.auth_style, "aws_sigv4");
3421 assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
3422 assert_eq!(
3423 infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
3424 "bedrock"
3425 );
3426 assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
3427
3428 let azure = provider_config("azure_openai").unwrap();
3429 assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
3430 assert_eq!(
3431 auth_env_names(&azure.auth_env),
3432 vec![
3433 "AZURE_OPENAI_API_KEY".to_string(),
3434 "AZURE_OPENAI_AD_TOKEN".to_string(),
3435 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
3436 ]
3437 );
3438
3439 let vertex = provider_config("vertex").unwrap();
3440 assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
3441 assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
3442 }
3443
3444 #[test]
3445 fn test_default_provider_env_override_for_unknown_model() {
3446 let _guard = crate::llm::env_guard();
3447 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3448 unsafe {
3449 std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
3450 }
3451
3452 let inference = infer_provider_detail("unknown-model");
3453
3454 unsafe {
3455 match prev_default_provider {
3456 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3457 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3458 }
3459 }
3460
3461 assert_eq!(inference.provider, "openai");
3462 assert_eq!(
3463 inference.source,
3464 crate::llm::provider::ProviderInferenceSource::DefaultFallback
3465 );
3466 }
3467
/// With `HARN_DEFAULT_PROVIDER=ollama`, `resolve_model_info` for an
/// unrecognised model id must report provider `ollama` while keeping family and
/// lineage as `unknown`; a `deepseek-` prefixed id must still resolve to the
/// `deepseek` family and lineage.
#[test]
fn test_unknown_model_family_ignores_default_provider_fallback() {
    // Restores `HARN_DEFAULT_PROVIDER` to the captured value on drop, so the
    // variable is put back even when a call under test panics and unwinds.
    struct RestoreDefaultProvider(Option<String>);

    impl Drop for RestoreDefaultProvider {
        fn drop(&mut self) {
            // SAFETY: this value is declared after the `env_guard()` binding in
            // the test body, so it is dropped first, while that guard is alive.
            // NOTE(review): presumably the guard serializes env access across
            // tests — confirm against `crate::llm::env_guard`.
            unsafe {
                match self.0.take() {
                    Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                    None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
                }
            }
        }
    }

    let _guard = crate::llm::env_guard();
    let restore = RestoreDefaultProvider(std::env::var("HARN_DEFAULT_PROVIDER").ok());
    // SAFETY: mutation happens while `_guard` is held (see the note above).
    unsafe {
        std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
    }

    let unknown = resolve_model_info("mystery-model-xyz");
    let known_family = resolve_model_info("deepseek-mystery-model");

    // Put the environment back before asserting, as on the original happy path.
    drop(restore);

    assert_eq!(unknown.provider, "ollama");
    assert_eq!(unknown.family, "unknown");
    assert_eq!(unknown.lineage, "unknown");
    assert_eq!(known_family.family, "deepseek");
    assert_eq!(known_family.lineage, "deepseek");
}
3492
/// `resolve_base_url` on a `ProviderDef` that only sets `base_url` (every other
/// field left at its default) must yield that same URL.
#[test]
fn test_resolve_base_url_no_env() {
    let expected_url = "https://example.com";
    let provider = ProviderDef {
        base_url: expected_url.to_owned(),
        ..ProviderDef::default()
    };

    let resolved = resolve_base_url(&provider);

    assert_eq!(resolved, expected_url);
}
3501
/// Sanity-checks `default_config()`: providers and inference rules are present,
/// the default tier is `mid`, and at least four models carry the `frontier` tier.
#[test]
fn test_default_config_roundtrip() {
    let catalog = default_config();

    assert!(!catalog.providers.is_empty());
    assert!(!catalog.inference_rules.is_empty());
    assert_eq!(catalog.tier_defaults.default, "mid");

    // Count models whose tier tag is exactly "frontier".
    let frontiers = catalog
        .models
        .values()
        .filter(|model| matches!(model.tier.as_deref(), Some("frontier")))
        .count();
    assert!(
        frontiers >= 4,
        "expected at least 4 frontier-tagged models, got {frontiers}"
    );
}
3521
/// After resetting overrides, the catalog entries for `devstral-small-2:24b`
/// and `gemma4:26b` must both report a 262_144 context window; only the gemma4
/// entry may list the `vision` capability.
#[test]
fn test_local_ollama_catalog_metadata() {
    reset_overrides();

    let devstral_entry =
        model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
    assert_eq!(devstral_entry.context_window, 262_144);
    assert!(devstral_entry
        .capabilities
        .iter()
        .all(|tag| tag != "vision"));

    let gemma_entry = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
    assert_eq!(gemma_entry.context_window, 262_144);
    assert!(gemma_entry.capabilities.iter().any(|tag| tag == "vision"));
}
3535
/// For each listed gemma-4 model id, the `capabilities` stored in the embedded
/// catalog must equal what `effective_model_capability_tags` derives for the
/// same provider/model pair.
#[test]
fn local_gemma4_source_tags_match_structured_capability_tags() {
    const GEMMA4_IDS: [&str; 5] = [
        "gemma-4-e2b-it",
        "gemma-4-e4b-it",
        "gemma-4-12b-it",
        "gemma-4-26b-a4b-it",
        "gemma-4-31b-it",
    ];

    reset_overrides();
    let config = default_config();

    for id in GEMMA4_IDS {
        let Some(source) = config.models.get(id) else {
            panic!("{id} should be in the embedded catalog");
        };
        let derived = effective_model_capability_tags(&source.provider, id);
        assert_eq!(
            source.capabilities, derived,
            "{}/{} source capabilities must match derived capability_tags",
            source.provider, id
        );
    }
}
3559
/// A `Capabilities` value with every structured flag populated must map, via
/// `capability_tags_from_capabilities`, to the full tag list in the order
/// asserted below.
#[test]
fn capability_tags_include_structured_capability_flags() {
    let expected_tags = vec![
        "streaming",
        "tools",
        "tool_search",
        "vision",
        "audio",
        "pdf",
        "video",
        "files",
        "prompt_caching",
        "thinking",
        "structured_output",
    ];

    let all_flags = crate::llm::capabilities::Capabilities {
        native_tools: true,
        tool_search: vec![String::from("web")],
        vision_supported: true,
        audio: true,
        pdf: true,
        video: true,
        files_api_supported: true,
        prompt_caching: true,
        thinking_modes: vec![String::from("enabled")],
        structured_output: Some(String::from("native")),
        ..Default::default()
    };

    let actual_tags = capability_tags_from_capabilities(&all_flags);

    assert_eq!(actual_tags, expected_tags);
}
3593
/// Merging an overlay (new default provider plus one `custom` provider) into
/// `default_config()` must replace the default provider and add `custom` while
/// keeping the `anthropic` and `ollama` providers.
#[test]
fn test_external_config_overlays_default_catalog() {
    let custom_provider = ProviderDef {
        base_url: "https://llm.example.test/v1".to_string(),
        chat_endpoint: "/chat/completions".to_string(),
        ..Default::default()
    };
    let overlay = ProvidersConfig {
        default_provider: Some("ollama".to_string()),
        providers: [("custom".to_string(), custom_provider)]
            .into_iter()
            .collect(),
        ..Default::default()
    };

    let mut merged = default_config();
    merged.merge_from(&overlay);

    assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
    for expected in ["custom", "anthropic", "ollama"] {
        assert!(merged.providers.contains_key(expected));
    }
}
3617
/// `model_params` for `claude-sonnet-4-20250514` must be empty.
#[test]
fn test_model_params_empty() {
    assert!(model_params("claude-sonnet-4-20250514").is_empty());
}
3623
/// Installing user overrides that define provider `acme` and alias `acme-fast`
/// must make the alias resolvable and the provider visible through
/// `provider_names` and `provider_config`. Overrides are reset before and after.
#[test]
fn test_user_overrides_add_provider_and_alias() {
    reset_overrides();

    let acme_provider = ProviderDef {
        base_url: "https://llm.acme.test/v1".to_string(),
        chat_endpoint: "/chat/completions".to_string(),
        ..Default::default()
    };
    let acme_alias = AliasDef {
        id: "acme/model-fast".to_string(),
        provider: "acme".to_string(),
        tool_format: Some("native".to_string()),
    };

    let mut overlay = ProvidersConfig::default();
    overlay.providers.insert("acme".to_string(), acme_provider);
    overlay.aliases.insert("acme-fast".to_string(), acme_alias);
    set_user_overrides(Some(overlay));

    let (resolved_model, resolved_provider) = resolve_model("acme-fast");
    assert_eq!(resolved_model, "acme/model-fast");
    assert_eq!(resolved_provider.as_deref(), Some("acme"));
    assert!(provider_names().iter().any(|name| name == "acme"));

    let acme_base_url = provider_config("acme").map(|def| def.base_url);
    assert_eq!(acme_base_url.as_deref(), Some("https://llm.acme.test/v1"));

    reset_overrides();
}
3657
/// Checks `default_tool_format` against a table of `(model, provider)` pairs
/// and the tool format each is expected to yield once overrides are reset.
///
/// The table form keeps every case on one line, and the assertion message
/// names the failing pair so a regression is identifiable from test output.
#[test]
fn test_default_tool_format_uses_capability_matrix() {
    reset_overrides();

    // (model id, provider, expected tool format)
    let cases = [
        ("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp", "native"),
        ("devstral-small-2:24b", "ollama", "json"),
        ("gemma-4-26b-a4b-it", "local", "native"),
        ("deepseek/deepseek-v3.2", "openrouter", "text"),
        ("qwen/qwen3-coder-flash", "openrouter", "text"),
        ("openai/gpt-oss-120b", "openrouter", "text"),
        ("accounts/fireworks/models/gpt-oss-120b", "fireworks", "text"),
        ("gpt-oss-120b", "cerebras", "native"),
        ("openai/gpt-oss-120b", "deepinfra", "text"),
        ("openai/gpt-oss-120b", "groq", "native"),
    ];

    for (model, provider, expected) in cases {
        assert_eq!(
            default_tool_format(model, provider),
            expected,
            "unexpected default tool format for `{model}` on provider `{provider}`"
        );
    }
}
3708
/// With overrides reset, `default_tool_format` for `mystery-model-xyz` on the
/// `ollama` provider must be `json`.
#[test]
fn test_default_tool_format_unpinned_text_channel_is_json() {
    reset_overrides();

    let format = default_tool_format("mystery-model-xyz", "ollama");

    assert_eq!(format, "json");
}
3720
/// Installing user overrides with one model (`acme/model-fast`, priced) and one
/// `qc_defaults` entry must surface through `model_catalog_entry`,
/// `pricing_per_1k_for` and `qc_default_model`. Overrides are reset before and
/// after.
#[test]
fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
    reset_overrides();

    let acme_pricing = ModelPricing {
        input_per_mtok: 1.25,
        output_per_mtok: 2.5,
        cache_read_per_mtok: Some(0.25),
        cache_write_per_mtok: None,
    };
    // Every field is spelled out, exactly as in the fixture this test has
    // always used; capabilities are deliberately given as tools-then-streaming.
    let acme_fast = ModelDef {
        name: String::from("Acme Fast"),
        provider: String::from("acme"),
        context_window: 65_536,
        logical_model: None,
        equivalence_group: None,
        served_variant: None,
        wire_model: None,
        api_dialect: None,
        rate_limits: None,
        performance: None,
        architecture: None,
        local_memory: None,
        runtime_context_window: None,
        stream_timeout: Some(42.0),
        capabilities: vec![String::from("tools"), String::from("streaming")],
        pricing: Some(acme_pricing),
        deprecated: false,
        deprecation_note: None,
        superseded_by: None,
        fast_mode: None,
        quality_tags: Vec::new(),
        availability: ModelAvailability::default(),
        tier: None,
        open_weight: None,
        strengths: Vec::new(),
        benchmarks: std::collections::BTreeMap::new(),
        family: None,
        lineage: None,
        complementary_with: Vec::new(),
        avoid_as_reviewer_for: Vec::new(),
    };

    let mut overlay = ProvidersConfig::default();
    overlay
        .models
        .insert(String::from("acme/model-fast"), acme_fast);
    overlay
        .qc_defaults
        .insert(String::from("acme"), String::from("acme/model-cheap"));
    set_user_overrides(Some(overlay));

    let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
    assert_eq!(entry.context_window, 65_536);

    // The catalog entry is expected to list streaming before tools.
    let expected_capabilities = vec![String::from("streaming"), String::from("tools")];
    assert_eq!(entry.capabilities, expected_capabilities);

    let input_rate = entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok);
    assert_eq!(input_rate, Some(1.25));

    assert_eq!(
        pricing_per_1k_for("acme", "acme/model-fast"),
        Some((0.00125, 0.0025))
    );
    assert_eq!(
        qc_default_model("acme").as_deref(),
        Some("acme/model-cheap")
    );

    reset_overrides();
}
3791
/// A user override carrying an `internal-*` pattern rule for provider `openai`
/// must make `infer_provider("internal-foo")` return `openai`. Overrides are
/// reset before and after.
#[test]
fn test_user_overrides_prepend_inference_rules() {
    reset_overrides();

    let internal_rule = InferenceRule {
        pattern: Some(String::from("internal-*")),
        contains: None,
        exact: None,
        provider: String::from("openai"),
    };
    let overlay = ProvidersConfig {
        inference_rules: vec![internal_rule],
        ..Default::default()
    };
    set_user_overrides(Some(overlay));

    assert_eq!(infer_provider("internal-foo"), "openai");

    reset_overrides();
}
3808
/// The embedded catalog from `default_config()` must contain at least 10
/// providers, 20 models and 15 aliases, and default to provider `anthropic`.
#[test]
fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
    let config = default_config();

    let provider_count = config.providers.len();
    assert!(
        provider_count >= 10,
        "expected >=10 providers in embedded catalog, got {}",
        provider_count
    );

    let model_count = config.models.len();
    assert!(
        model_count >= 20,
        "expected >=20 models in embedded catalog, got {}",
        model_count
    );

    let alias_count = config.aliases.len();
    assert!(
        alias_count >= 15,
        "expected >=15 aliases in embedded catalog, got {}",
        alias_count
    );

    assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
}
3835
/// Every model flagged `deprecated` in the embedded catalog must carry a
/// `deprecation_note` that is non-empty after trimming whitespace.
#[test]
fn embedded_catalog_every_deprecated_model_has_a_note() {
    let config = default_config();

    let mut offenders: Vec<&str> = Vec::new();
    for (id, model) in &config.models {
        // A missing note is treated the same as a blank one.
        let note = model.deprecation_note.as_deref().unwrap_or_default();
        if model.deprecated && note.trim().is_empty() {
            offenders.push(id.as_str());
        }
    }

    assert!(
        offenders.is_empty(),
        "deprecated models missing a deprecation_note: {offenders:?}"
    );
}
3858
/// In the embedded catalog, `gpt-oss-120b` and `zai-glm-4.7` must be
/// non-deprecated serverless Cerebras rows, while `llama-3.3-70b` must be a
/// deprecated, dedicated Cerebras row.
#[test]
fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
    const PUBLIC_IDS: [&str; 2] = ["gpt-oss-120b", "zai-glm-4.7"];

    let config = default_config();

    for public_id in PUBLIC_IDS {
        let public_row = config
            .models
            .get(public_id)
            .expect("current public Cerebras row");
        assert_eq!(public_row.provider, "cerebras");
        assert_eq!(public_row.availability, ModelAvailability::Serverless);
        assert!(!public_row.deprecated);
    }

    let legacy_row = config
        .models
        .get("llama-3.3-70b")
        .expect("legacy Cerebras row");
    assert_eq!(legacy_row.provider, "cerebras");
    assert_eq!(legacy_row.availability, ModelAvailability::Dedicated);
    assert!(legacy_row.deprecated);
}
3877
/// The `openai/gpt-oss-120b` catalog row must belong to `openrouter`, be marked
/// open weight, list strengths that exclude `vision`, and every non-deprecated
/// model in the `openai-gpt-oss-120b` equivalence group must share one tier.
#[test]
fn embedded_openrouter_gpt_oss_120b_has_no_fragment_bleed() {
    let config = default_config();

    let model = config
        .models
        .get("openai/gpt-oss-120b")
        .expect("openrouter gpt-oss-120b row");

    assert_eq!(model.provider, "openrouter");
    assert_eq!(
        model.open_weight,
        Some(true),
        "gpt-oss-120b is Apache-2.0 open weight, not the bled-in open_weight=false"
    );

    let lists_vision = model.strengths.iter().any(|strength| strength == "vision");
    assert!(
        !lists_vision,
        "gpt-oss-120b is text-only; the bled-in `vision` strength must be gone: {:?}",
        model.strengths
    );
    assert!(
        !model.strengths.is_empty(),
        "gpt-oss-120b must carry its own strengths, not None"
    );

    // Distinct tiers across the live members of the equivalence group.
    let group_tiers: std::collections::BTreeSet<_> = config
        .models
        .values()
        .filter(|member| member.equivalence_group.as_deref() == Some("openai-gpt-oss-120b"))
        .filter(|member| !member.deprecated)
        .map(|member| member.tier.as_deref())
        .collect();
    assert_eq!(
        group_tiers.len(),
        1,
        "openai-gpt-oss-120b group must share one tier, got {group_tiers:?}"
    );
}
3924
/// Every model in the embedded catalog must name a provider that exists as a
/// key in `config.providers`.
#[test]
fn embedded_catalog_every_model_targets_a_registered_provider() {
    let config = default_config();

    // Collect (model id, provider) pairs whose provider is not registered.
    let orphans: Vec<(&str, &str)> = config
        .models
        .iter()
        .filter_map(|(id, model)| {
            let provider = model.provider.as_str();
            (!config.providers.contains_key(provider)).then_some((id.as_str(), provider))
        })
        .collect();

    assert!(
        orphans.is_empty(),
        "models reference unknown providers: {orphans:?}"
    );
}
3941
/// Every alias in the embedded catalog must name a provider that exists as a
/// key in `config.providers`.
#[test]
fn embedded_catalog_every_alias_targets_a_registered_provider() {
    let config = default_config();

    // Collect (alias name, provider) pairs whose provider is not registered.
    let orphans: Vec<(&str, &str)> = config
        .aliases
        .iter()
        .filter_map(|(name, alias)| {
            let provider = alias.provider.as_str();
            (!config.providers.contains_key(provider)).then_some((name.as_str(), provider))
        })
        .collect();

    assert!(
        orphans.is_empty(),
        "aliases reference unknown providers: {orphans:?}"
    );
}
3958
/// Every `qc_defaults` value in the embedded catalog must be the id of a model
/// present in `config.models`.
#[test]
fn embedded_catalog_every_qc_default_targets_a_known_model() {
    let config = default_config();

    let mut orphans: Vec<(&str, &str)> = Vec::new();
    for (provider, model_id) in &config.qc_defaults {
        if config.models.contains_key(model_id.as_str()) {
            continue;
        }
        orphans.push((provider.as_str(), model_id.as_str()));
    }

    assert!(
        orphans.is_empty(),
        "qc_defaults reference unknown models: {orphans:?}"
    );
}
3973
/// For every model in the embedded catalog that has pricing, the input/output
/// rates and any cache read/write rates must be `>= 0.0`. Models without
/// pricing are skipped.
#[test]
fn embedded_catalog_pricing_rates_are_non_negative() {
    let config = default_config();

    let priced_models = config
        .models
        .iter()
        .filter_map(|(id, model)| model.pricing.as_ref().map(|pricing| (id, pricing)));

    for (id, pricing) in priced_models {
        let base_rates_ok = pricing.input_per_mtok >= 0.0 && pricing.output_per_mtok >= 0.0;
        assert!(
            base_rates_ok,
            "{id}: negative pricing — in={} out={}",
            pricing.input_per_mtok,
            pricing.output_per_mtok
        );

        if let Some(rate) = pricing.cache_read_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
        }
        if let Some(rate) = pricing.cache_write_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
        }
    }
}
3995
/// `ModelAvailability::parse` must accept `serverless`, `dedicated` and
/// `unknown`, reject `provisioned`, and round-trip each variant through
/// `as_str`.
#[test]
fn model_availability_parses_known_strings() {
    let recognised = [
        ("serverless", ModelAvailability::Serverless),
        ("dedicated", ModelAvailability::Dedicated),
        ("unknown", ModelAvailability::Unknown),
    ];
    for (text, variant) in recognised {
        assert_eq!(ModelAvailability::parse(text), Some(variant));
    }

    assert_eq!(ModelAvailability::parse("provisioned"), None);

    // Round trip: the string form of each variant parses back to that variant.
    let variants = [
        ModelAvailability::Serverless,
        ModelAvailability::Dedicated,
        ModelAvailability::Unknown,
    ];
    for variant in variants {
        let reparsed = ModelAvailability::parse(variant.as_str());
        assert_eq!(reparsed, Some(variant));
    }
}
4019
/// The embedded catalog row `Qwen/Qwen3-Coder-Next-FP8` must belong to provider
/// `together` and have `Dedicated` availability.
#[test]
fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
    let config = default_config();

    let together_row = config
        .models
        .get("Qwen/Qwen3-Coder-Next-FP8")
        .expect("Together Qwen3 Coder Next FP8 is cataloged");

    assert_eq!(together_row.provider, "together");
    assert_eq!(together_row.availability, ModelAvailability::Dedicated);
}
4030
/// None of the tier-style aliases listed below may point at a
/// `(provider, model id)` pair whose catalog availability is `Dedicated`.
#[test]
fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
    const TIER_ALIASES: [&str; 9] = [
        "frontier",
        "mid",
        "small",
        "tier/frontier",
        "tier/mid",
        "tier/small",
        "sonnet",
        "opus",
        "haiku",
    ];

    let config = default_config();

    // Every (provider, model id) route marked dedicated in the catalog.
    let dedicated: std::collections::BTreeSet<(&str, &str)> = config
        .models
        .iter()
        .filter_map(|(id, model)| {
            (model.availability == ModelAvailability::Dedicated)
                .then_some((model.provider.as_str(), id.as_str()))
        })
        .collect();

    let tier_aliases = config
        .aliases
        .iter()
        .filter(|(name, _)| TIER_ALIASES.contains(&name.as_str()));

    for (name, alias) in tier_aliases {
        let route = (alias.provider.as_str(), alias.id.as_str());
        assert!(
            !dedicated.contains(&route),
            "tier alias `{name}` targets dedicated-only route `{}/{}`",
            alias.provider,
            alias.id,
        );
    }
}
4065
/// Each of the `frontier`, `mid` and `small` tier aliases must resolve through
/// `resolve_tier_model` to a model that has a catalog entry and is not
/// deprecated.
#[test]
fn embedded_catalog_tier_aliases_resolve_to_active_models() {
    for alias in ["frontier", "mid", "small"] {
        let Some((model, _provider)) = resolve_tier_model(alias, None) else {
            panic!("tier alias `{alias}` must resolve");
        };
        let Some(entry) = model_catalog_entry(&model) else {
            panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry");
        };

        assert!(
            !entry.deprecated,
            "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
            entry.deprecation_note
        );
    }
}
4084
/// The `opus` alias must resolve to `claude-opus-4-8` on `anthropic`, and that
/// catalog entry must be non-deprecated and advertise a `speed=fast` fast mode
/// in `research_preview` whose input price exceeds the standard input price.
#[test]
fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
    let (alias_model, alias_provider) = resolve_model("opus");
    assert_eq!(alias_model, "claude-opus-4-8");
    assert_eq!(alias_provider.as_deref(), Some("anthropic"));

    let entry = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
    assert!(!entry.deprecated, "newest Opus must not be deprecated");

    let fast_mode = entry.fast_mode.expect("opus 4.8 advertises fast mode");
    assert_eq!(fast_mode.param, "speed");
    assert_eq!(fast_mode.value, "fast");
    assert_eq!(fast_mode.status.as_deref(), Some("research_preview"));

    let premium = fast_mode
        .pricing
        .expect("fast mode carries premium pricing");
    let standard = entry.pricing.expect("opus 4.8 standard pricing");
    let fast_costs_more = premium.input_per_mtok > standard.input_per_mtok;
    assert!(
        fast_costs_more,
        "fast mode must be premium-priced relative to standard"
    );
}
4106
/// `claude-opus-4-7` and `claude-opus-4-6` must both be deprecated catalog
/// entries whose `superseded_by` is `claude-opus-4-8`.
#[test]
fn superseded_opus_models_point_at_claude_opus_4_8() {
    for model in ["claude-opus-4-7", "claude-opus-4-6"] {
        let Some(entry) = model_catalog_entry(model) else {
            panic!("{model} catalog entry");
        };

        assert!(entry.deprecated, "{model} should be deprecated");

        let successor = entry.superseded_by.as_deref();
        assert_eq!(
            successor,
            Some("claude-opus-4-8"),
            "{model} should be superseded by claude-opus-4-8"
        );
    }
}
4122
/// The `claude-opus-4-6` catalog entry must have no `fast_mode`, while the
/// `claude-opus-4-7` entry must still have one.
#[test]
fn opus_46_no_longer_advertises_fast_mode() {
    let opus46 = model_catalog_entry("claude-opus-4-6").expect("opus 4.6 catalog entry");
    let opus46_has_fast_mode = opus46.fast_mode.is_some();
    assert!(
        !opus46_has_fast_mode,
        "Anthropic removed Opus 4.6 fast mode on 2026-06-29; Harn should not advertise it"
    );

    let opus47 = model_catalog_entry("claude-opus-4-7").expect("opus 4.7 catalog entry");
    let opus47_has_fast_mode = opus47.fast_mode.is_some();
    assert!(
        opus47_has_fast_mode,
        "Opus 4.7 still advertises its own fast-mode tier"
    );
}
4137
/// The `gpt-5.5` catalog entry must advertise a fast mode whose parameter is
/// `service_tier` and whose status is `ga`.
#[test]
fn gpt_5_5_fast_mode_rides_service_tier() {
    let fast_tier = model_catalog_entry("gpt-5.5")
        .expect("gpt-5.5 catalog entry")
        .fast_mode
        .expect("gpt-5.5 advertises a fast tier");

    assert_eq!(fast_tier.param, "service_tier");
    assert_eq!(fast_tier.status.as_deref(), Some("ga"));
}
4147}