1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::{BTreeMap, BTreeSet};
4use std::sync::{OnceLock, RwLock};
5
/// Process-wide base configuration; initialized once by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file that `load_config` merged, if any.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
/// Optional overlay that `effective_config_with_user_overrides` merges on top
/// of the base configuration. Lazily created by `runtime_catalog_overlay`.
static RUNTIME_CATALOG_OVERLAY: OnceLock<RwLock<Option<ProvidersConfig>>> = OnceLock::new();

thread_local! {
    // Per-thread overrides installed through `set_user_overrides`; they are
    // merged last when `effective_config` builds the effective configuration.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
17
/// Top-level provider/model catalog as deserialized from TOML.
///
/// Every field is optional on the wire (`#[serde(default)]`), so an empty
/// document deserializes to `ProvidersConfig::default()`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Fallback provider name; consulted by `default_provider_with_config`
    /// after the `HARN_DEFAULT_PROVIDER` environment variable.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider definitions keyed by provider name.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias definitions keyed by the alias string passed to `resolve_model`.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Tool-calling records; presumably keyed by alias name (TODO confirm).
    #[serde(default)]
    pub alias_tool_calling: BTreeMap<String, AliasToolCallingDef>,
    /// Model definitions keyed by model id (looked up by provider inference).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Provider inference rules, scanned in order; the first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Tier rules, scanned in order by `model_tier_with_config`.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Per-pattern default parameter tables (outer key is the pattern).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
    /// Per-role parameter tables (outer key is the role).
    #[serde(default)]
    pub model_roles: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
43
44impl ProvidersConfig {
45 pub fn is_empty(&self) -> bool {
46 self.default_provider.is_none()
47 && self.providers.is_empty()
48 && self.aliases.is_empty()
49 && self.alias_tool_calling.is_empty()
50 && self.models.is_empty()
51 && self.qc_defaults.is_empty()
52 && self.inference_rules.is_empty()
53 && self.tier_rules.is_empty()
54 && self.model_defaults.is_empty()
55 && self.model_roles.is_empty()
56 && self.tier_defaults.default == default_mid()
57 }
58
59 pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
60 for (name, provider) in &overlay.providers {
61 match self.providers.get_mut(name) {
62 Some(existing) => existing.merge_from(provider),
63 None => {
64 self.providers.insert(name.clone(), provider.clone());
65 }
66 }
67 }
68 self.aliases.extend(overlay.aliases.clone());
69 self.alias_tool_calling
70 .extend(overlay.alias_tool_calling.clone());
71 self.models.extend(overlay.models.clone());
72 self.qc_defaults.extend(overlay.qc_defaults.clone());
73
74 if overlay.default_provider.is_some() {
75 self.default_provider = overlay.default_provider.clone();
76 }
77
78 if !overlay.inference_rules.is_empty() {
79 let mut merged = overlay.inference_rules.clone();
80 merged.extend(self.inference_rules.clone());
81 self.inference_rules = merged;
82 }
83
84 if !overlay.tier_rules.is_empty() {
85 let mut merged = overlay.tier_rules.clone();
86 merged.extend(self.tier_rules.clone());
87 self.tier_rules = merged;
88 }
89
90 if overlay.tier_defaults.default != default_mid() {
91 self.tier_defaults = overlay.tier_defaults.clone();
92 }
93
94 for (pattern, defaults) in &overlay.model_defaults {
95 self.model_defaults
96 .entry(pattern.clone())
97 .or_default()
98 .extend(defaults.clone());
99 }
100
101 for (role, defaults) in &overlay.model_roles {
102 self.model_roles
103 .entry(role.clone())
104 .or_default()
105 .extend(defaults.clone());
106 }
107 }
108}
109
/// In-memory definition of a single provider.
///
/// Deserialization goes through `ProviderDefWire` (see the manual
/// `Deserialize` impl) so that `auth_style_explicit` can record whether
/// `auth_style` was actually present in the input.
#[derive(Debug, Clone)]
pub struct ProviderDef {
    pub display_name: Option<String>,
    pub icon: Option<String>,
    pub protocol: Option<String>,
    /// Empty string means "not set" for merge purposes (see `merge_string`).
    pub base_url: String,
    pub base_url_env: Option<String>,
    /// Defaults to `"bearer"` when absent from the input.
    pub auth_style: String,
    pub auth_header: Option<String>,
    pub auth_env: AuthEnv,
    pub extra_headers: BTreeMap<String, String>,
    /// Empty string means "not set" for merge purposes (see `merge_string`).
    pub chat_endpoint: String,
    pub completion_endpoint: Option<String>,
    pub command: Option<String>,
    pub args: Vec<String>,
    pub env: BTreeMap<String, String>,
    pub cwd: Option<String>,
    pub mcp_servers: Vec<serde_json::Value>,
    pub healthcheck: Option<HealthcheckDef>,
    pub local_runtime: Option<LocalRuntimeDef>,
    pub features: Vec<String>,
    pub fallback: Option<String>,
    pub retry_count: Option<u32>,
    pub retry_delay_ms: Option<u64>,
    pub rpm: Option<u32>,
    pub rate_limits: Option<RateLimitsDef>,
    pub cost_per_1k_in: Option<f64>,
    pub cost_per_1k_out: Option<f64>,
    pub latency_p50_ms: Option<u64>,
    /// `true` when `auth_style` was given explicitly rather than defaulted;
    /// consulted by `ProviderDef::merge_from` when layering overlays.
    #[doc(hidden)]
    pub auth_style_explicit: bool,
}
158
/// Wire-format mirror of `ProviderDef` used only for deserialization.
///
/// It matches `ProviderDef` field for field except that `auth_style` is an
/// `Option`, which lets the `Deserialize` impl for `ProviderDef` tell an
/// omitted value from an explicit one, and that it has no
/// `auth_style_explicit` field. All fields are optional on the wire.
#[derive(Debug, Clone, Deserialize)]
struct ProviderDefWire {
    #[serde(default)]
    display_name: Option<String>,
    #[serde(default)]
    icon: Option<String>,
    #[serde(default)]
    protocol: Option<String>,
    #[serde(default)]
    base_url: String,
    #[serde(default)]
    base_url_env: Option<String>,
    // `None` here means the input did not mention `auth_style` at all.
    #[serde(default)]
    auth_style: Option<String>,
    #[serde(default)]
    auth_header: Option<String>,
    #[serde(default)]
    auth_env: AuthEnv,
    #[serde(default)]
    extra_headers: BTreeMap<String, String>,
    #[serde(default)]
    chat_endpoint: String,
    #[serde(default)]
    completion_endpoint: Option<String>,
    #[serde(default)]
    command: Option<String>,
    #[serde(default)]
    args: Vec<String>,
    #[serde(default)]
    env: BTreeMap<String, String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    mcp_servers: Vec<serde_json::Value>,
    #[serde(default)]
    healthcheck: Option<HealthcheckDef>,
    #[serde(default)]
    local_runtime: Option<LocalRuntimeDef>,
    #[serde(default)]
    features: Vec<String>,
    #[serde(default)]
    fallback: Option<String>,
    #[serde(default)]
    retry_count: Option<u32>,
    #[serde(default)]
    retry_delay_ms: Option<u64>,
    #[serde(default)]
    rpm: Option<u32>,
    #[serde(default)]
    rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    cost_per_1k_in: Option<f64>,
    #[serde(default)]
    cost_per_1k_out: Option<f64>,
    #[serde(default)]
    latency_p50_ms: Option<u64>,
}
216
217impl<'de> Deserialize<'de> for ProviderDef {
218 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
219 where
220 D: serde::Deserializer<'de>,
221 {
222 let wire = ProviderDefWire::deserialize(deserializer)?;
223 let auth_style_explicit = wire.auth_style.is_some();
224 Ok(Self {
225 display_name: wire.display_name,
226 icon: wire.icon,
227 protocol: wire.protocol,
228 base_url: wire.base_url,
229 base_url_env: wire.base_url_env,
230 auth_style: wire.auth_style.unwrap_or_else(default_bearer),
231 auth_header: wire.auth_header,
232 auth_env: wire.auth_env,
233 extra_headers: wire.extra_headers,
234 chat_endpoint: wire.chat_endpoint,
235 completion_endpoint: wire.completion_endpoint,
236 command: wire.command,
237 args: wire.args,
238 env: wire.env,
239 cwd: wire.cwd,
240 mcp_servers: wire.mcp_servers,
241 healthcheck: wire.healthcheck,
242 local_runtime: wire.local_runtime,
243 features: wire.features,
244 fallback: wire.fallback,
245 retry_count: wire.retry_count,
246 retry_delay_ms: wire.retry_delay_ms,
247 rpm: wire.rpm,
248 rate_limits: wire.rate_limits,
249 cost_per_1k_in: wire.cost_per_1k_in,
250 cost_per_1k_out: wire.cost_per_1k_out,
251 latency_p50_ms: wire.latency_p50_ms,
252 auth_style_explicit,
253 })
254 }
255}
256
257impl Default for ProviderDef {
258 fn default() -> Self {
259 Self {
260 display_name: None,
261 icon: None,
262 protocol: None,
263 base_url: String::new(),
264 base_url_env: None,
265 auth_style: default_bearer(),
266 auth_header: None,
267 auth_env: AuthEnv::None,
268 extra_headers: BTreeMap::new(),
269 chat_endpoint: String::new(),
270 completion_endpoint: None,
271 command: None,
272 args: Vec::new(),
273 env: BTreeMap::new(),
274 cwd: None,
275 mcp_servers: Vec::new(),
276 healthcheck: None,
277 local_runtime: None,
278 features: Vec::new(),
279 fallback: None,
280 retry_count: None,
281 retry_delay_ms: None,
282 rpm: None,
283 rate_limits: None,
284 cost_per_1k_in: None,
285 cost_per_1k_out: None,
286 latency_p50_ms: None,
287 auth_style_explicit: false,
288 }
289 }
290}
291
292impl ProviderDef {
293 fn merge_from(&mut self, overlay: &ProviderDef) {
294 merge_option(&mut self.display_name, &overlay.display_name);
295 merge_option(&mut self.icon, &overlay.icon);
296 merge_option(&mut self.protocol, &overlay.protocol);
297 merge_string(&mut self.base_url, &overlay.base_url);
298 merge_option(&mut self.base_url_env, &overlay.base_url_env);
299 let overlay_uses_default_auth_style = overlay.auth_style == default_bearer();
300 if overlay.auth_style_explicit
301 || !overlay_uses_default_auth_style
302 || self.auth_style == default_bearer()
303 {
304 self.auth_style = overlay.auth_style.clone();
305 self.auth_style_explicit |=
306 overlay.auth_style_explicit || !overlay_uses_default_auth_style;
307 }
308 merge_option(&mut self.auth_header, &overlay.auth_header);
309 if !overlay.auth_env.is_none() {
310 self.auth_env = overlay.auth_env.clone();
311 }
312 self.extra_headers.extend(overlay.extra_headers.clone());
313 merge_string(&mut self.chat_endpoint, &overlay.chat_endpoint);
314 merge_option(&mut self.completion_endpoint, &overlay.completion_endpoint);
315 merge_option(&mut self.command, &overlay.command);
316 merge_vec(&mut self.args, &overlay.args);
317 self.env.extend(overlay.env.clone());
318 merge_option(&mut self.cwd, &overlay.cwd);
319 merge_vec(&mut self.mcp_servers, &overlay.mcp_servers);
320 merge_option(&mut self.healthcheck, &overlay.healthcheck);
321 merge_option(&mut self.local_runtime, &overlay.local_runtime);
322 merge_vec(&mut self.features, &overlay.features);
323 merge_option(&mut self.fallback, &overlay.fallback);
324 merge_option(&mut self.retry_count, &overlay.retry_count);
325 merge_option(&mut self.retry_delay_ms, &overlay.retry_delay_ms);
326 merge_option(&mut self.rpm, &overlay.rpm);
327 merge_option(&mut self.rate_limits, &overlay.rate_limits);
328 merge_option(&mut self.cost_per_1k_in, &overlay.cost_per_1k_in);
329 merge_option(&mut self.cost_per_1k_out, &overlay.cost_per_1k_out);
330 merge_option(&mut self.latency_p50_ms, &overlay.latency_p50_ms);
331 }
332}
333
/// Overwrites `base` with a clone of `overlay`'s value when `overlay` is
/// `Some`; leaves `base` untouched otherwise.
fn merge_option<T: Clone>(base: &mut Option<T>, overlay: &Option<T>) {
    if let Some(value) = overlay {
        *base = Some(value.clone());
    }
}
339
/// Replaces the contents of `base` with `overlay` unless `overlay` is empty,
/// in which case `base` is left as it was.
fn merge_string(base: &mut String, overlay: &str) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.push_str(overlay);
}
345
/// Replaces the contents of `base` with a copy of `overlay` unless `overlay`
/// is empty, in which case `base` is left as it was.
fn merge_vec<T: Clone>(base: &mut Vec<T>, overlay: &[T]) {
    if overlay.is_empty() {
        return;
    }
    base.clear();
    base.extend_from_slice(overlay);
}
351
/// Default value for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
355
/// Environment-variable name(s) associated with a provider's credentials.
///
/// Deserialized untagged: a single string becomes `Single`, an array of
/// strings becomes `Multiple`. `None` is the default when the field is absent.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No environment variable configured.
    #[default]
    None,
    /// Exactly one name.
    Single(String),
    /// Several names; presumably candidates tried in order (TODO confirm).
    Multiple(Vec<String>),
}
366
367impl AuthEnv {
368 fn is_none(&self) -> bool {
369 matches!(self, AuthEnv::None)
370 }
371}
372
/// Healthcheck request description attached to a provider.
///
/// `method` is required on the wire; `path`, `url` and `body` are optional.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    pub method: String,
    #[serde(default)]
    pub path: Option<String>,
    #[serde(default)]
    pub url: Option<String>,
    #[serde(default)]
    pub body: Option<String>,
}
383
/// Description of a locally launched runtime attached to a provider
/// (`ProviderDef::local_runtime`).
///
/// Every field is optional on input and omitted from serialized output when
/// unset (or, for `default_args`, when empty). The `*_arg` fields presumably
/// hold the command-line flag names for the corresponding settings (TODO
/// confirm against the launcher code).
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct LocalRuntimeDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub command: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_source_env: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_port: Option<u16>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub served_model_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub host_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub ctx_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gpu_layers_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_k_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_type_v_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_ram_arg: Option<String>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub default_args: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
440
/// Memory-related metadata attached to a model (`ModelDef::local_memory`).
///
/// Every field is optional on input and omitted from serialized output when
/// unset (or, for `cache_type_multipliers`, when empty). Units are suggested
/// by the field names (GiB, context tokens) but are not enforced here.
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct LocalMemoryDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_context_window: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub measured_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_resident_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub kv_cache_gib_per_1k_ctx: Option<f64>,
    // Keyed by cache type name — presumably the same names used in
    // `measured_cache_type` / `default_cache_type` (TODO confirm).
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub cache_type_multipliers: BTreeMap<String, f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_cache_type: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub safety_margin_gib: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_recommended_context: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
481
482impl LocalMemoryDef {
483 pub fn is_empty(&self) -> bool {
484 self.measured_resident_gib.is_none()
485 && self.measured_context_window.is_none()
486 && self.measured_cache_type.is_none()
487 && self.base_resident_gib.is_none()
488 && self.kv_cache_gib_per_1k_ctx.is_none()
489 && self.cache_type_multipliers.is_empty()
490 && self.default_cache_type.is_none()
491 && self.safety_margin_gib.is_none()
492 && self.max_recommended_context.is_none()
493 && self.source_url.is_none()
494 && self.last_verified.is_none()
495 && self.notes.is_none()
496 }
497}
498
/// Target of a model alias: the concrete model id and its provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Model id the alias resolves to.
    pub id: String,
    /// Provider the alias resolves to.
    pub provider: String,
    /// Optional tool-format override; when `None`, `resolve_model_info`
    /// computes one with `default_tool_format_with_config`.
    #[serde(default)]
    pub tool_format: Option<String>,
}
510
511#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
512pub struct AliasToolCallingDef {
513 #[serde(default)]
514 #[serde(skip_serializing_if = "Option::is_none")]
515 pub native: Option<String>,
516 #[serde(default)]
517 #[serde(skip_serializing_if = "Option::is_none")]
518 pub text: Option<String>,
519 #[serde(default)]
520 #[serde(skip_serializing_if = "Option::is_none")]
521 pub streaming_native: Option<String>,
522 #[serde(default)]
523 #[serde(skip_serializing_if = "Option::is_none")]
524 pub fallback_mode: Option<String>,
525 #[serde(default)]
526 #[serde(skip_serializing_if = "Option::is_none")]
527 pub failure_reason: Option<String>,
528 #[serde(default)]
529 #[serde(skip_serializing_if = "Option::is_none")]
530 pub last_probe_at: Option<String>,
531}
532
/// Pricing for a model. The `_per_mtok` suffix suggests prices per million
/// tokens; the currency is not specified here.
///
/// Input and output prices are required; cache read/write prices are
/// optional.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
542
/// Rate-limit metadata for a provider or model.
///
/// Every field is optional on input and omitted from serialized output when
/// unset. The short names presumably abbreviate requests/tokens per
/// minute/hour/day (TODO confirm against the consumer).
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct RateLimitsDef {
    // Can be backfilled from a separate value via `with_rpm_fallback`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpm: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rph: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rpd: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tph: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tpd: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tpm: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tier: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub notes: Option<String>,
}
587
588impl RateLimitsDef {
589 pub fn is_empty(&self) -> bool {
590 self.rpm.is_none()
591 && self.rph.is_none()
592 && self.rpd.is_none()
593 && self.tpm.is_none()
594 && self.tph.is_none()
595 && self.tpd.is_none()
596 && self.input_tpm.is_none()
597 && self.output_tpm.is_none()
598 && self.concurrency.is_none()
599 && self.tier.is_none()
600 && self.source_url.is_none()
601 && self.last_verified.is_none()
602 && self.notes.is_none()
603 }
604
605 pub fn with_rpm_fallback(mut self, rpm: Option<u32>) -> Option<Self> {
606 if self.rpm.is_none() {
607 self.rpm = rpm;
608 }
609 (!self.is_empty()).then_some(self)
610 }
611}
612
/// Architecture metadata attached to a model (`ModelDef::architecture`).
///
/// Every field is optional on input and omitted from serialized output when
/// unset. The `_b` suffix on the parameter counts presumably means billions
/// (TODO confirm).
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
pub struct ModelArchitectureDef {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_parameter_count_b: Option<f64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub moe: Option<bool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub quantization: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub precision: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tokenizer: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub knowledge_cutoff: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_verified: Option<String>,
}
649
650impl ModelArchitectureDef {
651 pub fn is_empty(&self) -> bool {
652 self.parameter_count_b.is_none()
653 && self.active_parameter_count_b.is_none()
654 && self.moe.is_none()
655 && self.quantization.is_none()
656 && self.precision.is_none()
657 && self.license.is_none()
658 && self.tokenizer.is_none()
659 && self.knowledge_cutoff.is_none()
660 && self.source_url.is_none()
661 && self.last_verified.is_none()
662 }
663}
664
/// Description of a model's optional "fast mode" (`ModelDef::fast_mode`).
///
/// `param` and `value` are required on the wire; presumably they name the
/// request parameter and the value that enables the mode (TODO confirm).
/// All remaining fields are optional.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct FastModeDef {
    pub param: String,
    pub value: String,
    #[serde(default)]
    pub beta_header: Option<String>,
    #[serde(default)]
    pub otps_speedup: Option<f64>,
    #[serde(default)]
    pub status: Option<String>,
    // Pricing specific to this mode, when provided.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    pub note: Option<String>,
}
701
/// Catalog entry for a single model, stored in `ProvidersConfig::models`
/// under its model id.
///
/// `name`, `provider` and `context_window` are required on the wire; every
/// other field falls back to its `Default` value when absent.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    pub name: String,
    /// Provider returned by provider inference when this entry matches.
    pub provider: String,
    pub context_window: u64,
    #[serde(default)]
    pub logical_model: Option<String>,
    #[serde(default)]
    pub equivalence_group: Option<String>,
    #[serde(default)]
    pub served_variant: Option<String>,
    #[serde(default)]
    pub wire_model: Option<String>,
    #[serde(default)]
    pub api_dialect: Option<String>,
    #[serde(default)]
    pub rate_limits: Option<RateLimitsDef>,
    #[serde(default)]
    pub architecture: Option<ModelArchitectureDef>,
    #[serde(default)]
    pub local_memory: Option<LocalMemoryDef>,
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    #[serde(default)]
    pub capabilities: Vec<String>,
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
    #[serde(default)]
    pub deprecated: bool,
    #[serde(default)]
    pub deprecation_note: Option<String>,
    #[serde(default)]
    pub superseded_by: Option<String>,
    #[serde(default)]
    pub fast_mode: Option<FastModeDef>,
    #[serde(default)]
    pub quality_tags: Vec<String>,
    /// Defaults to `ModelAvailability::Serverless` when absent.
    #[serde(default)]
    pub availability: ModelAvailability,
    /// Explicit tier; when non-blank after trimming it is what
    /// `model_tier_with_config` returns for this model.
    #[serde(default)]
    pub tier: Option<String>,
    #[serde(default)]
    pub open_weight: Option<bool>,
    #[serde(default)]
    pub strengths: Vec<String>,
    // Benchmark name -> score.
    #[serde(default)]
    pub benchmarks: BTreeMap<String, f64>,
    #[serde(default)]
    pub family: Option<String>,
    #[serde(default)]
    pub lineage: Option<String>,
    #[serde(default)]
    pub complementary_with: Vec<String>,
    #[serde(default)]
    pub avoid_as_reviewer_for: Vec<String>,
}
817
/// How a model is made available. Serialized in `snake_case`, which matches
/// the strings used by `ModelAvailability::as_str` and `parse`.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum ModelAvailability {
    /// Default when the field is omitted.
    #[default]
    Serverless,
    Dedicated,
    Unknown,
}
835
836impl ModelAvailability {
837 pub fn as_str(self) -> &'static str {
838 match self {
839 Self::Serverless => "serverless",
840 Self::Dedicated => "dedicated",
841 Self::Unknown => "unknown",
842 }
843 }
844
845 pub fn parse(value: &str) -> Option<Self> {
846 match value {
847 "serverless" => Some(Self::Serverless),
848 "dedicated" => Some(Self::Dedicated),
849 "unknown" => Some(Self::Unknown),
850 _ => None,
851 }
852 }
853}
854
/// Result of `resolve_model_info`: a selector resolved to a concrete model
/// id and provider plus derived metadata.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    pub id: String,
    pub provider: String,
    /// The selector itself when it matched a configured alias, else `None`.
    pub alias: Option<String>,
    pub tool_format: String,
    pub tier: String,
    pub family: String,
    pub lineage: String,
}
865
/// Inputs for selecting a complementary reviewer model for a given author
/// model. The selection logic itself is not part of this section.
#[derive(Debug, Clone, PartialEq)]
pub struct ComplementaryReviewerOptions {
    pub author_model: String,
    pub author_provider: Option<String>,
    pub intent: ComplementaryReviewerIntent,
    // Presumably caps reviewer price relative to the author's (TODO confirm).
    pub max_price_multiplier: Option<f64>,
}
873
/// Purpose for which a complementary reviewer is requested. String forms are
/// defined by `ComplementaryReviewerIntent::as_str` / `parse`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComplementaryReviewerIntent {
    Review,
    Critique,
    PlanReview,
}
880
881impl ComplementaryReviewerIntent {
882 pub fn parse(value: &str) -> Option<Self> {
883 match value {
884 "review" => Some(Self::Review),
885 "critique" => Some(Self::Critique),
886 "plan_review" => Some(Self::PlanReview),
887 _ => None,
888 }
889 }
890
891 pub fn as_str(self) -> &'static str {
892 match self {
893 Self::Review => "review",
894 Self::Critique => "critique",
895 Self::PlanReview => "plan_review",
896 }
897 }
898}
899
/// Serializable outcome of a complementary-reviewer selection: who authored,
/// who reviews, and why.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryReviewerSelection {
    // Stored as a string; presumably `ComplementaryReviewerIntent::as_str`
    // output (TODO confirm at the construction site).
    pub intent: String,
    pub author: ComplementaryModelIdentity,
    pub reviewer: ComplementaryModelIdentity,
    pub fallback: bool,
    pub fallback_reason: Option<String>,
    pub reason: String,
    pub estimated_incremental_cost: Option<ComplementaryCostEstimate>,
}
910
/// Identity of one model (author or reviewer) in a
/// `ComplementaryReviewerSelection`.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryModelIdentity {
    pub id: String,
    pub provider: String,
    pub family: String,
    pub lineage: String,
    pub tier: String,
    /// Omitted from serialized output when unknown.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pricing: Option<ModelPricing>,
}
921
/// Cost estimate reported in
/// `ComplementaryReviewerSelection::estimated_incremental_cost`.
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct ComplementaryCostEstimate {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    pub total_per_mtok: f64,
    /// Omitted from serialized output when not available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub multiplier_vs_author: Option<f64>,
}
930
/// Rule mapping a model id to a provider.
///
/// `infer_provider_with_config` treats a rule as matching when any of its
/// configured matchers matches the model id; matchers left as `None` are
/// ignored.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern, evaluated with `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring that must occur in the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Value the model id must equal exactly.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider to report when the rule matches.
    pub provider: String,
}
941
/// Rule mapping a model id to a tier; scanned in order by
/// `model_tier_with_config`. It has the same matcher fields as
/// `InferenceRule`.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier to report when the rule matches.
    pub tier: String,
}
952
/// Tier default settings; `default` is `"mid"` (from `default_mid`) unless
/// overridden in the input.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    #[serde(default = "default_mid")]
    pub default: String,
}
958
959impl Default for TierDefaults {
960 fn default() -> Self {
961 Self {
962 default: default_mid(),
963 }
964 }
965}
966
/// Built-in default tier name.
fn default_mid() -> String {
    String::from("mid")
}
970
971pub fn load_config() -> &'static ProvidersConfig {
973 CONFIG.get_or_init(|| {
974 let mut config = default_config();
975 let verbose_config_logging = matches!(
976 std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
977 Some("1" | "true" | "TRUE" | "yes" | "YES")
978 ) || matches!(
979 std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
980 Some("1" | "true" | "TRUE" | "yes" | "YES")
981 );
982 if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
983 if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
984 config.merge_from(&overlay);
985 let _ = CONFIG_PATH.set(path);
986 return config;
987 }
988 }
989 if should_load_home_config() {
990 if let Some(home) = dirs_or_home() {
991 let path = format!("{home}/.config/harn/providers.toml");
992 if let Some(overlay) = read_external_config(&path, false) {
993 config.merge_from(&overlay);
994 let _ = CONFIG_PATH.set(path);
995 return config;
996 }
997 }
998 }
999 config
1000 })
1001}
1002
1003fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
1004 match std::fs::read_to_string(path) {
1005 Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
1006 Ok(config) => {
1007 if verbose {
1008 eprintln!(
1009 "[llm_config] Loaded {} providers, {} aliases from {}",
1010 config.providers.len(),
1011 config.aliases.len(),
1012 path
1013 );
1014 }
1015 Some(config)
1016 }
1017 Err(error) => {
1018 eprintln!("[llm_config] TOML parse error in {path}: {error}");
1019 None
1020 }
1021 },
1022 Err(error) => {
1023 if verbose {
1024 eprintln!("[llm_config] Cannot read {path}: {error}");
1025 }
1026 None
1027 }
1028 }
1029}
1030
/// Whether `load_config` may read the per-user config file under the home
/// directory. Disabled in test builds.
fn should_load_home_config() -> bool {
    cfg!(not(test))
}
1036
1037pub fn parse_config_toml(src: &str) -> Result<ProvidersConfig, toml::de::Error> {
1040 toml::from_str::<ProvidersConfig>(src)
1041}
1042
1043pub fn loaded_config_path() -> Option<std::path::PathBuf> {
1046 let _ = load_config();
1048 CONFIG_PATH.get().map(std::path::PathBuf::from)
1049}
1050
1051pub fn set_user_overrides(config: Option<ProvidersConfig>) {
1055 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
1056}
1057
/// Removes the calling thread's user overrides; shorthand for
/// `set_user_overrides(None)`.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
1062
1063pub fn set_runtime_catalog_overlay(config: Option<ProvidersConfig>) {
1068 *runtime_catalog_overlay()
1069 .write()
1070 .expect("runtime catalog overlay poisoned") = config;
1071}
1072
/// Removes the runtime catalog overlay; shorthand for
/// `set_runtime_catalog_overlay(None)`.
pub fn clear_runtime_catalog_overlay() {
    set_runtime_catalog_overlay(None);
}
1076
1077pub(crate) fn effective_config() -> ProvidersConfig {
1078 let user_overrides = USER_OVERRIDES.with(|cell| cell.borrow().clone());
1079 effective_config_with_user_overrides(user_overrides.as_ref())
1080}
1081
1082pub fn embedded_config(explicit_overlay: Option<&ProvidersConfig>) -> ProvidersConfig {
1100 let mut config = default_config();
1101 if let Some(overlay) = explicit_overlay {
1102 config.merge_from(overlay);
1103 }
1104 config
1105}
1106
1107pub(crate) fn effective_config_with_user_overrides(
1108 user_overrides: Option<&ProvidersConfig>,
1109) -> ProvidersConfig {
1110 let mut merged = load_config().clone();
1111 if let Some(overlay) = runtime_catalog_overlay()
1112 .read()
1113 .expect("runtime catalog overlay poisoned")
1114 .as_ref()
1115 {
1116 merged.merge_from(overlay);
1117 }
1118 if let Some(overlay) = user_overrides {
1119 merged.merge_from(overlay);
1120 }
1121 merged
1122}
1123
1124fn runtime_catalog_overlay() -> &'static RwLock<Option<ProvidersConfig>> {
1125 RUNTIME_CATALOG_OVERLAY.get_or_init(|| RwLock::new(None))
1126}
1127
1128pub fn resolve_model(alias: &str) -> (String, Option<String>) {
1130 let config = effective_config();
1131 if let Some(a) = config.aliases.get(alias) {
1132 return (a.id.clone(), Some(a.provider.clone()));
1133 }
1134 (normalize_model_id(alias), None)
1135}
1136
/// Strips a leading provider selector from `raw`, if one is present.
///
/// Only the first matching entry of `PROVIDER_SELECTOR_PREFIXES` is removed,
/// and only once; input without such a prefix is returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    PROVIDER_SELECTOR_PREFIXES
        .iter()
        .copied()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}

/// Selector prefixes recognized by `normalize_model_id`, in match order.
const PROVIDER_SELECTOR_PREFIXES: &[&str] =
    &["ollama:", "local:", "huggingface:", "hf:", "cerebras/"];
1154
1155pub fn resolve_model_info(selector: &str) -> ResolvedModel {
1158 let config = effective_config();
1159 if let Some(alias) = config.aliases.get(selector) {
1160 let id = alias.id.clone();
1161 let provider = alias.provider.clone();
1162 let tool_format = alias
1163 .tool_format
1164 .clone()
1165 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
1166 return ResolvedModel {
1167 tier: model_tier_with_config(&config, &id),
1168 family: model_family_with_config(&config, &provider, &id),
1169 lineage: model_lineage_with_config(&config, &provider, &id),
1170 id,
1171 provider,
1172 alias: Some(selector.to_string()),
1173 tool_format,
1174 };
1175 }
1176
1177 let id = normalize_model_id(selector);
1178 let inference = infer_provider_with_config(&config, selector);
1179 let source = inference.source;
1180 let provider = inference.provider;
1181 let tool_format = default_tool_format_with_config(&config, &id, &provider);
1182 let tier = model_tier_with_config(&config, &id);
1183 let family = model_family_with_inference_source(&config, &provider, &id, source);
1184 let lineage = model_lineage_with_inference_source(&config, &provider, &id, source);
1185 ResolvedModel {
1186 id,
1187 provider,
1188 alias: None,
1189 tool_format,
1190 tier,
1191 family,
1192 lineage,
1193 }
1194}
1195
/// Infers the provider name for `model_id` using the effective
/// configuration; returns only the provider part of `infer_provider_detail`.
pub fn infer_provider(model_id: &str) -> String {
    infer_provider_detail(model_id).provider
}
1200
1201pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
1203 let config = effective_config();
1204 infer_provider_with_config(&config, model_id)
1205}
1206
1207fn infer_provider_with_config(
1208 config: &ProvidersConfig,
1209 model_id: &str,
1210) -> crate::llm::provider::ProviderInference {
1211 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
1212 return crate::llm::provider::ProviderInference::builtin("ollama");
1213 }
1214 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
1215 return crate::llm::provider::ProviderInference::builtin("huggingface");
1216 }
1217 let normalized_id = normalize_model_id(model_id);
1223 if let Some(model) = config
1224 .models
1225 .get(model_id)
1226 .or_else(|| config.models.get(&normalized_id))
1227 {
1228 return crate::llm::provider::ProviderInference::builtin(model.provider.clone());
1229 }
1230 for rule in &config.inference_rules {
1231 if let Some(exact) = &rule.exact {
1232 if model_id == exact {
1233 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1234 }
1235 }
1236 if let Some(pattern) = &rule.pattern {
1237 if glob_match(pattern, model_id) {
1238 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1239 }
1240 }
1241 if let Some(substr) = &rule.contains {
1242 if model_id.contains(substr.as_str()) {
1243 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
1244 }
1245 }
1246 }
1247 crate::llm::provider::infer_provider_from_model_id(
1248 model_id,
1249 &default_provider_with_config(config),
1250 )
1251}
1252
1253pub fn default_provider() -> String {
1254 let config = effective_config();
1255 default_provider_with_config(&config)
1256}
1257
1258fn default_provider_with_config(config: &ProvidersConfig) -> String {
1259 std::env::var("HARN_DEFAULT_PROVIDER")
1260 .ok()
1261 .map(|value| value.trim().to_string())
1262 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1263 .or_else(|| {
1264 config
1265 .default_provider
1266 .as_deref()
1267 .map(str::trim)
1268 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
1269 .map(str::to_string)
1270 })
1271 .unwrap_or_else(|| "anthropic".to_string())
1272}
1273
1274pub fn model_tier(model_id: &str) -> String {
1276 let config = effective_config();
1277 model_tier_with_config(&config, model_id)
1278}
1279
1280pub(crate) fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
1281 if let Some(model) = config.models.get(model_id) {
1283 if let Some(tier) = model.tier.as_deref() {
1284 let trimmed = tier.trim();
1285 if !trimmed.is_empty() {
1286 return trimmed.to_string();
1287 }
1288 }
1289 }
1290 for rule in &config.tier_rules {
1294 if let Some(exact) = &rule.exact {
1295 if model_id == exact {
1296 return rule.tier.clone();
1297 }
1298 }
1299 if let Some(pattern) = &rule.pattern {
1300 if glob_match(pattern, model_id) {
1301 return rule.tier.clone();
1302 }
1303 }
1304 if let Some(substr) = &rule.contains {
1305 if model_id.contains(substr.as_str()) {
1306 return rule.tier.clone();
1307 }
1308 }
1309 }
1310 config.tier_defaults.default.clone()
1311}
1312
1313pub fn model_family(provider: &str, model_id: &str) -> String {
1315 let config = effective_config();
1316 model_family_with_config(&config, provider, model_id)
1317}
1318
1319pub(crate) fn model_family_with_config(
1320 config: &ProvidersConfig,
1321 provider: &str,
1322 model_id: &str,
1323) -> String {
1324 catalog_family_token(config, model_id)
1325 .unwrap_or_else(|| derive_model_family(provider, model_id))
1326}
1327
1328fn model_family_with_inference_source(
1329 config: &ProvidersConfig,
1330 provider: &str,
1331 model_id: &str,
1332 source: crate::llm::provider::ProviderInferenceSource,
1333) -> String {
1334 if let Some(family) = catalog_family_token(config, model_id) {
1335 return family;
1336 }
1337 let id_family = derive_model_family("", model_id);
1338 if id_family != "unknown" {
1339 return id_family;
1340 }
1341 if matches!(
1342 source,
1343 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1344 ) {
1345 return "unknown".to_string();
1346 }
1347 derive_model_family(provider, model_id)
1348}
1349
1350pub fn model_lineage(provider: &str, model_id: &str) -> String {
1352 let config = effective_config();
1353 model_lineage_with_config(&config, provider, model_id)
1354}
1355
1356pub(crate) fn model_lineage_with_config(
1357 config: &ProvidersConfig,
1358 provider: &str,
1359 model_id: &str,
1360) -> String {
1361 catalog_lineage_token(config, model_id)
1362 .unwrap_or_else(|| derive_model_lineage(provider, model_id))
1363}
1364
1365fn model_lineage_with_inference_source(
1366 config: &ProvidersConfig,
1367 provider: &str,
1368 model_id: &str,
1369 source: crate::llm::provider::ProviderInferenceSource,
1370) -> String {
1371 if let Some(lineage) = catalog_lineage_token(config, model_id) {
1372 return lineage;
1373 }
1374 let id_lineage = derive_model_lineage("", model_id);
1375 if id_lineage != "unknown" {
1376 return id_lineage;
1377 }
1378 if matches!(
1379 source,
1380 crate::llm::provider::ProviderInferenceSource::DefaultFallback
1381 ) {
1382 return "unknown".to_string();
1383 }
1384 derive_model_lineage(provider, model_id)
1385}
1386
1387fn catalog_family_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1388 config
1389 .models
1390 .get(model_id)
1391 .and_then(|model| normalized_catalog_token(model.family.as_deref()))
1392}
1393
1394fn catalog_lineage_token(config: &ProvidersConfig, model_id: &str) -> Option<String> {
1395 config
1396 .models
1397 .get(model_id)
1398 .and_then(|model| normalized_catalog_token(model.lineage.as_deref()))
1399}
1400
/// Normalizes an optional catalog token: trims it, lowercases ASCII letters
/// and replaces `_` with `-`. Returns `None` for a missing or blank value.
fn normalized_catalog_token(value: Option<&str>) -> Option<String> {
    let token = value?.trim();
    if token.is_empty() {
        return None;
    }
    Some(token.to_ascii_lowercase().replace('_', "-"))
}
1407
1408fn derive_model_family(provider: &str, model_id: &str) -> String {
1409 let id = model_id.to_ascii_lowercase();
1410 if contains_any(&id, &["claude", "anthropic.claude"]) {
1411 return "anthropic-claude".to_string();
1412 }
1413 if contains_any(&id, &["gemini", "google/gemini"]) {
1414 return "google-gemini".to_string();
1415 }
1416 if contains_any(&id, &["deepseek"]) {
1417 return "deepseek".to_string();
1418 }
1419 if contains_any(&id, &["qwen"]) {
1420 return "qwen".to_string();
1421 }
1422 if contains_any(&id, &["kimi", "moonshot"]) {
1423 return "kimi".to_string();
1424 }
1425 if contains_any(&id, &["glm", "z-ai/glm", "zhipu"]) {
1426 return "glm".to_string();
1427 }
1428 if contains_any(&id, &["mistral", "mixtral", "devstral"]) {
1429 return "mistral".to_string();
1430 }
1431 if contains_any(&id, &["minimax"]) {
1432 return "minimax".to_string();
1433 }
1434 if contains_any(&id, &["llama"]) {
1435 return "llama".to_string();
1436 }
1437 if contains_any(&id, &["gemma"]) {
1438 return "gemma".to_string();
1439 }
1440 if is_openai_reasoning_model(&id) {
1441 return "openai-reasoning".to_string();
1442 }
1443 if contains_any(&id, &["gpt-oss", "openai/gpt", "gpt-"]) {
1444 return "openai-gpt".to_string();
1445 }
1446 match provider {
1447 "anthropic" | "bedrock" | "vertex-anthropic" => "anthropic-claude".to_string(),
1448 "openai" | "azure" | "azure_openai" => "openai-gpt".to_string(),
1449 "gemini" | "vertex" | "google" => "google-gemini".to_string(),
1450 "deepseek" => "deepseek".to_string(),
1451 "zai" => "glm".to_string(),
1452 "minimax" => "minimax".to_string(),
1453 other if !other.is_empty() => normalize_identifier_token(other),
1454 _ => "unknown".to_string(),
1455 }
1456}
1457
1458fn derive_model_lineage(provider: &str, model_id: &str) -> String {
1459 let id = model_id.to_ascii_lowercase();
1460 if contains_any(&id, &["haiku"]) {
1461 return "claude-haiku".to_string();
1462 }
1463 if contains_any(&id, &["opus-4-7", "opus-4-8", "opus-mythos"]) {
1464 return "claude-opus-adaptive".to_string();
1465 }
1466 if contains_any(&id, &["claude"]) {
1467 return "claude-sonnet-opus".to_string();
1468 }
1469 if contains_any(&id, &["gpt-5"]) {
1470 return "openai-gpt5".to_string();
1471 }
1472 if is_openai_reasoning_model(&id) {
1473 return "openai-reasoning".to_string();
1474 }
1475 if contains_any(&id, &["gpt-", "gpt_"]) {
1476 return "openai-legacy".to_string();
1477 }
1478 if contains_any(&id, &["gemini"]) {
1479 if contains_any(&id, &["flash"]) {
1480 return "gemini-flash".to_string();
1481 }
1482 return "gemini-pro".to_string();
1483 }
1484 if contains_any(&id, &["qwen3", "qwen/qwen3"]) {
1485 return "qwen3".to_string();
1486 }
1487 if contains_any(&id, &["gemma4", "gemma-4"]) {
1488 return "gemma4".to_string();
1489 }
1490 let family = derive_model_family(provider, model_id);
1491 if family == "unknown" {
1492 "unknown".to_string()
1493 } else {
1494 family
1495 }
1496}
1497
/// Returns `true` when `haystack` contains at least one of `needles`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
1501
/// Returns `true` when `haystack` starts with at least one of `prefixes`.
fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
    for prefix in prefixes {
        if haystack.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1505
1506fn is_openai_reasoning_model(id: &str) -> bool {
1507 starts_with_any(id, &["o1", "o3", "o4"])
1508 || contains_any(
1509 id,
1510 &[
1511 "/o1", "/o3", "/o4", ":o1", ":o3", ":o4", ".o1", ".o3", ".o4",
1512 ],
1513 )
1514}
1515
/// Normalizes an identifier: ASCII letters are lowercased, every run of
/// characters that are not ASCII alphanumerics collapses into a single `-`,
/// and leading/trailing separators are dropped.
fn normalize_identifier_token(value: &str) -> String {
    let mut token = String::with_capacity(value.len());
    // Set when a separator run was seen since the last kept character.
    let mut separator_pending = false;
    for ch in value.chars() {
        if ch.is_ascii_alphanumeric() {
            if separator_pending && !token.is_empty() {
                token.push('-');
            }
            separator_pending = false;
            token.push(ch.to_ascii_lowercase());
        } else {
            separator_pending = true;
        }
    }
    token
}
1534
1535pub fn provider_config(name: &str) -> Option<ProviderDef> {
1537 effective_config().providers.get(name).cloned()
1538}
1539
1540pub fn provider_protocol(name: &str) -> Option<String> {
1541 provider_config(name).and_then(|def| def.protocol)
1542}
1543
1544pub fn provider_uses_acp(name: &str) -> bool {
1545 provider_protocol(name)
1546 .as_deref()
1547 .is_some_and(|protocol| protocol.eq_ignore_ascii_case("acp"))
1548}
1549
1550pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
1553 let config = effective_config();
1554 let mut params = BTreeMap::new();
1555 for (pattern, defaults) in &config.model_defaults {
1556 if glob_match(pattern, model_id) {
1557 for (k, v) in defaults {
1558 params.insert(k.clone(), v.clone());
1559 }
1560 }
1561 }
1562 params
1563}
1564
1565pub fn model_role_defaults(role: &str) -> BTreeMap<String, toml::Value> {
1579 let normalized = normalize_model_role_name(role);
1580 if normalized.is_empty() {
1581 return BTreeMap::new();
1582 }
1583 let config = effective_config();
1584 let mut params = BTreeMap::new();
1585 for key in role_lookup_keys(&normalized) {
1586 extend_model_role_defaults(&config, &key, &mut params);
1587 }
1588 apply_model_role_env_overrides(&normalized, &mut params);
1589 params
1590}
1591
1592fn extend_model_role_defaults(
1593 config: &ProvidersConfig,
1594 role: &str,
1595 params: &mut BTreeMap<String, toml::Value>,
1596) {
1597 for (configured_role, defaults) in &config.model_roles {
1598 if normalize_model_role_name(configured_role) == role {
1599 params.extend(defaults.clone());
1600 }
1601 }
1602 if let Some(defaults) = config.model_roles.get(role) {
1603 params.extend(defaults.clone());
1604 }
1605}
1606
/// Normalizes a role name: trims it, lowercases ASCII letters and turns `-`
/// into `_`.
fn normalize_model_role_name(role: &str) -> String {
    role.trim()
        .chars()
        .map(|ch| if ch == '-' { '_' } else { ch.to_ascii_lowercase() })
        .collect()
}
1610
/// Returns the configuration keys to consult for `role`, in application
/// order. `merge` and `fast_apply` are aliases of each other: the alias is
/// listed first so that the requested role's own entry is applied last.
fn role_lookup_keys(role: &str) -> Vec<String> {
    let keys: &[&str] = match role {
        "merge" => &["fast_apply", "merge"],
        "fast_apply" => &["merge", "fast_apply"],
        other => return vec![other.to_string()],
    };
    keys.iter().map(|key| (*key).to_string()).collect()
}
1620
/// Converts a role name into the token used inside environment variable
/// names: ASCII alphanumerics are uppercased, every other run of characters
/// collapses into a single `_`, and leading/trailing separators are dropped.
fn role_env_token(role: &str) -> String {
    let mut token = String::with_capacity(role.len());
    // Set when a separator run was seen since the last kept character.
    let mut separator_pending = false;
    for ch in role.chars() {
        if ch.is_ascii_alphanumeric() {
            if separator_pending && !token.is_empty() {
                token.push('_');
            }
            separator_pending = false;
            token.push(ch.to_ascii_uppercase());
        } else {
            separator_pending = true;
        }
    }
    token
}
1636
1637fn apply_model_role_env_overrides(role: &str, params: &mut BTreeMap<String, toml::Value>) {
1638 for alias in role_env_aliases(role) {
1639 apply_model_role_env_var(&format!("HARN_LLM_{alias}_PROVIDER"), "provider", params);
1640 apply_model_role_env_var(&format!("HARN_LLM_{alias}_MODEL"), "model", params);
1641 apply_model_role_env_var(
1642 &format!("HARN_LLM_{alias}_ROUTE_POLICY"),
1643 "route_policy",
1644 params,
1645 );
1646 apply_model_role_env_var(
1647 &format!("HARN_LLM_ROLE_{alias}_PROVIDER"),
1648 "provider",
1649 params,
1650 );
1651 apply_model_role_env_var(&format!("HARN_LLM_ROLE_{alias}_MODEL"), "model", params);
1652 apply_model_role_env_var(
1653 &format!("HARN_LLM_ROLE_{alias}_ROUTE_POLICY"),
1654 "route_policy",
1655 params,
1656 );
1657 }
1658}
1659
1660fn role_env_aliases(role: &str) -> Vec<String> {
1661 let token = role_env_token(role);
1662 if token.is_empty() {
1663 return Vec::new();
1664 }
1665 if token == "MERGE" {
1666 vec!["FAST_APPLY".to_string(), "MERGE".to_string()]
1667 } else if token == "FAST_APPLY" {
1668 vec!["MERGE".to_string(), "FAST_APPLY".to_string()]
1669 } else {
1670 vec![token]
1671 }
1672}
1673
1674fn apply_model_role_env_var(
1675 env_name: &str,
1676 option_name: &str,
1677 params: &mut BTreeMap<String, toml::Value>,
1678) {
1679 let Ok(value) = std::env::var(env_name) else {
1680 return;
1681 };
1682 let trimmed = value.trim();
1683 if trimmed.is_empty() {
1684 return;
1685 }
1686 params.insert(
1687 option_name.to_string(),
1688 toml::Value::String(trimmed.to_string()),
1689 );
1690}
1691
1692pub fn provider_names() -> Vec<String> {
1694 effective_config().providers.keys().cloned().collect()
1695}
1696
1697pub fn known_model_names() -> Vec<String> {
1699 effective_config().aliases.keys().cloned().collect()
1700}
1701
1702pub fn alias_entries() -> Vec<(String, AliasDef)> {
1703 effective_config().aliases.into_iter().collect()
1704}
1705
1706pub fn alias_tool_calling_entry(alias: &str) -> Option<AliasToolCallingDef> {
1707 effective_config().alias_tool_calling.get(alias).cloned()
1708}
1709
1710pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
1712 let config = effective_config();
1713 model_catalog_entries_with_config(&config)
1714}
1715
1716pub(crate) fn model_catalog_entries_with_config(
1717 config: &ProvidersConfig,
1718) -> Vec<(String, ModelDef)> {
1719 sorted_model_entries_with_config(config)
1720 .into_iter()
1721 .map(|(id, model)| {
1722 let provider = model.provider.clone();
1723 (
1724 id.clone(),
1725 with_effective_capability_tags(id, provider, model),
1726 )
1727 })
1728 .collect()
1729}
1730
1731pub(crate) fn sorted_model_entries_with_config(
1732 config: &ProvidersConfig,
1733) -> Vec<(String, ModelDef)> {
1734 let mut entries: Vec<_> = config
1735 .models
1736 .iter()
1737 .map(|(id, model)| (id.clone(), model.clone()))
1738 .collect();
1739 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
1740 model_a
1741 .provider
1742 .cmp(&model_b.provider)
1743 .then_with(|| id_a.cmp(id_b))
1744 });
1745 entries
1746}
1747
1748pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
1749 effective_config()
1750 .models
1751 .get(model_id)
1752 .cloned()
1753 .map(|model| {
1754 let provider = model.provider.clone();
1755 with_effective_capability_tags(model_id.to_string(), provider, model)
1756 })
1757}
1758
1759pub fn model_rate_limits(model_id: &str) -> Option<RateLimitsDef> {
1760 model_catalog_entry(model_id).and_then(|model| model.rate_limits)
1761}
1762
1763pub fn wire_model_id(model_id: &str) -> String {
1764 model_catalog_entry(model_id)
1765 .and_then(|model| model.wire_model)
1766 .unwrap_or_else(|| model_id.to_string())
1767}
1768
1769pub fn provider_rate_limits(provider: &str) -> Option<RateLimitsDef> {
1770 provider_config(provider).and_then(|provider| {
1771 provider
1772 .rate_limits
1773 .unwrap_or_default()
1774 .with_rpm_fallback(provider.rpm)
1775 })
1776}
1777
1778pub fn model_equivalence_group(model_id: &str) -> Option<String> {
1779 model_catalog_entry(model_id).and_then(|model| {
1780 model
1781 .equivalence_group
1782 .or(model.logical_model)
1783 .filter(|group| !group.trim().is_empty())
1784 })
1785}
1786
1787pub fn equivalent_model_catalog_entries(selector: &str) -> Vec<(String, ModelDef)> {
1791 let resolved = resolve_model_info(selector);
1792 let Some(group) = model_equivalence_group(&resolved.id) else {
1793 return Vec::new();
1794 };
1795 let config = effective_config();
1796 let Some(source) = config.models.get(&resolved.id) else {
1797 return Vec::new();
1798 };
1799 let source_caps = crate::llm::capabilities::lookup(&source.provider, &resolved.id);
1800 let source_context = source
1801 .runtime_context_window
1802 .unwrap_or(source.context_window);
1803
1804 sorted_model_entries_with_config(&config)
1805 .into_iter()
1806 .filter(|(id, model)| !(id == &resolved.id && model.provider == resolved.provider))
1807 .filter(|(_, model)| !model.deprecated)
1808 .filter(|(_, model)| model.availability != ModelAvailability::Dedicated)
1809 .filter(|(_, model)| {
1810 model.equivalence_group.as_deref() == Some(group.as_str())
1811 || model.logical_model.as_deref() == Some(group.as_str())
1812 })
1813 .filter(|(id, model)| {
1814 let caps = crate::llm::capabilities::lookup(&model.provider, id);
1815 let candidate_context = model.runtime_context_window.unwrap_or(model.context_window);
1816 candidate_context >= source_context
1817 && (!source_caps.native_tools || caps.native_tools)
1818 && (!source_caps.text_tool_wire_format_supported
1819 || caps.text_tool_wire_format_supported)
1820 && (!source_caps.reasoning_effort_supported || caps.reasoning_effort_supported)
1821 && source_caps.structured_output_mode == caps.structured_output_mode
1822 })
1823 .map(|(id, model)| {
1824 let provider = model.provider.clone();
1825 (
1826 id.clone(),
1827 with_effective_capability_tags(id, provider, model),
1828 )
1829 })
1830 .collect()
1831}
1832
1833pub fn qc_default_model(provider: &str) -> Option<String> {
1834 std::env::var("BURIN_QC_MODEL")
1835 .ok()
1836 .filter(|value| !value.trim().is_empty())
1837 .or_else(|| {
1838 effective_config()
1839 .qc_defaults
1840 .get(&provider.to_lowercase())
1841 .cloned()
1842 })
1843}
1844
1845pub fn default_model_for_provider(provider: &str) -> String {
1846 if provider_uses_acp(provider) {
1847 return "default".to_string();
1848 }
1849 match provider {
1850 "local" => std::env::var("LOCAL_LLM_MODEL")
1851 .or_else(|_| std::env::var("HARN_LLM_MODEL"))
1852 .unwrap_or_else(|_| "gemma-4-26b-a4b-it".to_string()),
1853 "mlx" => std::env::var("MLX_MODEL_ID")
1854 .unwrap_or_else(|_| "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string()),
1855 "openai" => "gpt-4o-mini".to_string(),
1856 "ollama" => "llama3.2".to_string(),
1857 "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
1858 _ => "claude-sonnet-4-6".to_string(),
1859 }
1860}
1861
1862pub fn qc_defaults() -> BTreeMap<String, String> {
1863 effective_config().qc_defaults
1864}
1865
1866pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
1867 effective_config()
1868 .models
1869 .get(model_id)
1870 .and_then(|model| model.pricing.clone())
1871}
1872
1873pub fn model_fast_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
1878 effective_config()
1879 .models
1880 .get(model_id)
1881 .and_then(|model| model.fast_mode.as_ref())
1882 .and_then(|fast_mode| fast_mode.pricing.clone())
1883}
1884
1885pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
1886 model_pricing_per_mtok(model_id)
1887 .map(|pricing| {
1888 (
1889 pricing.input_per_mtok / 1000.0,
1890 pricing.output_per_mtok / 1000.0,
1891 )
1892 })
1893 .or_else(|| {
1894 let (input, output, _) = provider_economics(provider);
1895 match (input, output) {
1896 (Some(input), Some(output)) => Some((input, output)),
1897 _ => None,
1898 }
1899 })
1900}
1901
1902pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
1903 match auth_env {
1904 AuthEnv::None => Vec::new(),
1905 AuthEnv::Single(name) => vec![name.clone()],
1906 AuthEnv::Multiple(names) => names.clone(),
1907 }
1908}
1909
1910pub fn provider_key_available(provider: &str) -> bool {
1911 let Some(pdef) = provider_config(provider) else {
1912 return provider == "ollama";
1913 };
1914 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
1915 return true;
1916 }
1917 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
1918 std::env::var(env_name)
1919 .ok()
1920 .is_some_and(|value| !value.trim().is_empty())
1921 })
1922}
1923
1924pub fn available_provider_names() -> Vec<String> {
1925 provider_names()
1926 .into_iter()
1927 .filter(|provider| provider_key_available(provider))
1928 .collect()
1929}
1930
1931pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
1933 provider_config(provider)
1934 .map(|p| p.features.iter().any(|f| f == feature))
1935 .unwrap_or(false)
1936}
1937
1938pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
1942 provider_config(provider)
1943 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
1944 .unwrap_or((None, None, None))
1945}
1946
1947pub fn default_tool_format(model: &str, provider: &str) -> String {
1951 let config = effective_config();
1952 default_tool_format_with_config(&config, model, provider)
1953}
1954
1955fn default_tool_format_with_config(
1956 config: &ProvidersConfig,
1957 model: &str,
1958 provider: &str,
1959) -> String {
1960 for (name, alias) in &config.aliases {
1962 let matches = (alias.id == model && alias.provider == provider) || name == model;
1963 if matches {
1964 if let Some(ref fmt) = alias.tool_format {
1965 return fmt.clone();
1966 }
1967 }
1968 }
1969 let capabilities = crate::llm::capabilities::lookup(provider, model);
1970 if let Some(format) = capabilities.preferred_tool_format.as_deref() {
1971 if matches!(format, "native" | "text") {
1972 return format.to_string();
1973 }
1974 }
1975 let capability_matrix_native = capabilities.native_tools;
1976 let legacy_provider_native = config
1977 .providers
1978 .get(provider)
1979 .map(|p| p.features.iter().any(|f| f == "native_tools"))
1980 .unwrap_or(false);
1981 if capability_matrix_native || legacy_provider_native {
1982 "native".to_string()
1983 } else {
1984 "text".to_string()
1985 }
1986}
1987
1988fn with_effective_capability_tags(
1989 model_id: String,
1990 provider: String,
1991 mut model: ModelDef,
1992) -> ModelDef {
1993 model.capabilities = effective_model_capability_tags(&provider, &model_id);
1994 model
1995}
1996
1997pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
2001 let caps = crate::llm::capabilities::lookup(provider, model_id);
2002 capability_tags_from_capabilities(&caps)
2003}
2004
2005pub(crate) fn capability_tags_from_capabilities(
2006 caps: &crate::llm::capabilities::Capabilities,
2007) -> Vec<String> {
2008 let mut tags = Vec::new();
2009 tags.push("streaming".to_string());
2012 if caps.native_tools || caps.text_tool_wire_format_supported {
2013 tags.push("tools".to_string());
2014 }
2015 if !caps.tool_search.is_empty() {
2016 tags.push("tool_search".to_string());
2017 }
2018 if caps.vision || caps.vision_supported {
2019 tags.push("vision".to_string());
2020 }
2021 if caps.audio {
2022 tags.push("audio".to_string());
2023 }
2024 if caps.pdf {
2025 tags.push("pdf".to_string());
2026 }
2027 if caps.video {
2028 tags.push("video".to_string());
2029 }
2030 if caps.files_api_supported {
2031 tags.push("files".to_string());
2032 }
2033 if caps.prompt_caching {
2034 tags.push("prompt_caching".to_string());
2035 }
2036 if !caps.thinking_modes.is_empty() {
2037 tags.push("thinking".to_string());
2038 }
2039 if caps.interleaved_thinking_supported
2040 || caps
2041 .thinking_modes
2042 .iter()
2043 .any(|mode| mode == "adaptive" || mode == "effort")
2044 {
2045 tags.push("extended_thinking".to_string());
2046 }
2047 if caps.json_schema.is_some() {
2048 tags.push("structured_output".to_string());
2049 }
2050 tags
2051}
2052
2053pub fn resolve_tier_model(
2055 target: &str,
2056 preferred_provider: Option<&str>,
2057) -> Option<(String, String)> {
2058 let config = effective_config();
2059
2060 if let Some(alias) = config.aliases.get(target) {
2061 return Some((alias.id.clone(), alias.provider.clone()));
2062 }
2063
2064 let candidate_aliases = if let Some(provider) = preferred_provider {
2065 vec![
2066 format!("{provider}/{target}"),
2067 format!("{provider}:{target}"),
2068 format!("tier/{target}"),
2069 target.to_string(),
2070 ]
2071 } else {
2072 vec![format!("tier/{target}"), target.to_string()]
2073 };
2074
2075 for alias_name in candidate_aliases {
2076 if let Some(alias) = config.aliases.get(&alias_name) {
2077 return Some((alias.id.clone(), alias.provider.clone()));
2078 }
2079 }
2080
2081 None
2082}
2083
2084pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
2088 let config = effective_config();
2089 let mut seen = std::collections::BTreeSet::new();
2090 let mut candidates = Vec::new();
2091
2092 for alias in config.aliases.values() {
2093 let pair = (alias.id.clone(), alias.provider.clone());
2094 if seen.contains(&pair) {
2095 continue;
2096 }
2097 if model_tier(&alias.id) == target {
2098 seen.insert(pair.clone());
2099 candidates.push(pair);
2100 }
2101 }
2102
2103 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2104 provider_a
2105 .cmp(provider_b)
2106 .then_with(|| model_a.cmp(model_b))
2107 });
2108 candidates
2109}
2110
2111pub fn all_model_candidates() -> Vec<(String, String)> {
2114 let config = effective_config();
2115 let mut seen = std::collections::BTreeSet::new();
2116 let mut candidates = Vec::new();
2117
2118 for alias in config.aliases.values() {
2119 let pair = (alias.id.clone(), alias.provider.clone());
2120 if seen.insert(pair.clone()) {
2121 candidates.push(pair);
2122 }
2123 }
2124
2125 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
2126 provider_a
2127 .cmp(provider_b)
2128 .then_with(|| model_a.cmp(model_b))
2129 });
2130 candidates
2131}
2132
2133pub fn pick_complementary_reviewer(
2134 options: ComplementaryReviewerOptions,
2135) -> ComplementaryReviewerSelection {
2136 let config = effective_config();
2137 let mut author = resolve_model_info(&options.author_model);
2138 if let Some(provider) = options
2139 .author_provider
2140 .as_deref()
2141 .map(str::trim)
2142 .filter(|provider| !provider.is_empty())
2143 {
2144 author.provider = provider.to_string();
2145 author.family = model_family_with_config(&config, &author.provider, &author.id);
2146 author.lineage = model_lineage_with_config(&config, &author.provider, &author.id);
2147 author.tool_format = default_tool_format_with_config(&config, &author.id, &author.provider);
2148 }
2149 let author_entry = config.models.get(&author.id);
2150 let author_identity = complementary_identity(
2151 author.id.clone(),
2152 author.provider.clone(),
2153 author.family.clone(),
2154 author.lineage.clone(),
2155 author.tier.clone(),
2156 author_entry.and_then(|model| model.pricing.clone()),
2157 );
2158
2159 let fallback = |fallback_reason: String| ComplementaryReviewerSelection {
2160 intent: options.intent.as_str().to_string(),
2161 reviewer: author_identity.clone(),
2162 estimated_incremental_cost: cost_estimate(
2163 author_identity.pricing.as_ref(),
2164 author_identity.pricing.as_ref(),
2165 ),
2166 author: author_identity.clone(),
2167 fallback: true,
2168 reason: format!(
2169 "using author model {} because {fallback_reason}",
2170 author_identity.id
2171 ),
2172 fallback_reason: Some(fallback_reason),
2173 };
2174
2175 if author_identity.family == "unknown" {
2176 return fallback("author model family is unknown".to_string());
2177 }
2178
2179 let preferred_families = author_entry
2180 .map(|model| model.complementary_with.clone())
2181 .unwrap_or_default();
2182 let author_refs = reviewer_match_refs(&author_identity);
2183 let mut rejected_by_price = 0usize;
2184 let mut diff_family_seen = 0usize;
2185 let mut candidates = Vec::new();
2186
2187 for (id, model) in config.models.iter() {
2188 if id == &author_identity.id && model.provider == author_identity.provider {
2189 continue;
2190 }
2191 if model.deprecated || model.availability != ModelAvailability::Serverless {
2192 continue;
2193 }
2194 let family = model_family_with_config(&config, &model.provider, id);
2195 if family == "unknown" || family == author_identity.family {
2196 continue;
2197 }
2198 diff_family_seen += 1;
2199 let lineage = model_lineage_with_config(&config, &model.provider, id);
2200 let candidate_identity = complementary_identity(
2201 id.clone(),
2202 model.provider.clone(),
2203 family,
2204 lineage,
2205 model_tier_with_config(&config, id),
2206 model.pricing.clone(),
2207 );
2208 if model
2209 .avoid_as_reviewer_for
2210 .iter()
2211 .any(|selector| refs_contain_selector(&author_refs, selector))
2212 {
2213 continue;
2214 }
2215 if exceeds_price_cap(
2216 author_identity.pricing.as_ref(),
2217 candidate_identity.pricing.as_ref(),
2218 options.max_price_multiplier,
2219 ) {
2220 rejected_by_price += 1;
2221 continue;
2222 }
2223 let score = reviewer_score(
2224 &options,
2225 &author_identity,
2226 &candidate_identity,
2227 model,
2228 &preferred_families,
2229 );
2230 candidates.push(ReviewerCandidate {
2231 identity: candidate_identity,
2232 score,
2233 });
2234 }
2235
2236 candidates.sort_by(|left, right| {
2237 right
2238 .score
2239 .partial_cmp(&left.score)
2240 .unwrap_or(std::cmp::Ordering::Equal)
2241 .then_with(|| left.identity.provider.cmp(&right.identity.provider))
2242 .then_with(|| left.identity.id.cmp(&right.identity.id))
2243 });
2244
2245 let Some(best) = candidates.into_iter().next() else {
2246 if rejected_by_price > 0 {
2247 let cap = options.max_price_multiplier.unwrap_or_default();
2248 return fallback(format!(
2249 "no different-family reviewer satisfied max_price_multiplier {cap}"
2250 ));
2251 }
2252 if diff_family_seen == 0 {
2253 return fallback(
2254 "no active serverless different-family reviewer is cataloged".to_string(),
2255 );
2256 }
2257 return fallback("all different-family reviewer candidates were excluded".to_string());
2258 };
2259
2260 let estimate = cost_estimate(
2261 best.identity.pricing.as_ref(),
2262 author_identity.pricing.as_ref(),
2263 );
2264 ComplementaryReviewerSelection {
2265 intent: options.intent.as_str().to_string(),
2266 reason: reviewer_reason(&author_identity, &best.identity, estimate.as_ref()),
2267 estimated_incremental_cost: estimate,
2268 author: author_identity,
2269 reviewer: best.identity,
2270 fallback: false,
2271 fallback_reason: None,
2272 }
2273}
2274
/// A scored reviewer option considered by `pick_complementary_reviewer`.
#[derive(Debug, Clone)]
struct ReviewerCandidate {
    /// Identity of the candidate model.
    identity: ComplementaryModelIdentity,
    /// Ranking score; candidates are sorted with higher scores first.
    score: f64,
}
2280
2281fn complementary_identity(
2282 id: String,
2283 provider: String,
2284 family: String,
2285 lineage: String,
2286 tier: String,
2287 pricing: Option<ModelPricing>,
2288) -> ComplementaryModelIdentity {
2289 ComplementaryModelIdentity {
2290 id,
2291 provider,
2292 family,
2293 lineage,
2294 tier,
2295 pricing,
2296 }
2297}
2298
2299fn reviewer_score(
2300 options: &ComplementaryReviewerOptions,
2301 author: &ComplementaryModelIdentity,
2302 candidate: &ComplementaryModelIdentity,
2303 model: &ModelDef,
2304 preferred_families: &[String],
2305) -> f64 {
2306 let candidate_refs = reviewer_match_refs(candidate);
2307 let mut score = 0.0;
2308 if let Some(rank) = preferred_families
2309 .iter()
2310 .position(|selector| refs_contain_selector(&candidate_refs, selector))
2311 {
2312 score += 1_000.0 - rank as f64;
2313 }
2314 if candidate.provider != author.provider {
2315 score += 100.0;
2316 }
2317 score += match tier_distance(&author.tier, &candidate.tier) {
2318 0 => 80.0,
2319 1 => 45.0,
2320 2 => 15.0,
2321 _ => 0.0,
2322 };
2323 for strength in intent_strengths(options.intent) {
2324 if model.strengths.iter().any(|tag| tag == strength) {
2325 score += 8.0;
2326 }
2327 }
2328 if model.capabilities.iter().any(|tag| tag == "tools") {
2329 score += 4.0;
2330 }
2331 if let (Some(author_total), Some(candidate_total)) = (
2332 pricing_total(author.pricing.as_ref()),
2333 pricing_total(candidate.pricing.as_ref()),
2334 ) {
2335 if author_total > 0.0 {
2336 let ratio = candidate_total / author_total;
2337 if ratio <= 1.0 {
2338 score += 20.0;
2339 }
2340 score -= (ratio - 1.0).abs().min(10.0) * 8.0;
2341 }
2342 }
2343 score
2344}
2345
2346fn intent_strengths(intent: ComplementaryReviewerIntent) -> &'static [&'static str] {
2347 match intent {
2348 ComplementaryReviewerIntent::Review => &["reasoning", "coding", "tool_use"],
2349 ComplementaryReviewerIntent::Critique => &["reasoning", "long_context", "tool_use"],
2350 ComplementaryReviewerIntent::PlanReview => {
2351 &["reasoning", "coding", "agentic", "long_context", "tool_use"]
2352 }
2353 }
2354}
2355
2356fn tier_distance(left: &str, right: &str) -> u8 {
2357 let left = tier_rank(left);
2358 let right = tier_rank(right);
2359 left.abs_diff(right)
2360}
2361
/// Maps a tier name onto an ordinal: `small` = 0, `mid` = 1,
/// `frontier`/`reasoning` = 2. Any other name is ranked like `mid`.
fn tier_rank(tier: &str) -> u8 {
    if tier == "small" {
        0
    } else if tier == "frontier" || tier == "reasoning" {
        2
    } else {
        // "mid" and every unrecognised tier share the middle rank.
        1
    }
}
2370
2371fn exceeds_price_cap(
2372 author_pricing: Option<&ModelPricing>,
2373 candidate_pricing: Option<&ModelPricing>,
2374 max_price_multiplier: Option<f64>,
2375) -> bool {
2376 let Some(max_price_multiplier) = max_price_multiplier else {
2377 return false;
2378 };
2379 let Some(author_total) = pricing_total(author_pricing) else {
2380 return false;
2381 };
2382 let Some(candidate_total) = pricing_total(candidate_pricing) else {
2383 return true;
2384 };
2385 author_total > 0.0 && candidate_total > author_total * max_price_multiplier
2386}
2387
2388fn cost_estimate(
2389 reviewer_pricing: Option<&ModelPricing>,
2390 author_pricing: Option<&ModelPricing>,
2391) -> Option<ComplementaryCostEstimate> {
2392 let reviewer_pricing = reviewer_pricing?;
2393 let total_per_mtok = reviewer_pricing.input_per_mtok + reviewer_pricing.output_per_mtok;
2394 let multiplier_vs_author = pricing_total(author_pricing)
2395 .filter(|author_total| *author_total > 0.0)
2396 .map(|author_total| total_per_mtok / author_total);
2397 Some(ComplementaryCostEstimate {
2398 input_per_mtok: reviewer_pricing.input_per_mtok,
2399 output_per_mtok: reviewer_pricing.output_per_mtok,
2400 total_per_mtok,
2401 multiplier_vs_author,
2402 })
2403}
2404
2405fn pricing_total(pricing: Option<&ModelPricing>) -> Option<f64> {
2406 pricing.map(|pricing| pricing.input_per_mtok + pricing.output_per_mtok)
2407}
2408
2409fn reviewer_reason(
2410 author: &ComplementaryModelIdentity,
2411 reviewer: &ComplementaryModelIdentity,
2412 estimate: Option<&ComplementaryCostEstimate>,
2413) -> String {
2414 let cost = estimate
2415 .and_then(|estimate| estimate.multiplier_vs_author)
2416 .map(|multiplier| format!("{multiplier:.2}x the author model price"))
2417 .unwrap_or_else(|| "price ratio unavailable".to_string());
2418 format!(
2419 "selected {} via {} because family {} differs from author family {}, tier {} matches author tier {}, and {}",
2420 reviewer.id,
2421 reviewer.provider,
2422 reviewer.family,
2423 author.family,
2424 reviewer.tier,
2425 author.tier,
2426 cost
2427 )
2428}
2429
2430fn reviewer_match_refs(identity: &ComplementaryModelIdentity) -> BTreeSet<String> {
2431 BTreeSet::from([
2432 identity.id.to_ascii_lowercase(),
2433 identity.provider.to_ascii_lowercase(),
2434 format!("{}/{}", identity.provider, identity.id).to_ascii_lowercase(),
2435 format!("{}:{}", identity.provider, identity.id).to_ascii_lowercase(),
2436 identity.family.to_ascii_lowercase(),
2437 identity.lineage.to_ascii_lowercase(),
2438 ])
2439}
2440
2441fn refs_contain_selector(refs: &BTreeSet<String>, selector: &str) -> bool {
2442 normalized_catalog_token(Some(selector))
2443 .or_else(|| Some(selector.trim().to_ascii_lowercase()))
2444 .is_some_and(|selector| refs.contains(&selector))
2445}
2446
/// Matches `input` against a glob `pattern` in which `*` stands for any
/// (possibly empty) run of characters. All other characters match literally,
/// and a pattern without `*` must equal `input` exactly.
///
/// Any number of `*` wildcards is supported. The text before the first `*`
/// must be a prefix of `input`, the text after the last `*` must be a suffix
/// of what remains, and every segment in between must occur in order without
/// overlapping its neighbours.
fn glob_match(pattern: &str, input: &str) -> bool {
    let mut segments = pattern.split('*');

    // `split` always yields at least one (possibly empty) segment.
    let head = segments.next().unwrap_or_default();
    let Some(mut remaining) = input.strip_prefix(head) else {
        return false;
    };

    let mut tail: Vec<&str> = segments.collect();
    let Some(last) = tail.pop() else {
        // No wildcard at all: the literal must consume the whole input.
        return remaining.is_empty();
    };

    // Taking the leftmost occurrence of each middle segment leaves the most
    // room for later segments, so this greedy scan never misses a match.
    for segment in tail {
        let Some(at) = remaining.find(segment) else {
            return false;
        };
        remaining = &remaining[at + segment.len()..];
    }

    remaining.ends_with(last)
}
2464
2465fn dirs_or_home() -> Option<String> {
2466 crate::user_dirs::home_dir().map(|home| home.to_string_lossy().into_owned())
2467}
2468
2469pub fn resolve_base_url(pdef: &ProviderDef) -> String {
2472 if let Some(env_name) = &pdef.base_url_env {
2473 if let Ok(val) = std::env::var(env_name) {
2474 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
2476 if !trimmed.is_empty() {
2477 return trimmed.to_string();
2478 }
2479 }
2480 }
2481 pdef.base_url.clone()
2482}
2483
2484const EMBEDDED_PROVIDERS_TOML: &str = include_str!("llm/providers.toml");
2488
2489fn default_config() -> ProvidersConfig {
2503 parse_config_toml(EMBEDDED_PROVIDERS_TOML)
2504 .expect("embedded providers.toml must parse — invariant checked by harn-vm tests")
2505}
2506
/// Test helper: layers `overlay` on top of the embedded default catalog and
/// returns the merged configuration.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
2513
2514#[cfg(test)]
2515mod tests {
2516 use super::*;
2517
2518 fn reset_overrides() {
2519 clear_user_overrides();
2520 }
2521
2522 #[test]
2523 fn test_glob_match_prefix() {
2524 assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
2525 assert!(glob_match("gpt-*", "gpt-4o"));
2526 assert!(!glob_match("claude-*", "gpt-4o"));
2527 }
2528
2529 #[test]
2530 fn test_glob_match_suffix() {
2531 assert!(glob_match("*-latest", "llama3.2-latest"));
2532 assert!(!glob_match("*-latest", "llama3.2"));
2533 }
2534
2535 #[test]
2536 fn test_glob_match_middle() {
2537 assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
2538 assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
2539 }
2540
2541 #[test]
2542 fn test_glob_match_exact() {
2543 assert!(glob_match("gpt-4o", "gpt-4o"));
2544 assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
2545 }
2546
2547 #[test]
2548 fn test_infer_provider_from_defaults() {
2549 let _guard = crate::llm::env_lock().lock().expect("env lock");
2550 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2551 unsafe {
2552 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2553 }
2554
2555 assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
2556 assert_eq!(infer_provider("gpt-4o"), "openai");
2557 assert_eq!(infer_provider("o1-preview"), "openai");
2558 assert_eq!(infer_provider("o3-mini"), "openai");
2559 assert_eq!(infer_provider("o4-mini"), "openai");
2560 assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
2561 assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
2562 assert_eq!(infer_provider("llama3.2:latest"), "ollama");
2563 assert_eq!(infer_provider("unknown-model"), "anthropic");
2564
2565 unsafe {
2566 match prev_default_provider {
2567 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2568 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2569 }
2570 }
2571 }
2572
2573 #[test]
2574 fn test_infer_provider_prefix_rules() {
2575 assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
2576 assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
2577 assert_eq!(infer_provider("local:owner/model"), "ollama");
2579 assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
2580 }
2581
2582 #[test]
2583 fn test_openrouter_inference_requires_one_slash() {
2584 let _guard = crate::llm::env_lock().lock().expect("env lock");
2585 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2586 unsafe {
2587 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2588 }
2589
2590 assert_eq!(infer_provider("org/model"), "openrouter");
2591 assert_eq!(infer_provider("org/team/model"), "anthropic");
2592
2593 unsafe {
2594 match prev_default_provider {
2595 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2596 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2597 }
2598 }
2599 }
2600
2601 #[test]
2602 fn test_cerebras_inference_beats_openrouter_slash_fallback() {
2603 let _guard = crate::llm::env_lock().lock().expect("env lock");
2604 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2605 unsafe {
2606 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2607 }
2608
2609 assert_eq!(infer_provider("cerebras/gpt-oss-120b"), "cerebras");
2610 assert_eq!(infer_provider("cerebras/zai-glm-4.7"), "cerebras");
2611 assert_eq!(infer_provider("cerebras/llama-3.3-70b"), "cerebras");
2612
2613 unsafe {
2614 match prev_default_provider {
2615 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2616 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2617 }
2618 }
2619 }
2620
2621 #[test]
2622 fn test_direct_catalog_model_id_resolves_to_catalog_provider() {
2623 let _guard = crate::llm::env_lock().lock().expect("env lock");
2628 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
2629 unsafe {
2630 std::env::remove_var("HARN_DEFAULT_PROVIDER");
2631 }
2632
2633 for model in ["gpt-oss-120b", "zai-glm-4.7", "llama-3.3-70b"] {
2634 assert_eq!(
2635 infer_provider(model),
2636 "cerebras",
2637 "{model} should route to its catalog provider"
2638 );
2639 let resolved = resolve_model_info(model);
2640 assert_eq!(resolved.id, model);
2641 assert_eq!(resolved.provider, "cerebras");
2642 }
2643
2644 unsafe {
2645 match prev_default_provider {
2646 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
2647 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
2648 }
2649 }
2650 }
2651
2652 #[test]
2653 fn test_equivalent_model_catalog_entries_use_capability_compatible_routes() {
2654 reset_overrides();
2655
2656 assert_eq!(
2657 wire_model_id("groq/openai/gpt-oss-120b"),
2658 "openai/gpt-oss-120b"
2659 );
2660 assert_eq!(wire_model_id("gpt-oss-120b"), "gpt-oss-120b");
2661
2662 let equivalents = equivalent_model_catalog_entries("gpt-oss-120b");
2663 let ids = equivalents
2664 .iter()
2665 .map(|(id, _)| id.as_str())
2666 .collect::<Vec<_>>();
2667
2668 assert!(
2669 ids.contains(&"groq/openai/gpt-oss-120b"),
2670 "Cerebras GPT-OSS should surface the Groq serving variant"
2671 );
2672 assert!(
2673 !ids.contains(&"gpt-oss-120b"),
2674 "equivalence results should not include the source row"
2675 );
2676 assert!(equivalents.iter().all(|(_, model)| {
2677 model.equivalence_group.as_deref() == Some("openai-gpt-oss-120b")
2678 }));
2679 }
2680
2681 #[test]
2682 fn test_user_catalog_overlay_re_homes_model_provider() {
2683 reset_overrides();
2687 let mut overlay = ProvidersConfig::default();
2688 overlay.models.insert(
2689 "gpt-4o".to_string(),
2690 ModelDef {
2691 name: "GPT-4o via OpenRouter".to_string(),
2692 provider: "openrouter".to_string(),
2693 context_window: 128_000,
2694 logical_model: None,
2695 equivalence_group: None,
2696 served_variant: None,
2697 wire_model: None,
2698 api_dialect: None,
2699 rate_limits: None,
2700 architecture: None,
2701 local_memory: None,
2702 runtime_context_window: None,
2703 stream_timeout: None,
2704 capabilities: Vec::new(),
2705 pricing: None,
2706 deprecated: false,
2707 deprecation_note: None,
2708 superseded_by: None,
2709 fast_mode: None,
2710 quality_tags: Vec::new(),
2711 availability: ModelAvailability::default(),
2712 tier: None,
2713 open_weight: None,
2714 strengths: Vec::new(),
2715 benchmarks: std::collections::BTreeMap::new(),
2716 family: None,
2717 lineage: None,
2718 complementary_with: Vec::new(),
2719 avoid_as_reviewer_for: Vec::new(),
2720 },
2721 );
2722 set_user_overrides(Some(overlay));
2723
2724 assert_eq!(infer_provider("gpt-4o"), "openrouter");
2725
2726 reset_overrides();
2727 }
2728
2729 #[test]
2730 fn test_resolve_model_info_normalizes_provider_prefixes() {
2731 let local = resolve_model_info("local:gemma-4-e4b-it");
2732 assert_eq!(local.id, "gemma-4-e4b-it");
2733 assert_eq!(local.provider, "ollama");
2734
2735 let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
2736 assert_eq!(ollama.id, "qwen3:30b-a3b");
2737 assert_eq!(ollama.provider, "ollama");
2738
2739 let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
2740 assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
2741 assert_eq!(hf.provider, "huggingface");
2742
2743 let cerebras = resolve_model_info("cerebras/gpt-oss-120b");
2744 assert_eq!(cerebras.id, "gpt-oss-120b");
2745 assert_eq!(cerebras.provider, "cerebras");
2746
2747 let cerebras_glm = resolve_model_info("cerebras/zai-glm-4.7");
2748 assert_eq!(cerebras_glm.id, "zai-glm-4.7");
2749 assert_eq!(cerebras_glm.provider, "cerebras");
2750 }
2751
2752 #[test]
2753 fn test_model_tier_from_defaults() {
2754 assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
2758 assert_eq!(model_tier("gpt-4o"), "frontier");
2759 assert_eq!(model_tier("Qwen/Qwen3.5-9B"), "small");
2760 assert_eq!(model_tier("deepseek-v4-flash"), "mid");
2761 assert_eq!(model_tier("deepseek-v4-pro"), "frontier");
2762 assert_eq!(model_tier("MiniMax-M2.7"), "frontier");
2763 assert_eq!(model_tier("glm-5.1"), "frontier");
2764 assert_eq!(model_tier("definitely-not-a-real-model"), "mid");
2766 }
2767
2768 #[test]
2769 fn test_model_family_preserves_underlying_hosted_lineage() {
2770 assert_eq!(
2771 model_family("openrouter", "anthropic/claude-sonnet-4-6"),
2772 "anthropic-claude"
2773 );
2774 assert_eq!(
2775 model_family("openrouter", "google/gemini-2.5-flash"),
2776 "google-gemini"
2777 );
2778 assert_eq!(
2779 model_family("openrouter", "openai/o3-mini"),
2780 "openai-reasoning"
2781 );
2782 assert_eq!(model_lineage("openrouter", "openai/gpt-5.5"), "openai-gpt5");
2783 assert_eq!(
2784 model_lineage("openrouter", "openai/o3-mini"),
2785 "openai-reasoning"
2786 );
2787 assert_eq!(
2788 model_lineage("anthropic", "claude-opus-4-8"),
2789 "claude-opus-adaptive"
2790 );
2791 assert_eq!(model_lineage("llamacpp", "qwen3.6-35b-a3b"), "qwen3");
2792 }
2793
2794 #[test]
2795 fn test_complementary_reviewer_uses_different_family() {
2796 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
2797 author_model: "claude-sonnet-4-6".to_string(),
2798 author_provider: None,
2799 intent: ComplementaryReviewerIntent::PlanReview,
2800 max_price_multiplier: Some(3.0),
2801 });
2802
2803 assert!(!selection.fallback, "{selection:?}");
2804 assert_eq!(selection.author.family, "anthropic-claude");
2805 assert_ne!(selection.reviewer.family, selection.author.family);
2806 assert_eq!(selection.reviewer.tier, "frontier");
2807 assert!(selection.estimated_incremental_cost.is_some());
2808 }
2809
2810 #[test]
2811 fn test_complementary_reviewer_falls_back_deterministically_on_price_cap() {
2812 let selection = pick_complementary_reviewer(ComplementaryReviewerOptions {
2813 author_model: "gpt-4o-mini".to_string(),
2814 author_provider: Some("openai".to_string()),
2815 intent: ComplementaryReviewerIntent::Review,
2816 max_price_multiplier: Some(0.01),
2817 });
2818
2819 assert!(selection.fallback, "{selection:?}");
2820 assert_eq!(selection.reviewer.id, "gpt-4o-mini");
2821 assert_eq!(selection.reviewer.family, selection.author.family);
2822 assert!(selection
2823 .fallback_reason
2824 .as_deref()
2825 .is_some_and(|reason| reason.contains("max_price_multiplier")));
2826 }
2827
2828 #[test]
2829 fn test_resolve_model_unknown_alias() {
2830 let (id, provider) = resolve_model("gpt-4o");
2831 assert_eq!(id, "gpt-4o");
2832 assert!(provider.is_none());
2833 }
2834
2835 #[test]
2836 fn test_provider_names() {
2837 let names = provider_names();
2838 assert!(names.len() >= 7);
2839 assert!(names.contains(&"anthropic".to_string()));
2840 assert!(names.contains(&"together".to_string()));
2841 assert!(names.contains(&"local".to_string()));
2842 assert!(names.contains(&"mlx".to_string()));
2843 assert!(names.contains(&"openai".to_string()));
2844 assert!(names.contains(&"ollama".to_string()));
2845 assert!(names.contains(&"bedrock".to_string()));
2846 assert!(names.contains(&"azure_openai".to_string()));
2847 assert!(names.contains(&"vertex".to_string()));
2848 }
2849
2850 #[test]
2851 fn global_provider_file_is_an_overlay_on_builtin_defaults() {
2852 let mut overlay = ProvidersConfig {
2853 default_provider: Some("ollama".to_string()),
2854 ..Default::default()
2855 };
2856 overlay.aliases.insert(
2857 "quickstart".to_string(),
2858 AliasDef {
2859 id: "llama3.2".to_string(),
2860 provider: "ollama".to_string(),
2861 tool_format: None,
2862 },
2863 );
2864
2865 let merged = merge_global_config(overlay);
2866
2867 assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
2868 assert!(merged.providers.contains_key("anthropic"));
2869 assert!(merged.providers.contains_key("ollama"));
2870 assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
2871 }
2872
2873 #[test]
2874 fn partial_provider_overlay_preserves_builtin_provider_metadata() {
2875 let overlay = parse_config_toml(
2876 r#"
2877 [providers.ollama]
2878 base_url = "http://localhost:11435"
2879 extra_headers = { "x-local" = "1" }
2880 "#,
2881 )
2882 .expect("provider overlay parses");
2883
2884 let merged = merge_global_config(overlay);
2885 let ollama = merged
2886 .providers
2887 .get("ollama")
2888 .expect("ollama remains configured");
2889
2890 assert_eq!(ollama.base_url, "http://localhost:11435");
2891 assert_eq!(ollama.auth_style, "none");
2892 assert_eq!(ollama.chat_endpoint, "/api/chat");
2893 assert_eq!(ollama.completion_endpoint.as_deref(), Some("/api/generate"));
2894 assert_eq!(ollama.cost_per_1k_in, Some(0.0));
2895 assert_eq!(ollama.cost_per_1k_out, Some(0.0));
2896 assert_eq!(
2897 ollama
2898 .healthcheck
2899 .as_ref()
2900 .and_then(|healthcheck| healthcheck.path.as_deref()),
2901 Some("/api/tags")
2902 );
2903 assert_eq!(
2904 ollama.extra_headers.get("x-local").map(String::as_str),
2905 Some("1")
2906 );
2907 }
2908
2909 #[test]
2910 fn partial_provider_overlay_can_explicitly_replace_default_auth_style() {
2911 let overlay = parse_config_toml(
2912 r#"
2913 [providers.ollama]
2914 auth_style = "bearer"
2915 auth_env = "OLLAMA_API_KEY"
2916 "#,
2917 )
2918 .expect("provider overlay parses");
2919
2920 let merged = merge_global_config(overlay);
2921 let ollama = merged
2922 .providers
2923 .get("ollama")
2924 .expect("ollama remains configured");
2925
2926 assert_eq!(ollama.auth_style, "bearer");
2927 assert_eq!(auth_env_names(&ollama.auth_env), vec!["OLLAMA_API_KEY"]);
2928 assert_eq!(ollama.chat_endpoint, "/api/chat");
2929 }
2930
2931 #[test]
2932 fn test_resolve_tier_model_default_aliases() {
2933 let (model, provider) = resolve_tier_model("frontier", None)
2938 .expect("frontier alias must resolve from the embedded catalog");
2939 assert_eq!(provider, "anthropic");
2940 assert!(
2941 model_catalog_entry(&model)
2942 .is_some_and(|entry| entry.provider == "anthropic" && !entry.deprecated),
2943 "frontier alias must point at a registered, non-deprecated anthropic model (got {model})"
2944 );
2945
2946 let (model, provider) = resolve_tier_model("small", None)
2947 .expect("small alias must resolve from the embedded catalog");
2948 assert!(
2949 [
2950 "openrouter",
2951 "huggingface",
2952 "local",
2953 "llamacpp",
2954 "mlx",
2955 "ollama"
2956 ]
2957 .contains(&provider.as_str()),
2958 "small tier should resolve to an open-weight provider (got {provider} / {model})"
2959 );
2960 }
2961
2962 #[test]
2963 fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
2964 let (model, provider) = resolve_tier_model("mid", Some("openai"))
2968 .expect("mid tier scoped to openai must resolve");
2969 assert_eq!(provider, "openai");
2970 assert!(
2971 model_catalog_entry(&model).is_some(),
2972 "mid/openai alias must point at a registered model (got {model})"
2973 );
2974 }
2975
2976 #[test]
2977 fn test_provider_config_anthropic() {
2978 let pdef = provider_config("anthropic").unwrap();
2979 assert_eq!(pdef.auth_style, "header");
2980 assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
2981 }
2982
2983 #[test]
2984 fn test_provider_config_mlx() {
2985 let pdef = provider_config("mlx").unwrap();
2986 assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
2987 assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
2988 assert_eq!(
2989 pdef.healthcheck.unwrap().path.as_deref(),
2990 Some("/v1/models")
2991 );
2992
2993 let (model, provider) = resolve_model("mlx-qwen36-27b");
2994 assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
2995 assert_eq!(provider.as_deref(), Some("mlx"));
2996 }
2997
2998 #[test]
2999 fn test_enterprise_provider_defaults_and_inference() {
3000 let bedrock = provider_config("bedrock").unwrap();
3001 assert_eq!(bedrock.auth_style, "aws_sigv4");
3002 assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
3003 assert_eq!(
3004 infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
3005 "bedrock"
3006 );
3007 assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");
3008
3009 let azure = provider_config("azure_openai").unwrap();
3010 assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
3011 assert_eq!(
3012 auth_env_names(&azure.auth_env),
3013 vec![
3014 "AZURE_OPENAI_API_KEY".to_string(),
3015 "AZURE_OPENAI_AD_TOKEN".to_string(),
3016 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
3017 ]
3018 );
3019
3020 let vertex = provider_config("vertex").unwrap();
3021 assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
3022 assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
3023 }
3024
3025 #[test]
3026 fn test_default_provider_env_override_for_unknown_model() {
3027 let _guard = crate::llm::env_lock().lock().expect("env lock");
3028 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3029 unsafe {
3030 std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
3031 }
3032
3033 let inference = infer_provider_detail("unknown-model");
3034
3035 unsafe {
3036 match prev_default_provider {
3037 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3038 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3039 }
3040 }
3041
3042 assert_eq!(inference.provider, "openai");
3043 assert_eq!(
3044 inference.source,
3045 crate::llm::provider::ProviderInferenceSource::DefaultFallback
3046 );
3047 }
3048
3049 #[test]
3050 fn test_unknown_model_family_ignores_default_provider_fallback() {
3051 let _guard = crate::llm::env_lock().lock().expect("env lock");
3052 let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
3053 unsafe {
3054 std::env::set_var("HARN_DEFAULT_PROVIDER", "ollama");
3055 }
3056
3057 let unknown = resolve_model_info("mystery-model-xyz");
3058 let known_family = resolve_model_info("deepseek-mystery-model");
3059
3060 unsafe {
3061 match prev_default_provider {
3062 Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
3063 None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
3064 }
3065 }
3066
3067 assert_eq!(unknown.provider, "ollama");
3068 assert_eq!(unknown.family, "unknown");
3069 assert_eq!(unknown.lineage, "unknown");
3070 assert_eq!(known_family.family, "deepseek");
3071 assert_eq!(known_family.lineage, "deepseek");
3072 }
3073
3074 #[test]
3075 fn test_resolve_base_url_no_env() {
3076 let pdef = ProviderDef {
3077 base_url: "https://example.com".to_string(),
3078 ..Default::default()
3079 };
3080 assert_eq!(resolve_base_url(&pdef), "https://example.com");
3081 }
3082
3083 #[test]
3084 fn test_default_config_roundtrip() {
3085 let config = default_config();
3086 assert!(!config.providers.is_empty());
3087 assert!(!config.inference_rules.is_empty());
3088 assert_eq!(config.tier_defaults.default, "mid");
3091 let frontiers = config
3093 .models
3094 .iter()
3095 .filter(|(_, m)| m.tier.as_deref() == Some("frontier"))
3096 .count();
3097 assert!(
3098 frontiers >= 4,
3099 "expected at least 4 frontier-tagged models, got {frontiers}"
3100 );
3101 }
3102
3103 #[test]
3104 fn test_local_ollama_catalog_metadata() {
3105 reset_overrides();
3106
3107 let devstral =
3108 model_catalog_entry("devstral-small-2:24b").expect("devstral-small-2 catalog entry");
3109 assert_eq!(devstral.context_window, 262_144);
3110 assert!(!devstral.capabilities.iter().any(|cap| cap == "vision"));
3111
3112 let gemma4 = model_catalog_entry("gemma4:26b").expect("gemma4 catalog entry");
3113 assert_eq!(gemma4.context_window, 262_144);
3114 assert!(gemma4.capabilities.iter().any(|cap| cap == "vision"));
3115 }
3116
3117 #[test]
3118 fn test_external_config_overlays_default_catalog() {
3119 let mut config = default_config();
3120 let mut overlay = ProvidersConfig {
3121 default_provider: Some("ollama".to_string()),
3122 ..Default::default()
3123 };
3124 overlay.providers.insert(
3125 "custom".to_string(),
3126 ProviderDef {
3127 base_url: "https://llm.example.test/v1".to_string(),
3128 chat_endpoint: "/chat/completions".to_string(),
3129 ..Default::default()
3130 },
3131 );
3132
3133 config.merge_from(&overlay);
3134
3135 assert_eq!(config.default_provider.as_deref(), Some("ollama"));
3136 assert!(config.providers.contains_key("custom"));
3137 assert!(config.providers.contains_key("anthropic"));
3138 assert!(config.providers.contains_key("ollama"));
3139 }
3140
3141 #[test]
3142 fn test_model_params_empty() {
3143 let params = model_params("claude-sonnet-4-20250514");
3144 assert!(params.is_empty());
3145 }
3146
3147 #[test]
3148 fn test_user_overrides_add_provider_and_alias() {
3149 reset_overrides();
3150 let mut overlay = ProvidersConfig::default();
3151 overlay.providers.insert(
3152 "acme".to_string(),
3153 ProviderDef {
3154 base_url: "https://llm.acme.test/v1".to_string(),
3155 chat_endpoint: "/chat/completions".to_string(),
3156 ..Default::default()
3157 },
3158 );
3159 overlay.aliases.insert(
3160 "acme-fast".to_string(),
3161 AliasDef {
3162 id: "acme/model-fast".to_string(),
3163 provider: "acme".to_string(),
3164 tool_format: Some("native".to_string()),
3165 },
3166 );
3167 set_user_overrides(Some(overlay));
3168
3169 let (model, provider) = resolve_model("acme-fast");
3170 assert_eq!(model, "acme/model-fast");
3171 assert_eq!(provider.as_deref(), Some("acme"));
3172 assert!(provider_names().contains(&"acme".to_string()));
3173 assert_eq!(
3174 provider_config("acme").map(|provider| provider.base_url),
3175 Some("https://llm.acme.test/v1".to_string())
3176 );
3177
3178 reset_overrides();
3179 }
3180
3181 #[test]
3182 fn test_default_tool_format_uses_capability_matrix() {
3183 reset_overrides();
3184
3185 assert_eq!(
3186 default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
3187 "native"
3188 );
3189 assert_eq!(
3190 default_tool_format("devstral-small-2:24b", "ollama"),
3191 "text"
3192 );
3193 assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "native");
3197 assert_eq!(
3198 default_tool_format("deepseek/deepseek-v3.2", "openrouter"),
3199 "text"
3200 );
3201 assert_eq!(
3202 default_tool_format("qwen/qwen3-coder-flash", "openrouter"),
3203 "text"
3204 );
3205 }
3206
3207 #[test]
3208 fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
3209 reset_overrides();
3210 let mut overlay = ProvidersConfig::default();
3211 overlay.models.insert(
3212 "acme/model-fast".to_string(),
3213 ModelDef {
3214 name: "Acme Fast".to_string(),
3215 provider: "acme".to_string(),
3216 context_window: 65_536,
3217 logical_model: None,
3218 equivalence_group: None,
3219 served_variant: None,
3220 wire_model: None,
3221 api_dialect: None,
3222 rate_limits: None,
3223 architecture: None,
3224 local_memory: None,
3225 runtime_context_window: None,
3226 stream_timeout: Some(42.0),
3227 capabilities: vec!["tools".to_string(), "streaming".to_string()],
3228 pricing: Some(ModelPricing {
3229 input_per_mtok: 1.25,
3230 output_per_mtok: 2.5,
3231 cache_read_per_mtok: Some(0.25),
3232 cache_write_per_mtok: None,
3233 }),
3234 deprecated: false,
3235 deprecation_note: None,
3236 superseded_by: None,
3237 fast_mode: None,
3238 quality_tags: Vec::new(),
3239 availability: ModelAvailability::default(),
3240 tier: None,
3241 open_weight: None,
3242 strengths: Vec::new(),
3243 benchmarks: std::collections::BTreeMap::new(),
3244 family: None,
3245 lineage: None,
3246 complementary_with: Vec::new(),
3247 avoid_as_reviewer_for: Vec::new(),
3248 },
3249 );
3250 overlay
3251 .qc_defaults
3252 .insert("acme".to_string(), "acme/model-cheap".to_string());
3253 set_user_overrides(Some(overlay));
3254
3255 let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
3256 assert_eq!(entry.context_window, 65_536);
3257 assert_eq!(
3258 entry.capabilities,
3259 vec!["streaming".to_string(), "tools".to_string()]
3260 );
3261 assert_eq!(
3262 entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
3263 Some(1.25)
3264 );
3265 assert_eq!(
3266 pricing_per_1k_for("acme", "acme/model-fast"),
3267 Some((0.00125, 0.0025))
3268 );
3269 assert_eq!(
3270 qc_default_model("acme").as_deref(),
3271 Some("acme/model-cheap")
3272 );
3273
3274 reset_overrides();
3275 }
3276
3277 #[test]
3278 fn test_user_overrides_prepend_inference_rules() {
3279 reset_overrides();
3280 let mut overlay = ProvidersConfig::default();
3281 overlay.inference_rules.push(InferenceRule {
3282 pattern: Some("internal-*".to_string()),
3283 contains: None,
3284 exact: None,
3285 provider: "openai".to_string(),
3286 });
3287 set_user_overrides(Some(overlay));
3288
3289 assert_eq!(infer_provider("internal-foo"), "openai");
3290
3291 reset_overrides();
3292 }
3293
3294 #[test]
3301 fn embedded_providers_toml_parses_and_is_not_trivially_empty() {
3302 let config = default_config();
3303 assert!(
3304 config.providers.len() >= 10,
3305 "expected >=10 providers in embedded catalog, got {}",
3306 config.providers.len()
3307 );
3308 assert!(
3309 config.models.len() >= 20,
3310 "expected >=20 models in embedded catalog, got {}",
3311 config.models.len()
3312 );
3313 assert!(
3314 config.aliases.len() >= 15,
3315 "expected >=15 aliases in embedded catalog, got {}",
3316 config.aliases.len()
3317 );
3318 assert_eq!(config.default_provider.as_deref(), Some("anthropic"));
3319 }
3320
3321 #[test]
3322 fn embedded_catalog_every_deprecated_model_has_a_note() {
3323 let config = default_config();
3324 let offenders: Vec<&str> = config
3325 .models
3326 .iter()
3327 .filter(|(_, model)| {
3328 model.deprecated
3329 && model
3330 .deprecation_note
3331 .as_deref()
3332 .unwrap_or("")
3333 .trim()
3334 .is_empty()
3335 })
3336 .map(|(id, _)| id.as_str())
3337 .collect();
3338 assert!(
3339 offenders.is_empty(),
3340 "deprecated models missing a deprecation_note: {offenders:?}"
3341 );
3342 }
3343
3344 #[test]
3345 fn embedded_cerebras_catalog_separates_public_and_dedicated_routes() {
3346 let config = default_config();
3347 for id in ["gpt-oss-120b", "zai-glm-4.7"] {
3348 let model = config.models.get(id).expect("current public Cerebras row");
3349 assert_eq!(model.provider, "cerebras");
3350 assert_eq!(model.availability, ModelAvailability::Serverless);
3351 assert!(!model.deprecated);
3352 }
3353
3354 let llama = config
3355 .models
3356 .get("llama-3.3-70b")
3357 .expect("legacy Cerebras row");
3358 assert_eq!(llama.provider, "cerebras");
3359 assert_eq!(llama.availability, ModelAvailability::Dedicated);
3360 assert!(llama.deprecated);
3361 }
3362
3363 #[test]
3364 fn embedded_catalog_every_model_targets_a_registered_provider() {
3365 let config = default_config();
3366 let known: std::collections::BTreeSet<&str> =
3367 config.providers.keys().map(String::as_str).collect();
3368 let orphans: Vec<(&str, &str)> = config
3369 .models
3370 .iter()
3371 .filter(|(_, model)| !known.contains(model.provider.as_str()))
3372 .map(|(id, model)| (id.as_str(), model.provider.as_str()))
3373 .collect();
3374 assert!(
3375 orphans.is_empty(),
3376 "models reference unknown providers: {orphans:?}"
3377 );
3378 }
3379
#[test]
fn embedded_catalog_every_alias_targets_a_registered_provider() {
    // Every alias in the embedded catalog must name a provider that is
    // present in the catalog's `providers` table.
    let catalog = default_config();
    let registered: std::collections::BTreeSet<&str> = catalog
        .providers
        .keys()
        .map(|provider_name| provider_name.as_str())
        .collect();

    // Collect (alias name, provider) pairs whose provider is not registered so
    // the failure message lists all offenders at once.
    let orphans: Vec<(&str, &str)> = catalog
        .aliases
        .iter()
        .filter_map(|(name, alias)| {
            let provider = alias.provider.as_str();
            (!registered.contains(provider)).then_some((name.as_str(), provider))
        })
        .collect();

    assert!(
        orphans.is_empty(),
        "aliases reference unknown providers: {orphans:?}"
    );
}
3396
#[test]
fn embedded_catalog_every_qc_default_targets_a_known_model() {
    // Every value in `qc_defaults` must be a key of the catalog's `models` table.
    let catalog = default_config();

    // Collect (qc_defaults key, model id) pairs pointing at an uncataloged
    // model so the failure message lists all offenders at once.
    let orphans: Vec<(&str, &str)> = catalog
        .qc_defaults
        .iter()
        .filter_map(|(provider, model_id)| {
            let cataloged = catalog.models.contains_key(model_id.as_str());
            (!cataloged).then_some((provider.as_str(), model_id.as_str()))
        })
        .collect();

    assert!(
        orphans.is_empty(),
        "qc_defaults reference unknown models: {orphans:?}"
    );
}
3411
#[test]
fn embedded_catalog_pricing_rates_are_non_negative() {
    // For every model that carries pricing, the input/output rates and any
    // present cache rates must compare `>= 0.0`.
    let catalog = default_config();

    // Models without a pricing table are skipped entirely.
    let priced_models = catalog
        .models
        .iter()
        .filter_map(|(id, model)| model.pricing.as_ref().map(|rates| (id, rates)));

    for (id, rates) in priced_models {
        let base_rates_ok = rates.input_per_mtok >= 0.0 && rates.output_per_mtok >= 0.0;
        assert!(
            base_rates_ok,
            "{id}: negative pricing — in={} out={}",
            rates.input_per_mtok,
            rates.output_per_mtok
        );

        // Cache rates are optional; only the ones that are set get checked.
        if let Some(rate) = rates.cache_read_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_read rate {rate}");
        }
        if let Some(rate) = rates.cache_write_per_mtok {
            assert!(rate >= 0.0, "{id}: negative cache_write rate {rate}");
        }
    }
}
3433
#[test]
fn model_availability_parses_known_strings() {
    // `ModelAvailability::parse` must accept the three known spellings, reject
    // an unrecognized one, and round-trip every variant through `as_str`.
    let expectations = [
        ("serverless", Some(ModelAvailability::Serverless)),
        ("dedicated", Some(ModelAvailability::Dedicated)),
        ("unknown", Some(ModelAvailability::Unknown)),
        ("provisioned", None),
    ];
    for (text, expected) in expectations {
        assert_eq!(ModelAvailability::parse(text), expected);
    }

    // Round trip: the string form of each variant parses back to that variant.
    let variants = [
        ModelAvailability::Serverless,
        ModelAvailability::Dedicated,
        ModelAvailability::Unknown,
    ];
    for variant in variants {
        assert_eq!(ModelAvailability::parse(variant.as_str()), Some(variant));
    }
}
3457
#[test]
fn embedded_catalog_marks_together_dedicated_route_as_dedicated() {
    // The `Qwen/Qwen3-Coder-Next-FP8` row must belong to `together` and be
    // flagged with dedicated availability.
    let catalog = default_config();
    let route_id = "Qwen/Qwen3-Coder-Next-FP8";
    let lookup = catalog.models.get(route_id);
    let entry = lookup.expect("Together Qwen3 Coder Next FP8 is cataloged");
    assert_eq!(entry.provider, "together");
    assert_eq!(entry.availability, ModelAvailability::Dedicated);
}
3468
#[test]
fn embedded_catalog_dedicated_models_are_not_targeted_by_tier_aliases() {
    // None of the listed tier-style aliases may point at a (provider, id) pair
    // whose catalog row has dedicated availability.
    let catalog = default_config();

    // Alias names this test treats as tier aliases.
    let tier_alias_names = [
        "frontier",
        "mid",
        "small",
        "tier/frontier",
        "tier/mid",
        "tier/small",
        "sonnet",
        "opus",
        "haiku",
    ];

    // (provider, model id) pairs for every dedicated row in the catalog.
    let dedicated_routes: std::collections::BTreeSet<(&str, &str)> = catalog
        .models
        .iter()
        .filter_map(|(id, model)| {
            let is_dedicated = model.availability == ModelAvailability::Dedicated;
            is_dedicated.then_some((model.provider.as_str(), id.as_str()))
        })
        .collect();

    let tier_aliases = catalog
        .aliases
        .iter()
        .filter(|(name, _)| tier_alias_names.contains(&name.as_str()));
    for (name, alias) in tier_aliases {
        let route = (alias.provider.as_str(), alias.id.as_str());
        assert!(
            !dedicated_routes.contains(&route),
            "tier alias `{name}` targets dedicated-only route `{}/{}`",
            alias.provider,
            alias.id,
        );
    }
}
3503
#[test]
fn embedded_catalog_tier_aliases_resolve_to_active_models() {
    // Each tier alias must resolve via `resolve_tier_model`, the resolved model
    // must have a catalog entry, and that entry must not be deprecated.
    let tier_aliases = ["frontier", "mid", "small"];
    for alias in tier_aliases {
        let Some((model, _provider)) = resolve_tier_model(alias, None) else {
            panic!("tier alias `{alias}` must resolve");
        };
        let Some(entry) = model_catalog_entry(&model) else {
            panic!("tier alias `{alias}` -> `{model}` must be a registered catalog entry");
        };
        assert!(
            !entry.deprecated,
            "tier alias `{alias}` resolves to deprecated model `{model}` ({:?})",
            entry.deprecation_note
        );
    }
}
3522
#[test]
fn opus_alias_tracks_claude_opus_4_8_with_fast_mode() {
    // The `opus` alias must resolve to `claude-opus-4-8` on `anthropic`, and
    // that catalog entry must be active and advertise a fast mode whose input
    // rate is higher than the standard input rate.
    let (resolved_model, resolved_provider) = resolve_model("opus");
    assert_eq!(resolved_model, "claude-opus-4-8");
    assert_eq!(resolved_provider.as_deref(), Some("anthropic"));

    let entry = model_catalog_entry("claude-opus-4-8").expect("opus 4.8 catalog entry");
    assert!(!entry.deprecated, "newest Opus must not be deprecated");

    // Fast-mode request knob and rollout status.
    let fast_mode = entry.fast_mode.expect("opus 4.8 advertises fast mode");
    assert_eq!(fast_mode.param, "speed");
    assert_eq!(fast_mode.value, "fast");
    assert_eq!(fast_mode.status.as_deref(), Some("research_preview"));

    // Fast mode must cost more per input token than the standard route.
    let premium_rates = fast_mode
        .pricing
        .expect("fast mode carries premium pricing");
    let standard_rates = entry.pricing.expect("opus 4.8 standard pricing");
    let fast_is_premium = premium_rates.input_per_mtok > standard_rates.input_per_mtok;
    assert!(
        fast_is_premium,
        "fast mode must be premium-priced relative to standard"
    );
}
3544
#[test]
fn superseded_opus_models_point_at_claude_opus_4_8() {
    // Both older Opus ids must be cataloged, flagged deprecated, and name
    // `claude-opus-4-8` in `superseded_by`.
    let superseded_ids = ["claude-opus-4-7", "claude-opus-4-6"];
    for model in superseded_ids {
        let Some(entry) = model_catalog_entry(model) else {
            panic!("{model} catalog entry");
        };
        assert!(entry.deprecated, "{model} should be deprecated");

        let successor = entry.superseded_by.as_deref();
        assert_eq!(
            successor,
            Some("claude-opus-4-8"),
            "{model} should be superseded by claude-opus-4-8"
        );
    }
}
3560
#[test]
fn gpt_5_5_fast_mode_rides_service_tier() {
    // The `gpt-5.5` catalog entry must advertise a fast mode that is selected
    // through the `service_tier` parameter and has status `ga`.
    let fast_tier = model_catalog_entry("gpt-5.5")
        .expect("gpt-5.5 catalog entry")
        .fast_mode
        .expect("gpt-5.5 advertises a fast tier");
    assert_eq!(fast_tier.param, "service_tier");
    assert_eq!(fast_tier.status.as_deref(), Some("ga"));
}
3570}