1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide merged configuration, built once on first `load_config()`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file overlaid onto the defaults, if any.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();

thread_local! {
    /// Per-thread overlay merged on top of `CONFIG` by `effective_config()`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root configuration for providers, models, aliases, and routing rules.
///
/// Deserialized from `providers.toml`. Every field is `#[serde(default)]`,
/// so a partial file overlays cleanly onto the built-in defaults via
/// `merge_from`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    // Provider used when none can be inferred; empty/"auto" is treated as
    // unset by `default_provider_with_config`.
    #[serde(default)]
    pub default_provider: Option<String>,
    // Provider name -> connection/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    // Alias name -> (model id, provider) shorthand.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    // Model id -> catalog metadata (context window, pricing, ...).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    // Lowercased provider name -> default QC model (see `qc_default_model`).
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    // Ordered model-id -> provider rules; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    // Ordered model-id -> tier rules; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    // Fallback tier when no rule or heuristic applies.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    // Glob pattern over model ids -> default request parameters
    // (see `model_params`).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
impl ProvidersConfig {
    /// True when this config is indistinguishable from `Default::default()`,
    /// i.e. it carries no user-supplied information.
    pub fn is_empty(&self) -> bool {
        self.default_provider.is_none()
            && self.providers.is_empty()
            && self.aliases.is_empty()
            && self.models.is_empty()
            && self.qc_defaults.is_empty()
            && self.inference_rules.is_empty()
            && self.tier_rules.is_empty()
            && self.model_defaults.is_empty()
            // `tier_defaults` has no "unset" state; "mid" is its default.
            && self.tier_defaults.default == default_mid()
    }

    /// Overlays `overlay` onto `self`, giving overlay entries precedence.
    ///
    /// - Map fields: overlay entries replace same-key base entries.
    /// - Rule lists: overlay rules are PREPENDED so they win under the
    ///   first-match evaluation in `infer_provider_with_config` /
    ///   `model_tier_with_config`.
    /// - `tier_defaults`: replaced only when the overlay actually changed it
    ///   from the "mid" default, so a default overlay cannot clobber a
    ///   customized base value.
    /// - `model_defaults`: merged per glob pattern, overlay keys winning.
    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
        self.providers.extend(overlay.providers.clone());
        self.aliases.extend(overlay.aliases.clone());
        self.models.extend(overlay.models.clone());
        self.qc_defaults.extend(overlay.qc_defaults.clone());

        if overlay.default_provider.is_some() {
            self.default_provider = overlay.default_provider.clone();
        }

        // Prepend overlay rules so they are evaluated before base rules.
        if !overlay.inference_rules.is_empty() {
            let mut merged = overlay.inference_rules.clone();
            merged.extend(self.inference_rules.clone());
            self.inference_rules = merged;
        }

        if !overlay.tier_rules.is_empty() {
            let mut merged = overlay.tier_rules.clone();
            merged.extend(self.tier_rules.clone());
            self.tier_rules = merged;
        }

        // Only a non-default overlay value replaces the base tier default.
        if overlay.tier_defaults.default != default_mid() {
            self.tier_defaults = overlay.tier_defaults.clone();
        }

        for (pattern, defaults) in &overlay.model_defaults {
            self.model_defaults
                .entry(pattern.clone())
                .or_default()
                .extend(defaults.clone());
        }
    }
}
86
/// Connection, authentication, and economics settings for one provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    // Optional human-readable name for UIs.
    #[serde(default)]
    pub display_name: Option<String>,
    // Optional icon identifier for UIs.
    #[serde(default)]
    pub icon: Option<String>,
    // API root; overridable via `base_url_env` (see `resolve_base_url`).
    pub base_url: String,
    // Env var whose non-empty value overrides `base_url`.
    #[serde(default)]
    pub base_url_env: Option<String>,
    // How credentials are attached; defaults to "bearer". Built-in configs
    // also use "none", "header", "aws_sigv4", and "azure_openai".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    // Header name carrying the key when `auth_style` is "header".
    #[serde(default)]
    pub auth_header: Option<String>,
    // Env var(s) that may hold the API key; any non-empty one marks the
    // provider available (see `provider_key_available`).
    #[serde(default)]
    pub auth_env: AuthEnv,
    // Extra headers sent with every request.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    // Chat endpoint path, relative to the base URL.
    #[serde(default)]
    pub chat_endpoint: String,
    // Optional legacy completion endpoint path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    // Optional liveness probe definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    // Free-form capability flags; "native_tools" is consulted by
    // `default_tool_format_with_config`.
    #[serde(default)]
    pub features: Vec<String>,
    // NOTE(review): presumably the name of a provider to fail over to —
    // not consumed in this chunk; confirm at call sites.
    #[serde(default)]
    pub fallback: Option<String>,
    // Retry policy; not consumed in this chunk — semantics per the caller.
    #[serde(default)]
    pub retry_count: Option<u32>,
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    // Presumably requests-per-minute limit — TODO confirm at call sites.
    #[serde(default)]
    pub rpm: Option<u32>,
    // Provider-level fallback pricing, used by `provider_economics` /
    // `pricing_per_1k_for` when a model has no explicit pricing.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    // Typical median latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
impl Default for ProviderDef {
    /// A blank provider definition: no endpoints or credentials configured,
    /// with `auth_style` preset to "bearer" (matching the serde default).
    fn default() -> Self {
        Self {
            display_name: None,
            icon: None,
            base_url: String::new(),
            base_url_env: None,
            // Keep in sync with the `#[serde(default = "default_bearer")]`
            // attribute on the struct field.
            auth_style: default_bearer(),
            auth_header: None,
            auth_env: AuthEnv::None,
            extra_headers: BTreeMap::new(),
            chat_endpoint: String::new(),
            completion_endpoint: None,
            healthcheck: None,
            features: Vec::new(),
            fallback: None,
            retry_count: None,
            retry_delay_ms: None,
            rpm: None,
            cost_per_1k_in: None,
            cost_per_1k_out: None,
            latency_p50_ms: None,
        }
    }
}
160
/// Serde default for `ProviderDef::auth_style`: standard bearer-token auth.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Environment variable(s) that may hold a provider's API key.
///
/// `#[serde(untagged)]`: the TOML value may be omitted (`None`), a single
/// string, or a list of strings. Any non-empty variable counts toward key
/// availability (see `provider_key_available`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    #[default]
    None,
    Single(String),
    Multiple(Vec<String>),
}
175
/// Liveness-probe description for a provider endpoint.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    // HTTP method, e.g. "GET" or "POST".
    pub method: String,
    // Probe path — presumably relative to the provider base URL; confirm at
    // the call site that performs the check.
    #[serde(default)]
    pub path: Option<String>,
    // Absolute probe URL, used instead of `path` when set.
    #[serde(default)]
    pub url: Option<String>,
    // Optional request body for POST-style probes.
    #[serde(default)]
    pub body: Option<String>,
}
186
/// A short model alias mapping a friendly name to a concrete model/provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    // Concrete model id the alias expands to.
    pub id: String,
    // Provider that serves the model.
    pub provider: String,
    // Optional tool-format override (e.g. "native"/"text"); when unset the
    // format is derived by `default_tool_format_with_config`.
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Per-model token pricing, expressed per million tokens
/// (`pricing_per_1k_for` divides by 1000 to get per-1k rates).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    // Optional prompt-cache rates; currency/units follow the fields above.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog metadata for one model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    // Display name of the model.
    pub name: String,
    // Provider that serves it.
    pub provider: String,
    // Advertised context window — presumably in tokens; confirm upstream.
    pub context_window: u64,
    // Optional smaller window actually honored at runtime.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    // Optional streaming timeout; units not visible here — confirm at the
    // consuming call site.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    // Capability tags; recomputed from the capability matrix by
    // `with_effective_capability_tags` before being exposed.
    #[serde(default)]
    pub capabilities: Vec<String>,
    // Per-model pricing; overrides provider-level economics when present.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
223
/// Fully-resolved selection produced by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    // Concrete model id (alias expanded, provider prefix stripped).
    pub id: String,
    // Provider chosen via alias or inference rules.
    pub provider: String,
    // The alias the caller used, when resolution went through one.
    pub alias: Option<String>,
    // "native" or "text" (or an alias-supplied override).
    pub tool_format: String,
    // Tier label from `model_tier_with_config`.
    pub tier: String,
}
232
/// Maps model ids to a provider. Rules are evaluated in order and the first
/// match wins; within one rule, `exact`, then `pattern` (glob), then
/// `contains` are tried (see `infer_provider_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    // Glob pattern over the model id (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    // Substring match over the model id.
    #[serde(default)]
    pub contains: Option<String>,
    // Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    // Provider to select when the rule matches.
    pub provider: String,
}
243
/// Maps model ids to a tier label; same matching semantics as
/// `InferenceRule` (see `model_tier_with_config`).
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    // Glob pattern over the model id (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    // Substring match over the model id.
    #[serde(default)]
    pub contains: Option<String>,
    // Exact model-id match.
    #[serde(default)]
    pub exact: Option<String>,
    // Tier label to return when the rule matches.
    pub tier: String,
}
254
/// Fallback tier configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    // Tier returned when no rule or heuristic applies; defaults to "mid".
    #[serde(default = "default_mid")]
    pub default: String,
}
260
261impl Default for TierDefaults {
262 fn default() -> Self {
263 Self {
264 default: default_mid(),
265 }
266 }
267}
268
/// Shared fallback tier name, used both as the serde default and as the
/// sentinel that `merge_from`/`is_empty` compare against.
fn default_mid() -> String {
    String::from("mid")
}
272
273pub fn load_config() -> &'static ProvidersConfig {
275 CONFIG.get_or_init(|| {
276 let mut config = default_config();
277 let verbose_config_logging = matches!(
278 std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
279 Some("1" | "true" | "TRUE" | "yes" | "YES")
280 ) || matches!(
281 std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
282 Some("1" | "true" | "TRUE" | "yes" | "YES")
283 );
284 if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
285 if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
286 config.merge_from(&overlay);
287 let _ = CONFIG_PATH.set(path);
288 return config;
289 }
290 }
291 if let Some(home) = dirs_or_home() {
292 let path = format!("{home}/.config/harn/providers.toml");
293 if let Some(overlay) = read_external_config(&path, false) {
294 config.merge_from(&overlay);
295 let _ = CONFIG_PATH.set(path);
296 return config;
297 }
298 }
299 config
300 })
301}
302
303fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
304 match std::fs::read_to_string(path) {
305 Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
306 Ok(config) => {
307 if verbose {
308 eprintln!(
309 "[llm_config] Loaded {} providers, {} aliases from {}",
310 config.providers.len(),
311 config.aliases.len(),
312 path
313 );
314 }
315 Some(config)
316 }
317 Err(error) => {
318 eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
319 None
320 }
321 },
322 Err(error) => {
323 if verbose {
324 eprintln!("[llm_config] Cannot read {}: {}", path, error);
325 }
326 None
327 }
328 }
329}
330
331pub fn loaded_config_path() -> Option<std::path::PathBuf> {
334 let _ = load_config();
336 CONFIG_PATH.get().map(std::path::PathBuf::from)
337}
338
339pub fn set_user_overrides(config: Option<ProvidersConfig>) {
343 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
344}
345
/// Removes this thread's config overlay, restoring the global config.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
350
351fn effective_config() -> ProvidersConfig {
352 let mut merged = load_config().clone();
353 USER_OVERRIDES.with(|cell| {
354 if let Some(overlay) = cell.borrow().as_ref() {
355 merged.merge_from(overlay);
356 }
357 });
358 merged
359}
360
361pub fn resolve_model(alias: &str) -> (String, Option<String>) {
363 let config = effective_config();
364 if let Some(a) = config.aliases.get(alias) {
365 return (a.id.clone(), Some(a.provider.clone()));
366 }
367 (normalize_model_id(alias), None)
368}
369
/// Strips a known local/hub provider prefix (`ollama:`, `local:`,
/// `huggingface:`, `hf:`) from a raw selector, returning the bare model id.
/// Selectors without a recognized prefix pass through unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    ["ollama:", "local:", "huggingface:", "hf:"]
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
382
383pub fn resolve_model_info(selector: &str) -> ResolvedModel {
386 let config = effective_config();
387 if let Some(alias) = config.aliases.get(selector) {
388 let id = alias.id.clone();
389 let provider = alias.provider.clone();
390 let tool_format = alias
391 .tool_format
392 .clone()
393 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
394 return ResolvedModel {
395 tier: model_tier_with_config(&config, &id),
396 id,
397 provider,
398 alias: Some(selector.to_string()),
399 tool_format,
400 };
401 }
402
403 let provider = infer_provider_with_config(&config, selector).provider;
404 let id = normalize_model_id(selector);
405 let tool_format = default_tool_format_with_config(&config, &id, &provider);
406 let tier = model_tier_with_config(&config, &id);
407 ResolvedModel {
408 id,
409 provider,
410 alias: None,
411 tool_format,
412 tier,
413 }
414}
415
416pub fn infer_provider(model_id: &str) -> String {
418 infer_provider_detail(model_id).provider
419}
420
421pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
423 let config = effective_config();
424 infer_provider_with_config(&config, model_id)
425}
426
427fn infer_provider_with_config(
428 config: &ProvidersConfig,
429 model_id: &str,
430) -> crate::llm::provider::ProviderInference {
431 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
432 return crate::llm::provider::ProviderInference::builtin("ollama");
433 }
434 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
435 return crate::llm::provider::ProviderInference::builtin("huggingface");
436 }
437 for rule in &config.inference_rules {
438 if let Some(exact) = &rule.exact {
439 if model_id == exact {
440 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
441 }
442 }
443 if let Some(pattern) = &rule.pattern {
444 if glob_match(pattern, model_id) {
445 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
446 }
447 }
448 if let Some(substr) = &rule.contains {
449 if model_id.contains(substr.as_str()) {
450 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
451 }
452 }
453 }
454 crate::llm::provider::infer_provider_from_model_id(
455 model_id,
456 &default_provider_with_config(config),
457 )
458}
459
460pub fn default_provider() -> String {
461 let config = effective_config();
462 default_provider_with_config(&config)
463}
464
465fn default_provider_with_config(config: &ProvidersConfig) -> String {
466 std::env::var("HARN_DEFAULT_PROVIDER")
467 .ok()
468 .map(|value| value.trim().to_string())
469 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
470 .or_else(|| {
471 config
472 .default_provider
473 .as_deref()
474 .map(str::trim)
475 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
476 .map(str::to_string)
477 })
478 .unwrap_or_else(|| "anthropic".to_string())
479}
480
481pub fn model_tier(model_id: &str) -> String {
483 let config = effective_config();
484 model_tier_with_config(&config, model_id)
485}
486
487fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
488 for rule in &config.tier_rules {
489 if let Some(exact) = &rule.exact {
490 if model_id == exact {
491 return rule.tier.clone();
492 }
493 }
494 if let Some(pattern) = &rule.pattern {
495 if glob_match(pattern, model_id) {
496 return rule.tier.clone();
497 }
498 }
499 if let Some(substr) = &rule.contains {
500 if model_id.contains(substr.as_str()) {
501 return rule.tier.clone();
502 }
503 }
504 }
505 let lower = model_id.to_lowercase();
506 if lower.contains("9b") || lower.contains("a3b") {
507 return "small".to_string();
508 }
509 if lower.starts_with("claude-") || lower == "gpt-4o" {
510 return "frontier".to_string();
511 }
512 config.tier_defaults.default.clone()
513}
514
515pub fn provider_config(name: &str) -> Option<ProviderDef> {
517 effective_config().providers.get(name).cloned()
518}
519
520pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
523 let config = effective_config();
524 let mut params = BTreeMap::new();
525 for (pattern, defaults) in &config.model_defaults {
526 if glob_match(pattern, model_id) {
527 for (k, v) in defaults {
528 params.insert(k.clone(), v.clone());
529 }
530 }
531 }
532 params
533}
534
535pub fn provider_names() -> Vec<String> {
537 effective_config().providers.keys().cloned().collect()
538}
539
540pub fn known_model_names() -> Vec<String> {
542 effective_config().aliases.keys().cloned().collect()
543}
544
545pub fn alias_entries() -> Vec<(String, AliasDef)> {
546 effective_config().aliases.into_iter().collect()
547}
548
549pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
551 let mut entries: Vec<_> = effective_config()
552 .models
553 .into_iter()
554 .map(|(id, model)| {
555 let provider = model.provider.clone();
556 (
557 id.clone(),
558 with_effective_capability_tags(id, provider, model),
559 )
560 })
561 .collect();
562 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
563 model_a
564 .provider
565 .cmp(&model_b.provider)
566 .then_with(|| id_a.cmp(id_b))
567 });
568 entries
569}
570
571pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
572 effective_config()
573 .models
574 .get(model_id)
575 .cloned()
576 .map(|model| {
577 let provider = model.provider.clone();
578 with_effective_capability_tags(model_id.to_string(), provider, model)
579 })
580}
581
582pub fn qc_default_model(provider: &str) -> Option<String> {
583 std::env::var("BURIN_QC_MODEL")
584 .ok()
585 .filter(|value| !value.trim().is_empty())
586 .or_else(|| {
587 effective_config()
588 .qc_defaults
589 .get(&provider.to_lowercase())
590 .cloned()
591 })
592}
593
/// Hard-coded default model per provider; "local" and "mlx" consult env
/// vars first, and unknown providers fall back to a Claude Sonnet id.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        // Local OpenAI-compatible server: model name comes from the env.
        "local" => std::env::var("LOCAL_LLM_MODEL")
            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
            .unwrap_or_else(|_| String::from("gpt-4o")),
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| String::from("unsloth/Qwen3.6-27B-UD-MLX-4bit")),
        "openai" => String::from("gpt-4o"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-20250514"),
    }
}
607
608pub fn qc_defaults() -> BTreeMap<String, String> {
609 effective_config().qc_defaults
610}
611
612pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
613 effective_config()
614 .models
615 .get(model_id)
616 .and_then(|model| model.pricing.clone())
617}
618
619pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
620 model_pricing_per_mtok(model_id)
621 .map(|pricing| {
622 (
623 pricing.input_per_mtok / 1000.0,
624 pricing.output_per_mtok / 1000.0,
625 )
626 })
627 .or_else(|| {
628 let (input, output, _) = provider_economics(provider);
629 match (input, output) {
630 (Some(input), Some(output)) => Some((input, output)),
631 _ => None,
632 }
633 })
634}
635
636pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
637 match auth_env {
638 AuthEnv::None => Vec::new(),
639 AuthEnv::Single(name) => vec![name.clone()],
640 AuthEnv::Multiple(names) => names.clone(),
641 }
642}
643
644pub fn provider_key_available(provider: &str) -> bool {
645 let Some(pdef) = provider_config(provider) else {
646 return provider == "ollama";
647 };
648 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
649 return true;
650 }
651 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
652 std::env::var(env_name)
653 .ok()
654 .is_some_and(|value| !value.trim().is_empty())
655 })
656}
657
658pub fn available_provider_names() -> Vec<String> {
659 provider_names()
660 .into_iter()
661 .filter(|provider| provider_key_available(provider))
662 .collect()
663}
664
665pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
667 provider_config(provider)
668 .map(|p| p.features.iter().any(|f| f == feature))
669 .unwrap_or(false)
670}
671
672pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
676 provider_config(provider)
677 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
678 .unwrap_or((None, None, None))
679}
680
681pub fn default_tool_format(model: &str, provider: &str) -> String {
685 let config = effective_config();
686 default_tool_format_with_config(&config, model, provider)
687}
688
689fn default_tool_format_with_config(
690 config: &ProvidersConfig,
691 model: &str,
692 provider: &str,
693) -> String {
694 for (name, alias) in &config.aliases {
696 let matches = (alias.id == model && alias.provider == provider) || name == model;
697 if matches {
698 if let Some(ref fmt) = alias.tool_format {
699 return fmt.clone();
700 }
701 }
702 }
703 let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
704 let legacy_provider_native = config
705 .providers
706 .get(provider)
707 .map(|p| p.features.iter().any(|f| f == "native_tools"))
708 .unwrap_or(false);
709 if capability_matrix_native || legacy_provider_native {
710 "native".to_string()
711 } else {
712 "text".to_string()
713 }
714}
715
716fn with_effective_capability_tags(
717 model_id: String,
718 provider: String,
719 mut model: ModelDef,
720) -> ModelDef {
721 model.capabilities = effective_model_capability_tags(&provider, &model_id);
722 model
723}
724
725pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
729 let caps = crate::llm::capabilities::lookup(provider, model_id);
730 let mut tags = Vec::new();
731 tags.push("streaming".to_string());
734 if caps.native_tools {
735 tags.push("tools".to_string());
736 }
737 if !caps.tool_search.is_empty() {
738 tags.push("tool_search".to_string());
739 }
740 if caps.vision || caps.vision_supported {
741 tags.push("vision".to_string());
742 }
743 if caps.audio {
744 tags.push("audio".to_string());
745 }
746 if caps.pdf {
747 tags.push("pdf".to_string());
748 }
749 if caps.files_api_supported {
750 tags.push("files".to_string());
751 }
752 if caps.prompt_caching {
753 tags.push("prompt_caching".to_string());
754 }
755 if !caps.thinking_modes.is_empty() {
756 tags.push("thinking".to_string());
757 }
758 if caps.interleaved_thinking_supported
759 || caps
760 .thinking_modes
761 .iter()
762 .any(|mode| mode == "adaptive" || mode == "effort")
763 {
764 tags.push("extended_thinking".to_string());
765 }
766 if caps.json_schema.is_some() {
767 tags.push("structured_output".to_string());
768 }
769 tags
770}
771
772pub fn resolve_tier_model(
774 target: &str,
775 preferred_provider: Option<&str>,
776) -> Option<(String, String)> {
777 let config = effective_config();
778
779 if let Some(alias) = config.aliases.get(target) {
780 return Some((alias.id.clone(), alias.provider.clone()));
781 }
782
783 let candidate_aliases = if let Some(provider) = preferred_provider {
784 vec![
785 format!("{provider}/{target}"),
786 format!("{provider}:{target}"),
787 format!("tier/{target}"),
788 target.to_string(),
789 ]
790 } else {
791 vec![format!("tier/{target}"), target.to_string()]
792 };
793
794 for alias_name in candidate_aliases {
795 if let Some(alias) = config.aliases.get(&alias_name) {
796 return Some((alias.id.clone(), alias.provider.clone()));
797 }
798 }
799
800 None
801}
802
803pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
807 let config = effective_config();
808 let mut seen = std::collections::BTreeSet::new();
809 let mut candidates = Vec::new();
810
811 for alias in config.aliases.values() {
812 let pair = (alias.id.clone(), alias.provider.clone());
813 if seen.contains(&pair) {
814 continue;
815 }
816 if model_tier(&alias.id) == target {
817 seen.insert(pair.clone());
818 candidates.push(pair);
819 }
820 }
821
822 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
823 provider_a
824 .cmp(provider_b)
825 .then_with(|| model_a.cmp(model_b))
826 });
827 candidates
828}
829
830pub fn all_model_candidates() -> Vec<(String, String)> {
833 let config = effective_config();
834 let mut seen = std::collections::BTreeSet::new();
835 let mut candidates = Vec::new();
836
837 for alias in config.aliases.values() {
838 let pair = (alias.id.clone(), alias.provider.clone());
839 if seen.insert(pair.clone()) {
840 candidates.push(pair);
841 }
842 }
843
844 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
845 provider_a
846 .cmp(provider_b)
847 .then_with(|| model_a.cmp(model_b))
848 });
849 candidates
850}
851
/// Minimal glob matcher for config rules and `model_defaults` patterns.
///
/// Supported forms: `prefix*`, `*suffix`, `prefix*suffix`, `*substr*`
/// (containment), and literal equality. Fix over the original: a
/// `*substr*` pattern used to fall into the trailing-`*` branch and become
/// `starts_with("*substr")`, which could only match inputs literally
/// beginning with `*`. Patterns with more than one interior `*` still fall
/// back to literal equality, as before.
fn glob_match(pattern: &str, input: &str) -> bool {
    // "*substr*" — containment. (A bare "*" is NOT captured here: stripping
    // its one star leaves "", which has no trailing star; it falls through
    // to the prefix branch and matches everything, as before.)
    if let Some(inner) = pattern
        .strip_prefix('*')
        .and_then(|rest| rest.strip_suffix('*'))
        .filter(|inner| !inner.contains('*'))
    {
        return input.contains(inner);
    }
    if let Some(prefix) = pattern.strip_suffix('*') {
        input.starts_with(prefix)
    } else if let Some(suffix) = pattern.strip_prefix('*') {
        input.ends_with(suffix)
    } else if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            input.starts_with(parts[0]) && input.ends_with(parts[1])
        } else {
            input == pattern
        }
    } else {
        input == pattern
    }
}
869
/// Minimal stand-in for the `dirs` crate: trust `$HOME` when it is set.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
873
874pub fn resolve_base_url(pdef: &ProviderDef) -> String {
877 if let Some(env_name) = &pdef.base_url_env {
878 if let Ok(val) = std::env::var(env_name) {
879 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
881 if !trimmed.is_empty() {
882 return trimmed.to_string();
883 }
884 }
885 }
886 pdef.base_url.clone()
887}
888
889fn default_config() -> ProvidersConfig {
890 let mut config = ProvidersConfig {
891 default_provider: Some("anthropic".to_string()),
892 ..Default::default()
893 };
894
895 config.providers.insert(
896 "anthropic".to_string(),
897 ProviderDef {
898 base_url: "https://api.anthropic.com/v1".to_string(),
899 auth_style: "header".to_string(),
900 auth_header: Some("x-api-key".to_string()),
901 auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
902 extra_headers: BTreeMap::from([(
903 "anthropic-version".to_string(),
904 "2023-06-01".to_string(),
905 )]),
906 chat_endpoint: "/messages".to_string(),
907 completion_endpoint: None,
908 healthcheck: Some(HealthcheckDef {
909 method: "POST".to_string(),
910 path: Some("/messages/count_tokens".to_string()),
911 url: None,
912 body: Some(
913 r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
914 .to_string(),
915 ),
916 }),
917 features: vec!["prompt_caching".to_string(), "thinking".to_string()],
918 cost_per_1k_in: Some(0.003),
919 cost_per_1k_out: Some(0.015),
920 latency_p50_ms: Some(2500),
921 ..Default::default()
922 },
923 );
924
925 config.providers.insert(
927 "openai".to_string(),
928 ProviderDef {
929 base_url: "https://api.openai.com/v1".to_string(),
930 auth_style: "bearer".to_string(),
931 auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
932 chat_endpoint: "/chat/completions".to_string(),
933 completion_endpoint: Some("/completions".to_string()),
934 healthcheck: Some(HealthcheckDef {
935 method: "GET".to_string(),
936 path: Some("/models".to_string()),
937 url: None,
938 body: None,
939 }),
940 cost_per_1k_in: Some(0.0025),
941 cost_per_1k_out: Some(0.010),
942 latency_p50_ms: Some(1800),
943 ..Default::default()
944 },
945 );
946
947 config.providers.insert(
949 "openrouter".to_string(),
950 ProviderDef {
951 base_url: "https://openrouter.ai/api/v1".to_string(),
952 auth_style: "bearer".to_string(),
953 auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
954 chat_endpoint: "/chat/completions".to_string(),
955 completion_endpoint: Some("/completions".to_string()),
956 healthcheck: Some(HealthcheckDef {
957 method: "GET".to_string(),
958 path: Some("/auth/key".to_string()),
959 url: None,
960 body: None,
961 }),
962 cost_per_1k_in: Some(0.003),
963 cost_per_1k_out: Some(0.015),
964 latency_p50_ms: Some(2200),
965 ..Default::default()
966 },
967 );
968
969 config.providers.insert(
971 "huggingface".to_string(),
972 ProviderDef {
973 base_url: "https://router.huggingface.co/v1".to_string(),
974 auth_style: "bearer".to_string(),
975 auth_env: AuthEnv::Multiple(vec![
976 "HF_TOKEN".to_string(),
977 "HUGGINGFACE_API_KEY".to_string(),
978 ]),
979 chat_endpoint: "/chat/completions".to_string(),
980 completion_endpoint: Some("/completions".to_string()),
981 healthcheck: Some(HealthcheckDef {
982 method: "GET".to_string(),
983 url: Some("https://huggingface.co/api/whoami-v2".to_string()),
984 path: None,
985 body: None,
986 }),
987 cost_per_1k_in: Some(0.0002),
988 cost_per_1k_out: Some(0.0006),
989 latency_p50_ms: Some(2400),
990 ..Default::default()
991 },
992 );
993
994 config.providers.insert(
1003 "ollama".to_string(),
1004 ProviderDef {
1005 base_url: "http://localhost:11434".to_string(),
1006 base_url_env: Some("OLLAMA_HOST".to_string()),
1007 auth_style: "none".to_string(),
1008 chat_endpoint: "/api/chat".to_string(),
1009 completion_endpoint: Some("/api/generate".to_string()),
1010 healthcheck: Some(HealthcheckDef {
1011 method: "GET".to_string(),
1012 path: Some("/api/tags".to_string()),
1013 url: None,
1014 body: None,
1015 }),
1016 cost_per_1k_in: Some(0.0),
1017 cost_per_1k_out: Some(0.0),
1018 latency_p50_ms: Some(1200),
1019 ..Default::default()
1020 },
1021 );
1022
1023 config.providers.insert(
1025 "gemini".to_string(),
1026 ProviderDef {
1027 base_url: "https://generativelanguage.googleapis.com".to_string(),
1028 base_url_env: Some("GEMINI_BASE_URL".to_string()),
1029 auth_style: "header".to_string(),
1030 auth_header: Some("x-goog-api-key".to_string()),
1031 auth_env: AuthEnv::Multiple(vec![
1032 "GEMINI_API_KEY".to_string(),
1033 "GOOGLE_API_KEY".to_string(),
1034 ]),
1035 chat_endpoint: "/v1beta/models".to_string(),
1036 healthcheck: Some(HealthcheckDef {
1037 method: "GET".to_string(),
1038 path: Some("/v1beta/models".to_string()),
1039 url: None,
1040 body: None,
1041 }),
1042 cost_per_1k_in: Some(0.00125),
1043 cost_per_1k_out: Some(0.005),
1044 latency_p50_ms: Some(1800),
1045 ..Default::default()
1046 },
1047 );
1048
1049 config.providers.insert(
1051 "together".to_string(),
1052 ProviderDef {
1053 base_url: "https://api.together.xyz/v1".to_string(),
1054 base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
1055 auth_style: "bearer".to_string(),
1056 auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
1057 chat_endpoint: "/chat/completions".to_string(),
1058 completion_endpoint: Some("/completions".to_string()),
1059 healthcheck: Some(HealthcheckDef {
1060 method: "GET".to_string(),
1061 path: Some("/models".to_string()),
1062 url: None,
1063 body: None,
1064 }),
1065 cost_per_1k_in: Some(0.0002),
1066 cost_per_1k_out: Some(0.0006),
1067 latency_p50_ms: Some(1600),
1068 ..Default::default()
1069 },
1070 );
1071
1072 config.providers.insert(
1074 "groq".to_string(),
1075 ProviderDef {
1076 base_url: "https://api.groq.com/openai/v1".to_string(),
1077 base_url_env: Some("GROQ_BASE_URL".to_string()),
1078 auth_style: "bearer".to_string(),
1079 auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
1080 chat_endpoint: "/chat/completions".to_string(),
1081 completion_endpoint: Some("/completions".to_string()),
1082 healthcheck: Some(HealthcheckDef {
1083 method: "GET".to_string(),
1084 path: Some("/models".to_string()),
1085 url: None,
1086 body: None,
1087 }),
1088 cost_per_1k_in: Some(0.0001),
1089 cost_per_1k_out: Some(0.0003),
1090 latency_p50_ms: Some(450),
1091 ..Default::default()
1092 },
1093 );
1094
1095 config.providers.insert(
1097 "deepseek".to_string(),
1098 ProviderDef {
1099 base_url: "https://api.deepseek.com/v1".to_string(),
1100 base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
1101 auth_style: "bearer".to_string(),
1102 auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
1103 chat_endpoint: "/chat/completions".to_string(),
1104 completion_endpoint: Some("/completions".to_string()),
1105 healthcheck: Some(HealthcheckDef {
1106 method: "GET".to_string(),
1107 path: Some("/models".to_string()),
1108 url: None,
1109 body: None,
1110 }),
1111 cost_per_1k_in: Some(0.00014),
1112 cost_per_1k_out: Some(0.00028),
1113 latency_p50_ms: Some(1800),
1114 ..Default::default()
1115 },
1116 );
1117
1118 config.providers.insert(
1120 "fireworks".to_string(),
1121 ProviderDef {
1122 base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1123 base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1124 auth_style: "bearer".to_string(),
1125 auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1126 chat_endpoint: "/chat/completions".to_string(),
1127 completion_endpoint: Some("/completions".to_string()),
1128 healthcheck: Some(HealthcheckDef {
1129 method: "GET".to_string(),
1130 path: Some("/models".to_string()),
1131 url: None,
1132 body: None,
1133 }),
1134 cost_per_1k_in: Some(0.0002),
1135 cost_per_1k_out: Some(0.0006),
1136 latency_p50_ms: Some(1400),
1137 ..Default::default()
1138 },
1139 );
1140
1141 config.providers.insert(
1143 "dashscope".to_string(),
1144 ProviderDef {
1145 base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
1146 base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
1147 auth_style: "bearer".to_string(),
1148 auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
1149 chat_endpoint: "/chat/completions".to_string(),
1150 completion_endpoint: Some("/completions".to_string()),
1151 healthcheck: Some(HealthcheckDef {
1152 method: "GET".to_string(),
1153 path: Some("/models".to_string()),
1154 url: None,
1155 body: None,
1156 }),
1157 cost_per_1k_in: Some(0.0003),
1158 cost_per_1k_out: Some(0.0012),
1159 latency_p50_ms: Some(1600),
1160 ..Default::default()
1161 },
1162 );
1163
1164 config.providers.insert(
1168 "bedrock".to_string(),
1169 ProviderDef {
1170 base_url: String::new(),
1171 base_url_env: Some("BEDROCK_BASE_URL".to_string()),
1172 auth_style: "aws_sigv4".to_string(),
1173 auth_env: AuthEnv::None,
1174 chat_endpoint: "/model/{model}/converse".to_string(),
1175 features: vec!["native_tools".to_string()],
1176 latency_p50_ms: Some(2600),
1177 ..Default::default()
1178 },
1179 );
1180
1181 config.providers.insert(
1185 "azure_openai".to_string(),
1186 ProviderDef {
1187 base_url: "https://{resource}.openai.azure.com".to_string(),
1188 base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
1189 auth_style: "azure_openai".to_string(),
1190 auth_env: AuthEnv::Multiple(vec![
1191 "AZURE_OPENAI_API_KEY".to_string(),
1192 "AZURE_OPENAI_AD_TOKEN".to_string(),
1193 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1194 ]),
1195 chat_endpoint:
1196 "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
1197 .to_string(),
1198 features: vec!["native_tools".to_string()],
1199 cost_per_1k_in: Some(0.0025),
1200 cost_per_1k_out: Some(0.010),
1201 latency_p50_ms: Some(1900),
1202 ..Default::default()
1203 },
1204 );
1205
1206 config.providers.insert(
1208 "vertex".to_string(),
1209 ProviderDef {
1210 base_url: "https://aiplatform.googleapis.com/v1".to_string(),
1211 base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
1212 auth_style: "bearer".to_string(),
1213 auth_env: AuthEnv::Multiple(vec![
1214 "VERTEX_AI_ACCESS_TOKEN".to_string(),
1215 "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
1216 "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
1217 ]),
1218 chat_endpoint:
1219 "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
1220 .to_string(),
1221 features: vec!["native_tools".to_string()],
1222 cost_per_1k_in: Some(0.00125),
1223 cost_per_1k_out: Some(0.005),
1224 latency_p50_ms: Some(2100),
1225 ..Default::default()
1226 },
1227 );
1228
1229 config.providers.insert(
1231 "local".to_string(),
1232 ProviderDef {
1233 base_url: "http://localhost:8000".to_string(),
1234 base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1235 auth_style: "none".to_string(),
1236 chat_endpoint: "/v1/chat/completions".to_string(),
1237 completion_endpoint: Some("/v1/completions".to_string()),
1238 healthcheck: Some(HealthcheckDef {
1239 method: "GET".to_string(),
1240 path: Some("/v1/models".to_string()),
1241 url: None,
1242 body: None,
1243 }),
1244 cost_per_1k_in: Some(0.0),
1245 cost_per_1k_out: Some(0.0),
1246 latency_p50_ms: Some(900),
1247 ..Default::default()
1248 },
1249 );
1250
1251 config.providers.insert(
1255 "llamacpp".to_string(),
1256 ProviderDef {
1257 base_url: "http://127.0.0.1:8001".to_string(),
1258 base_url_env: Some("LLAMACPP_BASE_URL".to_string()),
1259 auth_style: "none".to_string(),
1260 chat_endpoint: "/v1/chat/completions".to_string(),
1261 completion_endpoint: Some("/v1/completions".to_string()),
1262 healthcheck: Some(HealthcheckDef {
1263 method: "GET".to_string(),
1264 path: Some("/v1/models".to_string()),
1265 url: None,
1266 body: None,
1267 }),
1268 cost_per_1k_in: Some(0.0),
1269 cost_per_1k_out: Some(0.0),
1270 latency_p50_ms: Some(900),
1271 ..Default::default()
1272 },
1273 );
1274
1275 config.providers.insert(
1279 "mlx".to_string(),
1280 ProviderDef {
1281 base_url: "http://127.0.0.1:8002".to_string(),
1282 base_url_env: Some("MLX_BASE_URL".to_string()),
1283 auth_style: "none".to_string(),
1284 chat_endpoint: "/v1/chat/completions".to_string(),
1285 completion_endpoint: Some("/v1/completions".to_string()),
1286 healthcheck: Some(HealthcheckDef {
1287 method: "GET".to_string(),
1288 path: Some("/v1/models".to_string()),
1289 url: None,
1290 body: None,
1291 }),
1292 cost_per_1k_in: Some(0.0),
1293 cost_per_1k_out: Some(0.0),
1294 latency_p50_ms: Some(900),
1295 ..Default::default()
1296 },
1297 );
1298
1299 config.providers.insert(
1301 "vllm".to_string(),
1302 ProviderDef {
1303 base_url: "http://localhost:8000".to_string(),
1304 base_url_env: Some("VLLM_BASE_URL".to_string()),
1305 auth_style: "none".to_string(),
1306 chat_endpoint: "/v1/chat/completions".to_string(),
1307 completion_endpoint: Some("/v1/completions".to_string()),
1308 healthcheck: Some(HealthcheckDef {
1309 method: "GET".to_string(),
1310 path: Some("/v1/models".to_string()),
1311 url: None,
1312 body: None,
1313 }),
1314 cost_per_1k_in: Some(0.0),
1315 cost_per_1k_out: Some(0.0),
1316 latency_p50_ms: Some(800),
1317 ..Default::default()
1318 },
1319 );
1320
1321 config.providers.insert(
1323 "tgi".to_string(),
1324 ProviderDef {
1325 base_url: "http://localhost:8080".to_string(),
1326 base_url_env: Some("TGI_BASE_URL".to_string()),
1327 auth_style: "none".to_string(),
1328 chat_endpoint: "/v1/chat/completions".to_string(),
1329 completion_endpoint: Some("/v1/completions".to_string()),
1330 healthcheck: Some(HealthcheckDef {
1331 method: "GET".to_string(),
1332 path: Some("/health".to_string()),
1333 url: None,
1334 body: None,
1335 }),
1336 cost_per_1k_in: Some(0.0),
1337 cost_per_1k_out: Some(0.0),
1338 latency_p50_ms: Some(950),
1339 ..Default::default()
1340 },
1341 );
1342
1343 config.inference_rules = vec![
1345 InferenceRule {
1346 pattern: Some("claude-*".to_string()),
1347 contains: None,
1348 exact: None,
1349 provider: "anthropic".to_string(),
1350 },
1351 InferenceRule {
1352 pattern: Some("gpt-*".to_string()),
1353 contains: None,
1354 exact: None,
1355 provider: "openai".to_string(),
1356 },
1357 InferenceRule {
1358 pattern: Some("o1*".to_string()),
1359 contains: None,
1360 exact: None,
1361 provider: "openai".to_string(),
1362 },
1363 InferenceRule {
1364 pattern: Some("o3*".to_string()),
1365 contains: None,
1366 exact: None,
1367 provider: "openai".to_string(),
1368 },
1369 InferenceRule {
1370 pattern: Some("o4*".to_string()),
1371 contains: None,
1372 exact: None,
1373 provider: "openai".to_string(),
1374 },
1375 InferenceRule {
1376 pattern: Some("anthropic.claude-*".to_string()),
1377 contains: None,
1378 exact: None,
1379 provider: "bedrock".to_string(),
1380 },
1381 InferenceRule {
1382 pattern: Some("meta.llama*".to_string()),
1383 contains: None,
1384 exact: None,
1385 provider: "bedrock".to_string(),
1386 },
1387 InferenceRule {
1388 pattern: Some("amazon.*".to_string()),
1389 contains: None,
1390 exact: None,
1391 provider: "bedrock".to_string(),
1392 },
1393 InferenceRule {
1394 pattern: Some("mistral.*".to_string()),
1395 contains: None,
1396 exact: None,
1397 provider: "bedrock".to_string(),
1398 },
1399 InferenceRule {
1400 pattern: Some("cohere.*".to_string()),
1401 contains: None,
1402 exact: None,
1403 provider: "bedrock".to_string(),
1404 },
1405 InferenceRule {
1406 pattern: Some("gemini-*".to_string()),
1407 contains: None,
1408 exact: None,
1409 provider: "gemini".to_string(),
1410 },
1411 ];
1412
1413 config.tier_rules = vec![
1415 TierRule {
1416 contains: Some("9b".to_string()),
1417 pattern: None,
1418 exact: None,
1419 tier: "small".to_string(),
1420 },
1421 TierRule {
1422 contains: Some("a3b".to_string()),
1423 pattern: None,
1424 exact: None,
1425 tier: "small".to_string(),
1426 },
1427 TierRule {
1428 contains: Some("gemma-4-e2b".to_string()),
1429 pattern: None,
1430 exact: None,
1431 tier: "small".to_string(),
1432 },
1433 TierRule {
1434 contains: Some("gemma-4-e4b".to_string()),
1435 pattern: None,
1436 exact: None,
1437 tier: "small".to_string(),
1438 },
1439 TierRule {
1440 contains: Some("gemma-4-26b".to_string()),
1441 pattern: None,
1442 exact: None,
1443 tier: "mid".to_string(),
1444 },
1445 TierRule {
1446 contains: Some("gemma-4-31b".to_string()),
1447 pattern: None,
1448 exact: None,
1449 tier: "frontier".to_string(),
1450 },
1451 TierRule {
1452 contains: Some("gemma4:26b".to_string()),
1453 pattern: None,
1454 exact: None,
1455 tier: "mid".to_string(),
1456 },
1457 TierRule {
1458 contains: Some("gemma4:31b".to_string()),
1459 pattern: None,
1460 exact: None,
1461 tier: "frontier".to_string(),
1462 },
1463 TierRule {
1464 pattern: Some("claude-*".to_string()),
1465 contains: None,
1466 exact: None,
1467 tier: "frontier".to_string(),
1468 },
1469 TierRule {
1470 exact: Some("gpt-4o".to_string()),
1471 contains: None,
1472 pattern: None,
1473 tier: "frontier".to_string(),
1474 },
1475 ];
1476
1477 config.tier_defaults = TierDefaults {
1478 default: "mid".to_string(),
1479 };
1480
1481 config.aliases.insert(
1482 "frontier".to_string(),
1483 AliasDef {
1484 id: "claude-sonnet-4-20250514".to_string(),
1485 provider: "anthropic".to_string(),
1486 tool_format: None,
1487 },
1488 );
1489 config.aliases.insert(
1490 "tier/frontier".to_string(),
1491 AliasDef {
1492 id: "claude-sonnet-4-20250514".to_string(),
1493 provider: "anthropic".to_string(),
1494 tool_format: None,
1495 },
1496 );
1497 config.aliases.insert(
1498 "mid".to_string(),
1499 AliasDef {
1500 id: "gpt-4o-mini".to_string(),
1501 provider: "openai".to_string(),
1502 tool_format: None,
1503 },
1504 );
1505 config.aliases.insert(
1506 "tier/mid".to_string(),
1507 AliasDef {
1508 id: "gpt-4o-mini".to_string(),
1509 provider: "openai".to_string(),
1510 tool_format: None,
1511 },
1512 );
1513 config.aliases.insert(
1514 "small".to_string(),
1515 AliasDef {
1516 id: "Qwen/Qwen3.5-9B".to_string(),
1517 provider: "openrouter".to_string(),
1518 tool_format: None,
1519 },
1520 );
1521 config.aliases.insert(
1522 "tier/small".to_string(),
1523 AliasDef {
1524 id: "Qwen/Qwen3.5-9B".to_string(),
1525 provider: "openrouter".to_string(),
1526 tool_format: None,
1527 },
1528 );
1529 config.aliases.insert(
1530 "local-gemma4".to_string(),
1531 AliasDef {
1532 id: "gemma-4-26b-a4b-it".to_string(),
1533 provider: "local".to_string(),
1534 tool_format: None,
1535 },
1536 );
1537 config.aliases.insert(
1538 "local-gemma4-26b".to_string(),
1539 AliasDef {
1540 id: "gemma-4-26b-a4b-it".to_string(),
1541 provider: "local".to_string(),
1542 tool_format: None,
1543 },
1544 );
1545 config.aliases.insert(
1546 "local-gemma4-31b".to_string(),
1547 AliasDef {
1548 id: "gemma-4-31b-it".to_string(),
1549 provider: "local".to_string(),
1550 tool_format: None,
1551 },
1552 );
1553 config.aliases.insert(
1554 "local-gemma4-e4b".to_string(),
1555 AliasDef {
1556 id: "gemma-4-e4b-it".to_string(),
1557 provider: "local".to_string(),
1558 tool_format: None,
1559 },
1560 );
1561 config.aliases.insert(
1562 "local-gemma4-e2b".to_string(),
1563 AliasDef {
1564 id: "gemma-4-e2b-it".to_string(),
1565 provider: "local".to_string(),
1566 tool_format: None,
1567 },
1568 );
1569 config.aliases.insert(
1570 "mlx-qwen36-27b".to_string(),
1571 AliasDef {
1572 id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
1573 provider: "mlx".to_string(),
1574 tool_format: None,
1575 },
1576 );
1577
1578 config.qc_defaults.extend(BTreeMap::from([
1579 (
1580 "anthropic".to_string(),
1581 "claude-3-5-haiku-20241022".to_string(),
1582 ),
1583 ("openai".to_string(), "gpt-4o-mini".to_string()),
1584 (
1585 "openrouter".to_string(),
1586 "google/gemini-2.5-flash".to_string(),
1587 ),
1588 ("ollama".to_string(), "llama3.2".to_string()),
1589 ("local".to_string(), "gpt-4o".to_string()),
1590 ]));
1591
1592 config.models.extend(BTreeMap::from([
1593 (
1594 "claude-sonnet-4-20250514".to_string(),
1595 ModelDef {
1596 name: "Claude Sonnet 4".to_string(),
1597 provider: "anthropic".to_string(),
1598 context_window: 200_000,
1599 runtime_context_window: None,
1600 stream_timeout: None,
1601 capabilities: vec![
1602 "tools".to_string(),
1603 "streaming".to_string(),
1604 "prompt_caching".to_string(),
1605 "thinking".to_string(),
1606 ],
1607 pricing: Some(ModelPricing {
1608 input_per_mtok: 3.0,
1609 output_per_mtok: 15.0,
1610 cache_read_per_mtok: Some(0.3),
1611 cache_write_per_mtok: Some(3.75),
1612 }),
1613 },
1614 ),
1615 (
1616 "gpt-4o-mini".to_string(),
1617 ModelDef {
1618 name: "GPT-4o Mini".to_string(),
1619 provider: "openai".to_string(),
1620 context_window: 128_000,
1621 runtime_context_window: None,
1622 stream_timeout: None,
1623 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1624 pricing: Some(ModelPricing {
1625 input_per_mtok: 0.15,
1626 output_per_mtok: 0.60,
1627 cache_read_per_mtok: None,
1628 cache_write_per_mtok: None,
1629 }),
1630 },
1631 ),
1632 (
1633 "Qwen/Qwen3.5-9B".to_string(),
1634 ModelDef {
1635 name: "Qwen3.5 9B".to_string(),
1636 provider: "openrouter".to_string(),
1637 context_window: 131_072,
1638 runtime_context_window: None,
1639 stream_timeout: None,
1640 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1641 pricing: None,
1642 },
1643 ),
1644 (
1645 "llama3.2".to_string(),
1646 ModelDef {
1647 name: "Llama 3.2".to_string(),
1648 provider: "ollama".to_string(),
1649 context_window: 32_000,
1650 runtime_context_window: None,
1651 stream_timeout: Some(300.0),
1652 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1653 pricing: None,
1654 },
1655 ),
1656 ]));
1657
1658 config.models.extend(canonical_priced_models());
1659
1660 config
1661}
1662
1663fn canonical_priced_models() -> BTreeMap<String, ModelDef> {
1671 let mut out = BTreeMap::new();
1672 let anthropic_caps = vec![
1673 "tools".to_string(),
1674 "streaming".to_string(),
1675 "prompt_caching".to_string(),
1676 "thinking".to_string(),
1677 ];
1678 let openai_caps = vec!["tools".to_string(), "streaming".to_string()];
1679 let gemini_caps = vec!["tools".to_string(), "streaming".to_string()];
1680
1681 let mut anthropic = |id: &str,
1682 name: &str,
1683 context_window: u64,
1684 input: f64,
1685 output: f64,
1686 cache_read: Option<f64>,
1687 cache_write: Option<f64>| {
1688 out.insert(
1689 id.to_string(),
1690 ModelDef {
1691 name: name.to_string(),
1692 provider: "anthropic".to_string(),
1693 context_window,
1694 runtime_context_window: None,
1695 stream_timeout: None,
1696 capabilities: anthropic_caps.clone(),
1697 pricing: Some(ModelPricing {
1698 input_per_mtok: input,
1699 output_per_mtok: output,
1700 cache_read_per_mtok: cache_read,
1701 cache_write_per_mtok: cache_write,
1702 }),
1703 },
1704 );
1705 };
1706 anthropic(
1707 "claude-3-5-haiku-20241022",
1708 "Claude Haiku 3.5",
1709 200_000,
1710 0.80,
1711 4.00,
1712 Some(0.08),
1713 Some(1.00),
1714 );
1715 anthropic(
1716 "claude-haiku-4-5-20251001",
1717 "Claude Haiku 4.5",
1718 200_000,
1719 1.00,
1720 5.00,
1721 Some(0.10),
1722 Some(1.25),
1723 );
1724 anthropic(
1725 "claude-3-5-sonnet-20240620",
1726 "Claude Sonnet 3.5 (2024-06-20)",
1727 200_000,
1728 3.00,
1729 15.00,
1730 Some(0.30),
1731 Some(3.75),
1732 );
1733 anthropic(
1734 "claude-3-5-sonnet-20241022",
1735 "Claude Sonnet 3.5 (2024-10-22)",
1736 200_000,
1737 3.00,
1738 15.00,
1739 Some(0.30),
1740 Some(3.75),
1741 );
1742 anthropic(
1743 "claude-3-opus-20240229",
1744 "Claude Opus 3",
1745 200_000,
1746 15.00,
1747 75.00,
1748 Some(1.50),
1749 Some(18.75),
1750 );
1751 anthropic(
1752 "claude-opus-4-20250514",
1753 "Claude Opus 4",
1754 200_000,
1755 15.00,
1756 75.00,
1757 Some(1.50),
1758 Some(18.75),
1759 );
1760 anthropic(
1761 "claude-opus-4-1-20250805",
1762 "Claude Opus 4.1",
1763 200_000,
1764 15.00,
1765 75.00,
1766 Some(1.50),
1767 Some(18.75),
1768 );
1769
1770 let mut openai = |id: &str,
1771 name: &str,
1772 context_window: u64,
1773 input: f64,
1774 output: f64,
1775 cache_read: Option<f64>| {
1776 out.insert(
1777 id.to_string(),
1778 ModelDef {
1779 name: name.to_string(),
1780 provider: "openai".to_string(),
1781 context_window,
1782 runtime_context_window: None,
1783 stream_timeout: None,
1784 capabilities: openai_caps.clone(),
1785 pricing: Some(ModelPricing {
1786 input_per_mtok: input,
1787 output_per_mtok: output,
1788 cache_read_per_mtok: cache_read,
1789 cache_write_per_mtok: None,
1790 }),
1791 },
1792 );
1793 };
1794 openai("gpt-4o", "GPT-4o", 128_000, 2.50, 10.00, Some(1.25));
1795 openai("gpt-4-turbo", "GPT-4 Turbo", 128_000, 10.00, 30.00, None);
1796 openai("o1", "OpenAI o1", 200_000, 15.00, 60.00, Some(7.50));
1797 openai(
1798 "o1-mini",
1799 "OpenAI o1-mini",
1800 128_000,
1801 3.00,
1802 12.00,
1803 Some(1.50),
1804 );
1805 openai("o3", "OpenAI o3", 200_000, 15.00, 60.00, Some(7.50));
1806 openai("o3-mini", "OpenAI o3-mini", 200_000, 1.10, 4.40, Some(0.55));
1807
1808 let mut gemini = |id: &str,
1809 name: &str,
1810 context_window: u64,
1811 input: f64,
1812 output: f64,
1813 cache_read: Option<f64>| {
1814 out.insert(
1815 id.to_string(),
1816 ModelDef {
1817 name: name.to_string(),
1818 provider: "gemini".to_string(),
1819 context_window,
1820 runtime_context_window: None,
1821 stream_timeout: None,
1822 capabilities: gemini_caps.clone(),
1823 pricing: Some(ModelPricing {
1824 input_per_mtok: input,
1825 output_per_mtok: output,
1826 cache_read_per_mtok: cache_read,
1827 cache_write_per_mtok: None,
1828 }),
1829 },
1830 );
1831 };
1832 gemini(
1833 "gemini-2.5-flash",
1834 "Gemini 2.5 Flash",
1835 1_048_576,
1836 0.10,
1837 0.40,
1838 Some(0.025),
1839 );
1840 gemini(
1841 "gemini-2.5-pro",
1842 "Gemini 2.5 Pro",
1843 2_097_152,
1844 1.25,
1845 5.00,
1846 Some(0.3125),
1847 );
1848
1849 out.insert(
1850 "mistral-large-latest".to_string(),
1851 ModelDef {
1852 name: "Mistral Large".to_string(),
1853 provider: "openrouter".to_string(),
1854 context_window: 128_000,
1855 runtime_context_window: None,
1856 stream_timeout: None,
1857 capabilities: openai_caps.clone(),
1858 pricing: Some(ModelPricing {
1859 input_per_mtok: 2.00,
1860 output_per_mtok: 6.00,
1861 cache_read_per_mtok: None,
1862 cache_write_per_mtok: None,
1863 }),
1864 },
1865 );
1866 out.insert(
1867 "mistral-small-latest".to_string(),
1868 ModelDef {
1869 name: "Mistral Small".to_string(),
1870 provider: "openrouter".to_string(),
1871 context_window: 128_000,
1872 runtime_context_window: None,
1873 stream_timeout: None,
1874 capabilities: openai_caps,
1875 pricing: Some(ModelPricing {
1876 input_per_mtok: 0.20,
1877 output_per_mtok: 0.60,
1878 cache_read_per_mtok: None,
1879 cache_write_per_mtok: None,
1880 }),
1881 },
1882 );
1883 out
1884}
1885
/// Test helper: applies `overlay` on top of the built-in default catalog and
/// returns the merged result, mirroring how a global provider file is layered
/// over the compiled-in defaults.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
1892
#[cfg(test)]
mod tests {
    use super::*;

    // Drops any thread-local user-override config installed by a previous
    // test so each test observes the built-in defaults.
    fn reset_overrides() {
        clear_user_overrides();
    }

    // A trailing `*` matches any string sharing the prefix.
    #[test]
    fn test_glob_match_prefix() {
        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
        assert!(glob_match("gpt-*", "gpt-4o"));
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    // A leading `*` matches any string sharing the suffix; the suffix must
    // actually be present (no zero-length star-plus-suffix collapse here).
    #[test]
    fn test_glob_match_suffix() {
        assert!(glob_match("*-latest", "llama3.2-latest"));
        assert!(!glob_match("*-latest", "llama3.2"));
    }

    // A `*` in the middle requires both the literal prefix and suffix.
    #[test]
    fn test_glob_match_middle() {
        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
    }

    // A pattern without `*` only matches the identical string.
    #[test]
    fn test_glob_match_exact() {
        assert!(glob_match("gpt-4o", "gpt-4o"));
        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
    }

    // Built-in inference rules map well-known model-id shapes to providers;
    // with HARN_DEFAULT_PROVIDER unset, an unknown id falls back to
    // "anthropic".
    #[test]
    fn test_infer_provider_from_defaults() {
        // Env vars are process-global: hold the shared lock and save/restore
        // the previous value so concurrently running tests are undisturbed.
        // NOTE(review): a failing assert below panics past the restore step,
        // leaking the cleared var for the rest of the process — confirm the
        // env_lock guard is the intended sole protection here.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // Explicit "<provider>:" prefixes take precedence: "local:" and "ollama:"
    // both resolve to ollama, and "hf:" resolves to huggingface.
    #[test]
    fn test_infer_provider_prefix_rules() {
        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
        assert_eq!(infer_provider("local:owner/model"), "ollama");
        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
    }

    // "org/model" (exactly one slash) infers openrouter; two or more slashes
    // do not, and such ids fall through to the default provider.
    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // Provider prefixes are stripped from the resolved id while setting the
    // provider field; note "ollama:qwen3:30b-a3b" keeps the id's inner colon.
    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        let local = resolve_model_info("local:gemma-4-e4b-it");
        assert_eq!(local.id, "gemma-4-e4b-it");
        assert_eq!(local.provider, "ollama");

        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
        assert_eq!(ollama.id, "qwen3:30b-a3b");
        assert_eq!(ollama.provider, "ollama");

        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.provider, "huggingface");
    }

    // Built-in tier rules: claude-* and gpt-4o are frontier, the "9b"
    // substring marks small, and anything unmatched gets the "mid" default.
    #[test]
    fn test_model_tier_from_defaults() {
        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
        assert_eq!(model_tier("gpt-4o"), "frontier");
        assert_eq!(model_tier("Qwen3.5-9B"), "small");
        assert_eq!(model_tier("deepseek-v3"), "mid");
    }

    // An id that is not an alias is returned unchanged with no provider.
    #[test]
    fn test_resolve_model_unknown_alias() {
        let (id, provider) = resolve_model("gpt-4o");
        assert_eq!(id, "gpt-4o");
        assert!(provider.is_none());
    }

    // The default catalog exposes at least the core built-in providers,
    // including the enterprise entries (bedrock, azure_openai, vertex).
    #[test]
    fn test_provider_names() {
        let names = provider_names();
        assert!(names.len() >= 7);
        assert!(names.contains(&"anthropic".to_string()));
        assert!(names.contains(&"together".to_string()));
        assert!(names.contains(&"local".to_string()));
        assert!(names.contains(&"mlx".to_string()));
        assert!(names.contains(&"openai".to_string()));
        assert!(names.contains(&"ollama".to_string()));
        assert!(names.contains(&"bedrock".to_string()));
        assert!(names.contains(&"azure_openai".to_string()));
        assert!(names.contains(&"vertex".to_string()));
    }

    // A global provider file only overlays the built-in defaults: its
    // default_provider and new aliases win, but built-in providers remain.
    #[test]
    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.aliases.insert(
            "quickstart".to_string(),
            AliasDef {
                id: "llama3.2".to_string(),
                provider: "ollama".to_string(),
                tool_format: None,
            },
        );

        let merged = merge_global_config(overlay);

        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
        assert!(merged.providers.contains_key("anthropic"));
        assert!(merged.providers.contains_key("ollama"));
        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
    }

    // Tier names resolve through the default aliases ("frontier", "small").
    #[test]
    fn test_resolve_tier_model_default_aliases() {
        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
        assert_eq!(model, "claude-sonnet-4-20250514");
        assert_eq!(provider, "anthropic");

        let (model, provider) = resolve_tier_model("small", None).unwrap();
        assert_eq!(model, "Qwen/Qwen3.5-9B");
        assert_eq!(provider, "openrouter");
    }

    // When a provider hint is supplied, the provider-scoped alias is used.
    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
        assert_eq!(model, "gpt-4o-mini");
        assert_eq!(provider, "openai");
    }

    // Anthropic uses header auth with the x-api-key header rather than a
    // bearer token.
    #[test]
    fn test_provider_config_anthropic() {
        let pdef = provider_config("anthropic").unwrap();
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    // The mlx provider defaults and its alias resolve as configured in the
    // default catalog (port 8002, /v1/models healthcheck).
    #[test]
    fn test_provider_config_mlx() {
        let pdef = provider_config("mlx").unwrap();
        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
        assert_eq!(
            pdef.healthcheck.unwrap().path.as_deref(),
            Some("/v1/models")
        );

        let (model, provider) = resolve_model("mlx-qwen36-27b");
        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
        assert_eq!(provider.as_deref(), Some("mlx"));
    }

    // Enterprise providers: bedrock uses SigV4, azure_openai accepts three
    // credential env vars in priority order, vertex has a fixed base URL,
    // and the dotted bedrock model ids infer the bedrock provider.
    #[test]
    fn test_enterprise_provider_defaults_and_inference() {
        let bedrock = provider_config("bedrock").unwrap();
        assert_eq!(bedrock.auth_style, "aws_sigv4");
        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
        assert_eq!(
            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
            "bedrock"
        );
        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");

        let azure = provider_config("azure_openai").unwrap();
        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
        assert_eq!(
            auth_env_names(&azure.auth_env),
            vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]
        );

        let vertex = provider_config("vertex").unwrap();
        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
    }

    // HARN_DEFAULT_PROVIDER overrides the fallback provider for ids no rule
    // matches, and the inference detail records it as a default fallback.
    #[test]
    fn test_default_provider_env_override_for_unknown_model() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
        }

        let inference = infer_provider_detail("unknown-model");

        // Restore before asserting so a failure does not leak the override.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }

        assert_eq!(inference.provider, "openai");
        assert_eq!(
            inference.source,
            crate::llm::provider::ProviderInferenceSource::DefaultFallback
        );
    }

    // Without a base_url_env set, resolve_base_url returns base_url verbatim.
    #[test]
    fn test_resolve_base_url_no_env() {
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    // Sanity check on the built-in catalog: non-empty tables and the "mid"
    // tier default.
    #[test]
    fn test_default_config_roundtrip() {
        let config = default_config();
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
        assert_eq!(config.tier_defaults.default, "mid");
    }

    // merge_from keeps existing providers while adding new ones and taking
    // the overlay's default_provider.
    #[test]
    fn test_external_config_overlays_default_catalog() {
        let mut config = default_config();
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.providers.insert(
            "custom".to_string(),
            ProviderDef {
                base_url: "https://llm.example.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );

        config.merge_from(&overlay);

        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
        assert!(config.providers.contains_key("custom"));
        assert!(config.providers.contains_key("anthropic"));
        assert!(config.providers.contains_key("ollama"));
    }

    // No model_defaults entry ships for this model, so the params are empty.
    #[test]
    fn test_model_params_empty() {
        let params = model_params("claude-sonnet-4-20250514");
        assert!(params.is_empty());
    }

    // Thread-local user overrides add a brand-new provider and alias that
    // resolve_model / provider_names / provider_config all observe.
    // NOTE(review): if an assert fails, the trailing reset_overrides() is
    // skipped and the override leaks into later tests on this thread.
    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        overlay.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        set_user_overrides(Some(overlay));

        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        assert_eq!(
            provider_config("acme").map(|provider| provider.base_url),
            Some("https://llm.acme.test/v1".to_string())
        );

        reset_overrides();
    }

    // The capability matrix decides the default tool format per
    // model/provider pair: native tools on llamacpp for this qwen build,
    // text-based tools on the local provider for this gemma build.
    #[test]
    fn test_default_tool_format_uses_capability_matrix() {
        reset_overrides();

        assert_eq!(
            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
            "native"
        );
        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
    }

    // User overrides can register a new catalog model (with pricing) and a
    // per-provider QC default model.
    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                runtime_context_window: None,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(overlay));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        // NOTE(review): the overlay declared ["tools", "streaming"] but the
        // catalog entry is expected to keep only "streaming" — presumably the
        // capability matrix strips "tools" for an unknown provider; confirm
        // this filtering is intentional.
        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        // Per-Mtok prices (1.25 / 2.5) scaled down to per-1k-token figures.
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }

    // Override inference rules are consulted before the built-in rules, so a
    // user pattern can claim ids the defaults would route elsewhere.
    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        set_user_overrides(Some(overlay));

        assert_eq!(infer_provider("internal-foo"), "openai");

        reset_overrides();
    }
}