1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide cached configuration; initialized exactly once in `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path of the external config file that was merged in during init, if any.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();

thread_local! {
    // Per-thread overlay applied on top of the global config; see `effective_config`
    // and `set_user_overrides`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root configuration schema for LLM providers, model aliases, and routing rules.
///
/// Deserialized from TOML (see `read_external_config`). Every section carries
/// `#[serde(default)]`, so partial overlay files are valid.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider used when none is specified; `None` falls back to the
    /// `HARN_DEFAULT_PROVIDER` env var, then "anthropic" (see `default_provider_with_config`).
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider name -> connection/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Short alias -> concrete (model id, provider) mapping.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id -> catalog metadata (context window, pricing, ...).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lower-cased provider name -> default QC model (see `qc_default_model`).
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// First-match-wins rules mapping model ids to providers.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// First-match-wins rules mapping model ids to tiers.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier used when no tier rule or heuristic applies.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Glob pattern -> extra request parameters applied to matching models.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
impl ProvidersConfig {
    /// True when every section still holds its deserialized-default value,
    /// i.e. the config carries no information worth merging.
    pub fn is_empty(&self) -> bool {
        self.default_provider.is_none()
            && self.providers.is_empty()
            && self.aliases.is_empty()
            && self.models.is_empty()
            && self.qc_defaults.is_empty()
            && self.inference_rules.is_empty()
            && self.tier_rules.is_empty()
            && self.model_defaults.is_empty()
            // tier_defaults has no natural "empty"; treat the serde default ("mid") as empty
            && self.tier_defaults.default == default_mid()
    }

    /// Overlays `overlay` on top of `self`, with `overlay` taking precedence.
    ///
    /// Precedence mechanics differ per section:
    /// - maps (providers/aliases/models/qc_defaults): key-wise replace via `extend`;
    /// - `default_provider`: replaced only when the overlay sets one;
    /// - rule lists: overlay rules are PREPENDED so they win under the
    ///   first-match-wins evaluation in `infer_provider_with_config` / `model_tier_with_config`;
    /// - `tier_defaults`: replaced only when the overlay deviates from the "mid" default;
    /// - `model_defaults`: merged per pattern, overlay keys winning within each pattern.
    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
        self.providers.extend(overlay.providers.clone());
        self.aliases.extend(overlay.aliases.clone());
        self.models.extend(overlay.models.clone());
        self.qc_defaults.extend(overlay.qc_defaults.clone());

        if overlay.default_provider.is_some() {
            self.default_provider = overlay.default_provider.clone();
        }

        if !overlay.inference_rules.is_empty() {
            let mut merged = overlay.inference_rules.clone();
            merged.extend(self.inference_rules.clone());
            self.inference_rules = merged;
        }

        if !overlay.tier_rules.is_empty() {
            let mut merged = overlay.tier_rules.clone();
            merged.extend(self.tier_rules.clone());
            self.tier_rules = merged;
        }

        if overlay.tier_defaults.default != default_mid() {
            self.tier_defaults = overlay.tier_defaults.clone();
        }

        for (pattern, defaults) in &overlay.model_defaults {
            self.model_defaults
                .entry(pattern.clone())
                .or_default()
                .extend(defaults.clone());
        }
    }
}
86
/// Connection, authentication, and economics metadata for one provider endpoint.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Human-readable name for UIs.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon identifier for UIs.
    #[serde(default)]
    pub icon: Option<String>,
    /// Base API URL; may be overridden at runtime via `base_url_env` (see `resolve_base_url`).
    pub base_url: String,
    /// Env var that, when set and non-empty, replaces `base_url`.
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme, e.g. "bearer" (the default), "header", "none", "aws_sigv4".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name used when `auth_style` is "header" (e.g. "x-api-key").
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var(s) holding the API credential; see `auth_env_names`.
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Extra HTTP headers sent on every request.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat-completions path relative to `base_url`.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Legacy text-completions path, when the provider has one.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional liveness probe definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Feature flags such as "native_tools", "prompt_caching", "thinking".
    #[serde(default)]
    pub features: Vec<String>,
    /// Name of a provider to fall back to on failure.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Retry attempts on failure.
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries, in milliseconds.
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Requests-per-minute rate limit.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Cost per 1k input tokens, USD (provider-level fallback; see `pricing_per_1k_for`).
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Cost per 1k output tokens, USD.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Typical median latency, in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
135impl Default for ProviderDef {
136 fn default() -> Self {
137 Self {
138 display_name: None,
139 icon: None,
140 base_url: String::new(),
141 base_url_env: None,
142 auth_style: default_bearer(),
143 auth_header: None,
144 auth_env: AuthEnv::None,
145 extra_headers: BTreeMap::new(),
146 chat_endpoint: String::new(),
147 completion_endpoint: None,
148 healthcheck: None,
149 features: Vec::new(),
150 fallback: None,
151 retry_count: None,
152 retry_delay_ms: None,
153 rpm: None,
154 cost_per_1k_in: None,
155 cost_per_1k_out: None,
156 latency_p50_ms: None,
157 }
158 }
159}
160
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Environment-variable specification for a provider's API credential.
///
/// `#[serde(untagged)]` lets TOML express this as either a single string or a
/// list of strings; an absent field deserializes to `None` via `#[default]`.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No credential env var (e.g. providers with auth_style "none").
    #[default]
    None,
    /// One candidate env var.
    Single(String),
    /// Several candidate env vars; any non-empty one counts (see `provider_key_available`).
    Multiple(Vec<String>),
}
175
/// Liveness probe: an HTTP method plus either a `path` relative to the
/// provider's base URL or an absolute `url`, with an optional request body.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    pub method: String,
    #[serde(default)]
    pub path: Option<String>,
    #[serde(default)]
    pub url: Option<String>,
    #[serde(default)]
    pub body: Option<String>,
}
186
/// A short alias mapped to a concrete model `id` on a specific `provider`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    pub id: String,
    pub provider: String,
    /// Optional tool-call format pin; when `None`, the format is derived
    /// (see `default_tool_format_with_config`).
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Per-model pricing, expressed in USD per million tokens.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog metadata for a single model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    pub name: String,
    pub provider: String,
    /// Advertised context window, in tokens.
    pub context_window: u64,
    /// Effective window at runtime, when smaller than advertised.
    #[serde(default)]
    pub runtime_context_window: Option<u64>,
    /// Streaming timeout, in seconds — TODO confirm unit against callers.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags; recomputed by `with_effective_capability_tags` when served
    /// through the catalog accessors.
    #[serde(default)]
    pub capabilities: Vec<String>,
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
223
/// Result of resolving a user-supplied model selector (see `resolve_model_info`).
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Concrete model id after alias expansion / prefix stripping.
    pub id: String,
    pub provider: String,
    /// The alias that matched, when the selector was an alias.
    pub alias: Option<String>,
    /// "native" or "text" (or an alias-pinned value).
    pub tool_format: String,
    pub tier: String,
}
232
/// Maps a model id to a provider. Any of the three matchers may be set;
/// `exact`, then `pattern` (glob), then `contains` are checked in that order.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub provider: String,
}
243
/// Maps a model id to a tier; same matcher semantics as `InferenceRule`.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub tier: String,
}
254
/// Fallback tier applied when no `TierRule` or heuristic matches.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Defaults to "mid".
    #[serde(default = "default_mid")]
    pub default: String,
}
260
261impl Default for TierDefaults {
262 fn default() -> Self {
263 Self {
264 default: default_mid(),
265 }
266 }
267}
268
/// Serde default for `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
272
/// Returns the process-wide configuration, initializing it on first call.
///
/// Initialization starts from the built-in `default_config()` and merges at most
/// ONE external overlay, searched in order:
/// 1. the file named by `$HARN_PROVIDERS_CONFIG`;
/// 2. `$HOME/.config/harn/providers.toml`.
/// Whichever file loads successfully also records its path in `CONFIG_PATH`.
/// Verbose load logging is enabled by `$HARN_VERBOSE_CONFIG` or `$HARN_ACP_VERBOSE`
/// (truthy: 1/true/TRUE/yes/YES); note the home-directory fallback is always read
/// with verbose logging disabled.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        let mut config = default_config();
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
                config.merge_from(&overlay);
                let _ = CONFIG_PATH.set(path);
                return config;
            }
        }
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            if let Some(overlay) = read_external_config(&path, false) {
                config.merge_from(&overlay);
                let _ = CONFIG_PATH.set(path);
                return config;
            }
        }
        config
    })
}
302
303fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
304 match std::fs::read_to_string(path) {
305 Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
306 Ok(config) => {
307 if verbose {
308 eprintln!(
309 "[llm_config] Loaded {} providers, {} aliases from {}",
310 config.providers.len(),
311 config.aliases.len(),
312 path
313 );
314 }
315 Some(config)
316 }
317 Err(error) => {
318 eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
319 None
320 }
321 },
322 Err(error) => {
323 if verbose {
324 eprintln!("[llm_config] Cannot read {}: {}", path, error);
325 }
326 None
327 }
328 }
329}
330
331pub fn loaded_config_path() -> Option<std::path::PathBuf> {
334 let _ = load_config();
336 CONFIG_PATH.get().map(std::path::PathBuf::from)
337}
338
339pub fn set_user_overrides(config: Option<ProvidersConfig>) {
343 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
344}
345
/// Removes this thread's config overlay, restoring the plain global config.
pub fn clear_user_overrides() {
    set_user_overrides(None);
}
350
351fn effective_config() -> ProvidersConfig {
352 let mut merged = load_config().clone();
353 USER_OVERRIDES.with(|cell| {
354 if let Some(overlay) = cell.borrow().as_ref() {
355 merged.merge_from(overlay);
356 }
357 });
358 merged
359}
360
361pub fn resolve_model(alias: &str) -> (String, Option<String>) {
363 let config = effective_config();
364 if let Some(a) = config.aliases.get(alias) {
365 return (a.id.clone(), Some(a.provider.clone()));
366 }
367 (normalize_model_id(alias), None)
368}
369
/// Strips a recognized routing prefix ("ollama:", "local:", "huggingface:", "hf:")
/// from a raw model selector; anything else is returned unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    ["ollama:", "local:", "huggingface:", "hf:"]
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
382
383pub fn resolve_model_info(selector: &str) -> ResolvedModel {
386 let config = effective_config();
387 if let Some(alias) = config.aliases.get(selector) {
388 let id = alias.id.clone();
389 let provider = alias.provider.clone();
390 let tool_format = alias
391 .tool_format
392 .clone()
393 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
394 return ResolvedModel {
395 tier: model_tier_with_config(&config, &id),
396 id,
397 provider,
398 alias: Some(selector.to_string()),
399 tool_format,
400 };
401 }
402
403 let provider = infer_provider_with_config(&config, selector).provider;
404 let id = normalize_model_id(selector);
405 let tool_format = default_tool_format_with_config(&config, &id, &provider);
406 let tier = model_tier_with_config(&config, &id);
407 ResolvedModel {
408 id,
409 provider,
410 alias: None,
411 tool_format,
412 tier,
413 }
414}
415
416pub fn infer_provider(model_id: &str) -> String {
418 infer_provider_detail(model_id).provider
419}
420
421pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
423 let config = effective_config();
424 infer_provider_with_config(&config, model_id)
425}
426
427fn infer_provider_with_config(
428 config: &ProvidersConfig,
429 model_id: &str,
430) -> crate::llm::provider::ProviderInference {
431 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
432 return crate::llm::provider::ProviderInference::builtin("ollama");
433 }
434 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
435 return crate::llm::provider::ProviderInference::builtin("huggingface");
436 }
437 for rule in &config.inference_rules {
438 if let Some(exact) = &rule.exact {
439 if model_id == exact {
440 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
441 }
442 }
443 if let Some(pattern) = &rule.pattern {
444 if glob_match(pattern, model_id) {
445 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
446 }
447 }
448 if let Some(substr) = &rule.contains {
449 if model_id.contains(substr.as_str()) {
450 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
451 }
452 }
453 }
454 crate::llm::provider::infer_provider_from_model_id(
455 model_id,
456 &default_provider_with_config(config),
457 )
458}
459
460pub fn default_provider() -> String {
461 let config = effective_config();
462 default_provider_with_config(&config)
463}
464
465fn default_provider_with_config(config: &ProvidersConfig) -> String {
466 std::env::var("HARN_DEFAULT_PROVIDER")
467 .ok()
468 .map(|value| value.trim().to_string())
469 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
470 .or_else(|| {
471 config
472 .default_provider
473 .as_deref()
474 .map(str::trim)
475 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
476 .map(str::to_string)
477 })
478 .unwrap_or_else(|| "anthropic".to_string())
479}
480
481pub fn model_tier(model_id: &str) -> String {
483 let config = effective_config();
484 model_tier_with_config(&config, model_id)
485}
486
487fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
488 for rule in &config.tier_rules {
489 if let Some(exact) = &rule.exact {
490 if model_id == exact {
491 return rule.tier.clone();
492 }
493 }
494 if let Some(pattern) = &rule.pattern {
495 if glob_match(pattern, model_id) {
496 return rule.tier.clone();
497 }
498 }
499 if let Some(substr) = &rule.contains {
500 if model_id.contains(substr.as_str()) {
501 return rule.tier.clone();
502 }
503 }
504 }
505 let lower = model_id.to_lowercase();
506 if lower.contains("9b") || lower.contains("a3b") {
507 return "small".to_string();
508 }
509 if lower.starts_with("claude-") || lower == "gpt-4o" {
510 return "frontier".to_string();
511 }
512 config.tier_defaults.default.clone()
513}
514
515pub fn provider_config(name: &str) -> Option<ProviderDef> {
517 effective_config().providers.get(name).cloned()
518}
519
520pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
523 let config = effective_config();
524 let mut params = BTreeMap::new();
525 for (pattern, defaults) in &config.model_defaults {
526 if glob_match(pattern, model_id) {
527 for (k, v) in defaults {
528 params.insert(k.clone(), v.clone());
529 }
530 }
531 }
532 params
533}
534
535pub fn provider_names() -> Vec<String> {
537 effective_config().providers.keys().cloned().collect()
538}
539
540pub fn known_model_names() -> Vec<String> {
542 effective_config().aliases.keys().cloned().collect()
543}
544
545pub fn alias_entries() -> Vec<(String, AliasDef)> {
546 effective_config().aliases.into_iter().collect()
547}
548
549pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
551 let mut entries: Vec<_> = effective_config()
552 .models
553 .into_iter()
554 .map(|(id, model)| {
555 let provider = model.provider.clone();
556 (
557 id.clone(),
558 with_effective_capability_tags(id, provider, model),
559 )
560 })
561 .collect();
562 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
563 model_a
564 .provider
565 .cmp(&model_b.provider)
566 .then_with(|| id_a.cmp(id_b))
567 });
568 entries
569}
570
571pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
572 effective_config()
573 .models
574 .get(model_id)
575 .cloned()
576 .map(|model| {
577 let provider = model.provider.clone();
578 with_effective_capability_tags(model_id.to_string(), provider, model)
579 })
580}
581
582pub fn qc_default_model(provider: &str) -> Option<String> {
583 std::env::var("BURIN_QC_MODEL")
584 .ok()
585 .filter(|value| !value.trim().is_empty())
586 .or_else(|| {
587 effective_config()
588 .qc_defaults
589 .get(&provider.to_lowercase())
590 .cloned()
591 })
592}
593
/// Built-in default model per provider. "local" and "mlx" consult env vars
/// first; every other provider maps to a fixed id, with Anthropic's model as
/// the catch-all.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        "local" => ["LOCAL_LLM_MODEL", "HARN_LLM_MODEL"]
            .iter()
            .find_map(|name| std::env::var(name).ok())
            .unwrap_or_else(|| "gpt-4o".to_string()),
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string()),
        "openai" => "gpt-4o".to_string(),
        "ollama" => "llama3.2".to_string(),
        "openrouter" => "anthropic/claude-sonnet-4.6".to_string(),
        _ => "claude-sonnet-4-20250514".to_string(),
    }
}
607
608pub fn qc_defaults() -> BTreeMap<String, String> {
609 effective_config().qc_defaults
610}
611
612pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
613 effective_config()
614 .models
615 .get(model_id)
616 .and_then(|model| model.pricing.clone())
617}
618
619pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
620 model_pricing_per_mtok(model_id)
621 .map(|pricing| {
622 (
623 pricing.input_per_mtok / 1000.0,
624 pricing.output_per_mtok / 1000.0,
625 )
626 })
627 .or_else(|| {
628 let (input, output, _) = provider_economics(provider);
629 match (input, output) {
630 (Some(input), Some(output)) => Some((input, output)),
631 _ => None,
632 }
633 })
634}
635
636pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
637 match auth_env {
638 AuthEnv::None => Vec::new(),
639 AuthEnv::Single(name) => vec![name.clone()],
640 AuthEnv::Multiple(names) => names.clone(),
641 }
642}
643
644pub fn provider_key_available(provider: &str) -> bool {
645 let Some(pdef) = provider_config(provider) else {
646 return provider == "ollama";
647 };
648 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
649 return true;
650 }
651 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
652 std::env::var(env_name)
653 .ok()
654 .is_some_and(|value| !value.trim().is_empty())
655 })
656}
657
658pub fn available_provider_names() -> Vec<String> {
659 provider_names()
660 .into_iter()
661 .filter(|provider| provider_key_available(provider))
662 .collect()
663}
664
665pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
667 provider_config(provider)
668 .map(|p| p.features.iter().any(|f| f == feature))
669 .unwrap_or(false)
670}
671
672pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
676 provider_config(provider)
677 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
678 .unwrap_or((None, None, None))
679}
680
681pub fn default_tool_format(model: &str, provider: &str) -> String {
685 let config = effective_config();
686 default_tool_format_with_config(&config, model, provider)
687}
688
689fn default_tool_format_with_config(
690 config: &ProvidersConfig,
691 model: &str,
692 provider: &str,
693) -> String {
694 for (name, alias) in &config.aliases {
696 let matches = (alias.id == model && alias.provider == provider) || name == model;
697 if matches {
698 if let Some(ref fmt) = alias.tool_format {
699 return fmt.clone();
700 }
701 }
702 }
703 let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
704 let legacy_provider_native = config
705 .providers
706 .get(provider)
707 .map(|p| p.features.iter().any(|f| f == "native_tools"))
708 .unwrap_or(false);
709 if capability_matrix_native || legacy_provider_native {
710 "native".to_string()
711 } else {
712 "text".to_string()
713 }
714}
715
716fn with_effective_capability_tags(
717 model_id: String,
718 provider: String,
719 mut model: ModelDef,
720) -> ModelDef {
721 model.capabilities = effective_model_capability_tags(&provider, &model_id);
722 model
723}
724
725pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
729 let caps = crate::llm::capabilities::lookup(provider, model_id);
730 let mut tags = Vec::new();
731 tags.push("streaming".to_string());
734 if caps.native_tools {
735 tags.push("tools".to_string());
736 }
737 if !caps.tool_search.is_empty() {
738 tags.push("tool_search".to_string());
739 }
740 if caps.vision || caps.vision_supported {
741 tags.push("vision".to_string());
742 }
743 if caps.audio {
744 tags.push("audio".to_string());
745 }
746 if caps.pdf {
747 tags.push("pdf".to_string());
748 }
749 if caps.files_api_supported {
750 tags.push("files".to_string());
751 }
752 if caps.prompt_caching {
753 tags.push("prompt_caching".to_string());
754 }
755 if !caps.thinking_modes.is_empty() {
756 tags.push("thinking".to_string());
757 }
758 if caps.interleaved_thinking_supported
759 || caps
760 .thinking_modes
761 .iter()
762 .any(|mode| mode == "adaptive" || mode == "effort")
763 {
764 tags.push("extended_thinking".to_string());
765 }
766 if caps.json_schema.is_some() {
767 tags.push("structured_output".to_string());
768 }
769 tags
770}
771
772pub fn resolve_tier_model(
774 target: &str,
775 preferred_provider: Option<&str>,
776) -> Option<(String, String)> {
777 let config = effective_config();
778
779 if let Some(alias) = config.aliases.get(target) {
780 return Some((alias.id.clone(), alias.provider.clone()));
781 }
782
783 let candidate_aliases = if let Some(provider) = preferred_provider {
784 vec![
785 format!("{provider}/{target}"),
786 format!("{provider}:{target}"),
787 format!("tier/{target}"),
788 target.to_string(),
789 ]
790 } else {
791 vec![format!("tier/{target}"), target.to_string()]
792 };
793
794 for alias_name in candidate_aliases {
795 if let Some(alias) = config.aliases.get(&alias_name) {
796 return Some((alias.id.clone(), alias.provider.clone()));
797 }
798 }
799
800 None
801}
802
803pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
807 let config = effective_config();
808 let mut seen = std::collections::BTreeSet::new();
809 let mut candidates = Vec::new();
810
811 for alias in config.aliases.values() {
812 let pair = (alias.id.clone(), alias.provider.clone());
813 if seen.contains(&pair) {
814 continue;
815 }
816 if model_tier(&alias.id) == target {
817 seen.insert(pair.clone());
818 candidates.push(pair);
819 }
820 }
821
822 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
823 provider_a
824 .cmp(provider_b)
825 .then_with(|| model_a.cmp(model_b))
826 });
827 candidates
828}
829
830pub fn all_model_candidates() -> Vec<(String, String)> {
833 let config = effective_config();
834 let mut seen = std::collections::BTreeSet::new();
835 let mut candidates = Vec::new();
836
837 for alias in config.aliases.values() {
838 let pair = (alias.id.clone(), alias.provider.clone());
839 if seen.insert(pair.clone()) {
840 candidates.push(pair);
841 }
842 }
843
844 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
845 provider_a
846 .cmp(provider_b)
847 .then_with(|| model_a.cmp(model_b))
848 });
849 candidates
850}
851
/// Simple `*`-only glob matcher.
///
/// `*` matches any (possibly empty) run of characters; everything else is
/// literal. Fixes two defects of the earlier version: patterns with two or
/// more `*` fell through to exact string comparison and effectively never
/// matched, and the single-interior-`*` branch allowed the prefix and suffix
/// anchors to overlap (e.g. "abc*bcd" wrongly matched "abcd").
fn glob_match(pattern: &str, input: &str) -> bool {
    // Fast path: no wildcard means exact match.
    if !pattern.contains('*') {
        return input == pattern;
    }
    let parts: Vec<&str> = pattern.split('*').collect();
    // First literal anchors the start, last literal anchors the end.
    let first = parts.first().copied().unwrap_or("");
    let last = parts.last().copied().unwrap_or("");
    if !input.starts_with(first) || !input.ends_with(last) {
        return false;
    }
    // The anchors must not overlap in the input.
    if input.len() < first.len() + last.len() {
        return false;
    }
    // Interior literals must appear in order between the anchors; greedy
    // left-to-right search is sufficient for `*`-only globs.
    let mut haystack = &input[first.len()..input.len() - last.len()];
    for segment in &parts[1..parts.len() - 1] {
        if segment.is_empty() {
            continue; // consecutive '*' collapse to one
        }
        match haystack.find(segment) {
            Some(pos) => haystack = &haystack[pos + segment.len()..],
            None => return false,
        }
    }
    true
}
869
/// The user's home directory from `$HOME`, when set and valid UTF-8.
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
873
874pub fn resolve_base_url(pdef: &ProviderDef) -> String {
877 if let Some(env_name) = &pdef.base_url_env {
878 if let Ok(val) = std::env::var(env_name) {
879 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
881 if !trimmed.is_empty() {
882 return trimmed.to_string();
883 }
884 }
885 }
886 pdef.base_url.clone()
887}
888
889fn default_config() -> ProvidersConfig {
890 let mut config = ProvidersConfig {
891 default_provider: Some("anthropic".to_string()),
892 ..Default::default()
893 };
894
895 config.providers.insert(
896 "anthropic".to_string(),
897 ProviderDef {
898 base_url: "https://api.anthropic.com/v1".to_string(),
899 auth_style: "header".to_string(),
900 auth_header: Some("x-api-key".to_string()),
901 auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
902 extra_headers: BTreeMap::from([(
903 "anthropic-version".to_string(),
904 "2023-06-01".to_string(),
905 )]),
906 chat_endpoint: "/messages".to_string(),
907 completion_endpoint: None,
908 healthcheck: Some(HealthcheckDef {
909 method: "POST".to_string(),
910 path: Some("/messages/count_tokens".to_string()),
911 url: None,
912 body: Some(
913 r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
914 .to_string(),
915 ),
916 }),
917 features: vec!["prompt_caching".to_string(), "thinking".to_string()],
918 cost_per_1k_in: Some(0.003),
919 cost_per_1k_out: Some(0.015),
920 latency_p50_ms: Some(2500),
921 ..Default::default()
922 },
923 );
924
925 config.providers.insert(
927 "openai".to_string(),
928 ProviderDef {
929 base_url: "https://api.openai.com/v1".to_string(),
930 auth_style: "bearer".to_string(),
931 auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
932 chat_endpoint: "/chat/completions".to_string(),
933 completion_endpoint: Some("/completions".to_string()),
934 healthcheck: Some(HealthcheckDef {
935 method: "GET".to_string(),
936 path: Some("/models".to_string()),
937 url: None,
938 body: None,
939 }),
940 cost_per_1k_in: Some(0.0025),
941 cost_per_1k_out: Some(0.010),
942 latency_p50_ms: Some(1800),
943 ..Default::default()
944 },
945 );
946
947 config.providers.insert(
949 "openrouter".to_string(),
950 ProviderDef {
951 base_url: "https://openrouter.ai/api/v1".to_string(),
952 auth_style: "bearer".to_string(),
953 auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
954 chat_endpoint: "/chat/completions".to_string(),
955 completion_endpoint: Some("/completions".to_string()),
956 healthcheck: Some(HealthcheckDef {
957 method: "GET".to_string(),
958 path: Some("/auth/key".to_string()),
959 url: None,
960 body: None,
961 }),
962 cost_per_1k_in: Some(0.003),
963 cost_per_1k_out: Some(0.015),
964 latency_p50_ms: Some(2200),
965 ..Default::default()
966 },
967 );
968
969 config.providers.insert(
971 "huggingface".to_string(),
972 ProviderDef {
973 base_url: "https://router.huggingface.co/v1".to_string(),
974 auth_style: "bearer".to_string(),
975 auth_env: AuthEnv::Multiple(vec![
976 "HF_TOKEN".to_string(),
977 "HUGGINGFACE_API_KEY".to_string(),
978 ]),
979 chat_endpoint: "/chat/completions".to_string(),
980 completion_endpoint: Some("/completions".to_string()),
981 healthcheck: Some(HealthcheckDef {
982 method: "GET".to_string(),
983 url: Some("https://huggingface.co/api/whoami-v2".to_string()),
984 path: None,
985 body: None,
986 }),
987 cost_per_1k_in: Some(0.0002),
988 cost_per_1k_out: Some(0.0006),
989 latency_p50_ms: Some(2400),
990 ..Default::default()
991 },
992 );
993
994 config.providers.insert(
1003 "ollama".to_string(),
1004 ProviderDef {
1005 base_url: "http://localhost:11434".to_string(),
1006 base_url_env: Some("OLLAMA_HOST".to_string()),
1007 auth_style: "none".to_string(),
1008 chat_endpoint: "/api/chat".to_string(),
1009 completion_endpoint: Some("/api/generate".to_string()),
1010 healthcheck: Some(HealthcheckDef {
1011 method: "GET".to_string(),
1012 path: Some("/api/tags".to_string()),
1013 url: None,
1014 body: None,
1015 }),
1016 cost_per_1k_in: Some(0.0),
1017 cost_per_1k_out: Some(0.0),
1018 latency_p50_ms: Some(1200),
1019 ..Default::default()
1020 },
1021 );
1022
1023 config.providers.insert(
1025 "gemini".to_string(),
1026 ProviderDef {
1027 base_url: "https://generativelanguage.googleapis.com".to_string(),
1028 base_url_env: Some("GEMINI_BASE_URL".to_string()),
1029 auth_style: "header".to_string(),
1030 auth_header: Some("x-goog-api-key".to_string()),
1031 auth_env: AuthEnv::Multiple(vec![
1032 "GEMINI_API_KEY".to_string(),
1033 "GOOGLE_API_KEY".to_string(),
1034 ]),
1035 chat_endpoint: "/v1beta/models".to_string(),
1036 healthcheck: Some(HealthcheckDef {
1037 method: "GET".to_string(),
1038 path: Some("/v1beta/models".to_string()),
1039 url: None,
1040 body: None,
1041 }),
1042 cost_per_1k_in: Some(0.00125),
1043 cost_per_1k_out: Some(0.005),
1044 latency_p50_ms: Some(1800),
1045 ..Default::default()
1046 },
1047 );
1048
1049 config.providers.insert(
1051 "together".to_string(),
1052 ProviderDef {
1053 base_url: "https://api.together.xyz/v1".to_string(),
1054 base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
1055 auth_style: "bearer".to_string(),
1056 auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
1057 chat_endpoint: "/chat/completions".to_string(),
1058 completion_endpoint: Some("/completions".to_string()),
1059 healthcheck: Some(HealthcheckDef {
1060 method: "GET".to_string(),
1061 path: Some("/models".to_string()),
1062 url: None,
1063 body: None,
1064 }),
1065 cost_per_1k_in: Some(0.0002),
1066 cost_per_1k_out: Some(0.0006),
1067 latency_p50_ms: Some(1600),
1068 ..Default::default()
1069 },
1070 );
1071
1072 config.providers.insert(
1074 "groq".to_string(),
1075 ProviderDef {
1076 base_url: "https://api.groq.com/openai/v1".to_string(),
1077 base_url_env: Some("GROQ_BASE_URL".to_string()),
1078 auth_style: "bearer".to_string(),
1079 auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
1080 chat_endpoint: "/chat/completions".to_string(),
1081 completion_endpoint: Some("/completions".to_string()),
1082 healthcheck: Some(HealthcheckDef {
1083 method: "GET".to_string(),
1084 path: Some("/models".to_string()),
1085 url: None,
1086 body: None,
1087 }),
1088 cost_per_1k_in: Some(0.0001),
1089 cost_per_1k_out: Some(0.0003),
1090 latency_p50_ms: Some(450),
1091 ..Default::default()
1092 },
1093 );
1094
1095 config.providers.insert(
1097 "deepseek".to_string(),
1098 ProviderDef {
1099 base_url: "https://api.deepseek.com/v1".to_string(),
1100 base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
1101 auth_style: "bearer".to_string(),
1102 auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
1103 chat_endpoint: "/chat/completions".to_string(),
1104 completion_endpoint: Some("/completions".to_string()),
1105 healthcheck: Some(HealthcheckDef {
1106 method: "GET".to_string(),
1107 path: Some("/models".to_string()),
1108 url: None,
1109 body: None,
1110 }),
1111 cost_per_1k_in: Some(0.00014),
1112 cost_per_1k_out: Some(0.00028),
1113 latency_p50_ms: Some(1800),
1114 ..Default::default()
1115 },
1116 );
1117
1118 config.providers.insert(
1120 "fireworks".to_string(),
1121 ProviderDef {
1122 base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1123 base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1124 auth_style: "bearer".to_string(),
1125 auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1126 chat_endpoint: "/chat/completions".to_string(),
1127 completion_endpoint: Some("/completions".to_string()),
1128 healthcheck: Some(HealthcheckDef {
1129 method: "GET".to_string(),
1130 path: Some("/models".to_string()),
1131 url: None,
1132 body: None,
1133 }),
1134 cost_per_1k_in: Some(0.0002),
1135 cost_per_1k_out: Some(0.0006),
1136 latency_p50_ms: Some(1400),
1137 ..Default::default()
1138 },
1139 );
1140
    // Alibaba DashScope, via its OpenAI-compatible-mode endpoint.
    config.providers.insert(
        "dashscope".to_string(),
        ProviderDef {
            base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
            base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
            chat_endpoint: "/chat/completions".to_string(),
            completion_endpoint: Some("/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0003),
            cost_per_1k_out: Some(0.0012),
            latency_p50_ms: Some(1600),
            ..Default::default()
        },
    );

    // AWS Bedrock: no static base URL (resolved from BEDROCK_BASE_URL) and
    // SigV4 request signing instead of an API-key env var.
    config.providers.insert(
        "bedrock".to_string(),
        ProviderDef {
            base_url: String::new(),
            base_url_env: Some("BEDROCK_BASE_URL".to_string()),
            auth_style: "aws_sigv4".to_string(),
            auth_env: AuthEnv::None,
            chat_endpoint: "/model/{model}/converse".to_string(),
            features: vec!["native_tools".to_string()],
            latency_p50_ms: Some(2600),
            ..Default::default()
        },
    );

    // Azure OpenAI: `{resource}`, `{deployment}` and `{api_version}` are
    // placeholders filled in elsewhere; several auth env vars are accepted.
    config.providers.insert(
        "azure_openai".to_string(),
        ProviderDef {
            base_url: "https://{resource}.openai.azure.com".to_string(),
            base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
            auth_style: "azure_openai".to_string(),
            auth_env: AuthEnv::Multiple(vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]),
            chat_endpoint:
                "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
                    .to_string(),
            features: vec!["native_tools".to_string()],
            cost_per_1k_in: Some(0.0025),
            cost_per_1k_out: Some(0.010),
            latency_p50_ms: Some(1900),
            ..Default::default()
        },
    );

    // Google Vertex AI: bearer auth sourced from one of several token env
    // vars; `{project}`/`{location}`/`{model}` are endpoint placeholders.
    config.providers.insert(
        "vertex".to_string(),
        ProviderDef {
            base_url: "https://aiplatform.googleapis.com/v1".to_string(),
            base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
            auth_style: "bearer".to_string(),
            auth_env: AuthEnv::Multiple(vec![
                "VERTEX_AI_ACCESS_TOKEN".to_string(),
                "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
                "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
            ]),
            chat_endpoint:
                "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
                    .to_string(),
            features: vec!["native_tools".to_string()],
            cost_per_1k_in: Some(0.00125),
            cost_per_1k_out: Some(0.005),
            latency_p50_ms: Some(2100),
            ..Default::default()
        },
    );

    // Local OpenAI-compatible servers: no auth, zero cost. Each gets its
    // own default port and base-URL env override.
    config.providers.insert(
        "local".to_string(),
        ProviderDef {
            base_url: "http://localhost:8000".to_string(),
            base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(900),
            ..Default::default()
        },
    );

    // llama.cpp server (default port 8001).
    config.providers.insert(
        "llamacpp".to_string(),
        ProviderDef {
            base_url: "http://127.0.0.1:8001".to_string(),
            base_url_env: Some("LLAMACPP_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(900),
            ..Default::default()
        },
    );

    // MLX server (default port 8002).
    config.providers.insert(
        "mlx".to_string(),
        ProviderDef {
            base_url: "http://127.0.0.1:8002".to_string(),
            base_url_env: Some("MLX_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(900),
            ..Default::default()
        },
    );

    // vLLM server. NOTE(review): shares default port 8000 with "local";
    // presumably intentional (both are OpenAI-compatible) — confirm.
    config.providers.insert(
        "vllm".to_string(),
        ProviderDef {
            base_url: "http://localhost:8000".to_string(),
            base_url_env: Some("VLLM_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/v1/models".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(800),
            ..Default::default()
        },
    );

    // Hugging Face TGI (default port 8080); healthcheck uses its /health
    // endpoint rather than /v1/models.
    config.providers.insert(
        "tgi".to_string(),
        ProviderDef {
            base_url: "http://localhost:8080".to_string(),
            base_url_env: Some("TGI_BASE_URL".to_string()),
            auth_style: "none".to_string(),
            chat_endpoint: "/v1/chat/completions".to_string(),
            completion_endpoint: Some("/v1/completions".to_string()),
            healthcheck: Some(HealthcheckDef {
                method: "GET".to_string(),
                path: Some("/health".to_string()),
                url: None,
                body: None,
            }),
            cost_per_1k_in: Some(0.0),
            cost_per_1k_out: Some(0.0),
            latency_p50_ms: Some(950),
            ..Default::default()
        },
    );
1342
    // Model-name -> provider inference rules, checked as glob patterns.
    // User overlays are prepended by `merge_from`, so these built-ins act
    // as fallbacks after any user-supplied rules.
    config.inference_rules = vec![
        InferenceRule {
            pattern: Some("claude-*".to_string()),
            contains: None,
            exact: None,
            provider: "anthropic".to_string(),
        },
        InferenceRule {
            pattern: Some("gpt-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        // OpenAI "o"-series reasoning models (o1/o3/o4 families).
        InferenceRule {
            pattern: Some("o1*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o3*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        InferenceRule {
            pattern: Some("o4*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        },
        // Bedrock model ids are vendor-prefixed ("anthropic.", "meta.", …);
        // these must sort before any bare-vendor patterns would match.
        InferenceRule {
            pattern: Some("anthropic.claude-*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("meta.llama*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("amazon.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("mistral.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("cohere.*".to_string()),
            contains: None,
            exact: None,
            provider: "bedrock".to_string(),
        },
        InferenceRule {
            pattern: Some("gemini-*".to_string()),
            contains: None,
            exact: None,
            provider: "gemini".to_string(),
        },
    ];
1412
    // Model-name -> capability-tier rules ("small" / "mid" / "frontier").
    // Rules match by substring (`contains`), glob (`pattern`) or `exact` id.
    config.tier_rules = vec![
        // Small models recognized by parameter-count markers in the name.
        TierRule {
            contains: Some("9b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("a3b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-e2b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-e4b".to_string()),
            pattern: None,
            exact: None,
            tier: "small".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-26b".to_string()),
            pattern: None,
            exact: None,
            tier: "mid".to_string(),
        },
        TierRule {
            contains: Some("gemma-4-31b".to_string()),
            pattern: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        // Same gemma sizes under the colon-separated (Ollama-style) naming.
        TierRule {
            contains: Some("gemma4:26b".to_string()),
            pattern: None,
            exact: None,
            tier: "mid".to_string(),
        },
        TierRule {
            contains: Some("gemma4:31b".to_string()),
            pattern: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            pattern: Some("claude-*".to_string()),
            contains: None,
            exact: None,
            tier: "frontier".to_string(),
        },
        TierRule {
            exact: Some("gpt-4o".to_string()),
            contains: None,
            pattern: None,
            tier: "frontier".to_string(),
        },
    ];

    // Anything no rule matches falls back to the "mid" tier.
    config.tier_defaults = TierDefaults {
        default: "mid".to_string(),
    };
1480
    // Built-in aliases. Each tier name is registered twice: bare
    // ("frontier") and namespaced ("tier/frontier"), mapping to the same
    // model/provider pair.
    config.aliases.insert(
        "frontier".to_string(),
        AliasDef {
            id: "claude-sonnet-4-20250514".to_string(),
            provider: "anthropic".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/frontier".to_string(),
        AliasDef {
            id: "claude-sonnet-4-20250514".to_string(),
            provider: "anthropic".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "mid".to_string(),
        AliasDef {
            id: "gpt-4o-mini".to_string(),
            provider: "openai".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/mid".to_string(),
        AliasDef {
            id: "gpt-4o-mini".to_string(),
            provider: "openai".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "small".to_string(),
        AliasDef {
            id: "Qwen/Qwen3.5-9B".to_string(),
            provider: "openrouter".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "tier/small".to_string(),
        AliasDef {
            id: "Qwen/Qwen3.5-9B".to_string(),
            provider: "openrouter".to_string(),
            tool_format: None,
        },
    );
    // Convenience aliases for locally-served gemma variants; the bare
    // "local-gemma4" points at the 26b build.
    config.aliases.insert(
        "local-gemma4".to_string(),
        AliasDef {
            id: "gemma-4-26b-a4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-26b".to_string(),
        AliasDef {
            id: "gemma-4-26b-a4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-31b".to_string(),
        AliasDef {
            id: "gemma-4-31b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-e4b".to_string(),
        AliasDef {
            id: "gemma-4-e4b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "local-gemma4-e2b".to_string(),
        AliasDef {
            id: "gemma-4-e2b-it".to_string(),
            provider: "local".to_string(),
            tool_format: None,
        },
    );
    config.aliases.insert(
        "mlx-qwen36-27b".to_string(),
        AliasDef {
            id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
            provider: "mlx".to_string(),
            tool_format: None,
        },
    );
1577
    // Per-provider default models for quality-control checks (cheap/fast
    // model per provider).
    config.qc_defaults.extend(BTreeMap::from([
        (
            "anthropic".to_string(),
            "claude-3-5-haiku-20241022".to_string(),
        ),
        ("openai".to_string(), "gpt-4o-mini".to_string()),
        (
            "openrouter".to_string(),
            "google/gemini-2.5-flash".to_string(),
        ),
        ("ollama".to_string(), "llama3.2".to_string()),
        // NOTE(review): "local" maps to "gpt-4o" — presumably an
        // OpenAI-compatible local server exposing that model id; confirm
        // this is intentional and not a copy-paste of the openai entry.
        ("local".to_string(), "gpt-4o".to_string()),
    ]));
1591
    // Built-in model catalog entries: display name, provider, context
    // window, capability flags, and (where known) per-Mtok pricing.
    config.models.extend(BTreeMap::from([
        (
            "claude-sonnet-4-20250514".to_string(),
            ModelDef {
                name: "Claude Sonnet 4".to_string(),
                provider: "anthropic".to_string(),
                context_window: 200_000,
                runtime_context_window: None,
                stream_timeout: None,
                capabilities: vec![
                    "tools".to_string(),
                    "streaming".to_string(),
                    "prompt_caching".to_string(),
                    "thinking".to_string(),
                ],
                pricing: Some(ModelPricing {
                    input_per_mtok: 3.0,
                    output_per_mtok: 15.0,
                    cache_read_per_mtok: Some(0.3),
                    cache_write_per_mtok: Some(3.75),
                }),
            },
        ),
        (
            "gpt-4o-mini".to_string(),
            ModelDef {
                name: "GPT-4o Mini".to_string(),
                provider: "openai".to_string(),
                context_window: 128_000,
                runtime_context_window: None,
                stream_timeout: None,
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 0.15,
                    output_per_mtok: 0.60,
                    cache_read_per_mtok: None,
                    cache_write_per_mtok: None,
                }),
            },
        ),
        (
            "Qwen/Qwen3.5-9B".to_string(),
            ModelDef {
                name: "Qwen3.5 9B".to_string(),
                provider: "openrouter".to_string(),
                context_window: 131_072,
                runtime_context_window: None,
                stream_timeout: None,
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: None,
            },
        ),
        (
            "llama3.2".to_string(),
            ModelDef {
                name: "Llama 3.2".to_string(),
                provider: "ollama".to_string(),
                context_window: 32_000,
                runtime_context_window: None,
                // Local Ollama streams can be slow; allow a 300s stall.
                stream_timeout: Some(300.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: None,
            },
        ),
    ]));
1657
1658 config
1659}
1660
/// Test-only helper: build the built-in default configuration and apply
/// `overlay` on top of it, returning the merged result.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut merged = default_config();
    merged.merge_from(&overlay);
    merged
}
1667
#[cfg(test)]
mod tests {
    use super::*;

    // Clear the thread-local user-override config so tests that set
    // overrides don't leak state into each other.
    fn reset_overrides() {
        clear_user_overrides();
    }

    // `glob_match` with a trailing `*` matches by prefix.
    #[test]
    fn test_glob_match_prefix() {
        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
        assert!(glob_match("gpt-*", "gpt-4o"));
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    // Leading `*` matches by suffix.
    #[test]
    fn test_glob_match_suffix() {
        assert!(glob_match("*-latest", "llama3.2-latest"));
        assert!(!glob_match("*-latest", "llama3.2"));
    }

    // A `*` in the middle matches prefix and suffix around it.
    #[test]
    fn test_glob_match_middle() {
        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
    }

    // No wildcard means exact equality.
    #[test]
    fn test_glob_match_exact() {
        assert!(glob_match("gpt-4o", "gpt-4o"));
        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
    }

    // Built-in inference rules route model names to providers. The env
    // lock serializes tests that touch process-wide env vars; the previous
    // HARN_DEFAULT_PROVIDER value is saved and restored so the test is
    // hermetic. set_var/remove_var are `unsafe` in edition 2024.
    #[test]
    fn test_infer_provider_from_defaults() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        // With no env override, unknown names fall back to "anthropic".
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // Explicit "local:", "ollama:", and "hf:" prefixes decide the provider
    // regardless of the rest of the model name.
    #[test]
    fn test_infer_provider_prefix_rules() {
        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
        assert_eq!(infer_provider("local:owner/model"), "ollama");
        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
    }

    // "org/model" (exactly one slash) infers openrouter; more slashes fall
    // back to the default provider. Env save/restore as above.
    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // Provider prefixes are stripped from the resolved id and mapped to
    // their canonical provider names.
    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        let local = resolve_model_info("local:gemma-4-e4b-it");
        assert_eq!(local.id, "gemma-4-e4b-it");
        assert_eq!(local.provider, "ollama");

        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
        assert_eq!(ollama.id, "qwen3:30b-a3b");
        assert_eq!(ollama.provider, "ollama");

        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.provider, "huggingface");
    }

    // Tier rules classify known names; unmatched names get the "mid"
    // tier default.
    #[test]
    fn test_model_tier_from_defaults() {
        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
        assert_eq!(model_tier("gpt-4o"), "frontier");
        assert_eq!(model_tier("Qwen3.5-9B"), "small");
        assert_eq!(model_tier("deepseek-v3"), "mid");
    }

    // A name that is not an alias resolves to itself with no provider.
    #[test]
    fn test_resolve_model_unknown_alias() {
        let (id, provider) = resolve_model("gpt-4o");
        assert_eq!(id, "gpt-4o");
        assert!(provider.is_none());
    }

    // The built-in catalog exposes all the expected provider names.
    #[test]
    fn test_provider_names() {
        let names = provider_names();
        assert!(names.len() >= 7);
        assert!(names.contains(&"anthropic".to_string()));
        assert!(names.contains(&"together".to_string()));
        assert!(names.contains(&"local".to_string()));
        assert!(names.contains(&"mlx".to_string()));
        assert!(names.contains(&"openai".to_string()));
        assert!(names.contains(&"ollama".to_string()));
        assert!(names.contains(&"bedrock".to_string()));
        assert!(names.contains(&"azure_openai".to_string()));
        assert!(names.contains(&"vertex".to_string()));
    }

    // Merging a user config over the defaults keeps built-in providers
    // while applying the overlay's default_provider and new aliases.
    #[test]
    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.aliases.insert(
            "quickstart".to_string(),
            AliasDef {
                id: "llama3.2".to_string(),
                provider: "ollama".to_string(),
                tool_format: None,
            },
        );

        let merged = merge_global_config(overlay);

        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
        assert!(merged.providers.contains_key("anthropic"));
        assert!(merged.providers.contains_key("ollama"));
        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
    }

    // Without a provider hint, tier names resolve via the built-in
    // "frontier"/"small" aliases.
    #[test]
    fn test_resolve_tier_model_default_aliases() {
        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
        assert_eq!(model, "claude-sonnet-4-20250514");
        assert_eq!(provider, "anthropic");

        let (model, provider) = resolve_tier_model("small", None).unwrap();
        assert_eq!(model, "Qwen/Qwen3.5-9B");
        assert_eq!(provider, "openrouter");
    }

    // A provider hint selects the provider-scoped alias for the tier.
    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
        assert_eq!(model, "gpt-4o-mini");
        assert_eq!(provider, "openai");
    }

    // Anthropic uses header auth with the x-api-key header.
    #[test]
    fn test_provider_config_anthropic() {
        let pdef = provider_config("anthropic").unwrap();
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    // MLX provider defaults (port 8002, MLX_BASE_URL override, /v1/models
    // healthcheck) and the mlx-qwen36-27b alias.
    #[test]
    fn test_provider_config_mlx() {
        let pdef = provider_config("mlx").unwrap();
        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
        assert_eq!(
            pdef.healthcheck.unwrap().path.as_deref(),
            Some("/v1/models")
        );

        let (model, provider) = resolve_model("mlx-qwen36-27b");
        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
        assert_eq!(provider.as_deref(), Some("mlx"));
    }

    // Bedrock/Azure/Vertex defaults and the vendor-prefixed inference
    // rules that route Bedrock-style model ids.
    #[test]
    fn test_enterprise_provider_defaults_and_inference() {
        let bedrock = provider_config("bedrock").unwrap();
        assert_eq!(bedrock.auth_style, "aws_sigv4");
        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
        assert_eq!(
            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
            "bedrock"
        );
        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");

        let azure = provider_config("azure_openai").unwrap();
        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
        assert_eq!(
            auth_env_names(&azure.auth_env),
            vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]
        );

        let vertex = provider_config("vertex").unwrap();
        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
    }

    // HARN_DEFAULT_PROVIDER overrides the fallback for unknown models and
    // is reported as a DefaultFallback inference source. Env is restored
    // before the asserts so a failure cannot leak the override.
    #[test]
    fn test_default_provider_env_override_for_unknown_model() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
        }

        let inference = infer_provider_detail("unknown-model");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }

        assert_eq!(inference.provider, "openai");
        assert_eq!(
            inference.source,
            crate::llm::provider::ProviderInferenceSource::DefaultFallback
        );
    }

    // With no base_url_env set, resolve_base_url returns the static URL.
    #[test]
    fn test_resolve_base_url_no_env() {
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    // Sanity-check the built-in defaults are non-empty and "mid" is the
    // tier fallback.
    #[test]
    fn test_default_config_roundtrip() {
        let config = default_config();
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
        assert_eq!(config.tier_defaults.default, "mid");
    }

    // merge_from adds overlay providers and default_provider without
    // removing the built-in entries.
    #[test]
    fn test_external_config_overlays_default_catalog() {
        let mut config = default_config();
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.providers.insert(
            "custom".to_string(),
            ProviderDef {
                base_url: "https://llm.example.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );

        config.merge_from(&overlay);

        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
        assert!(config.providers.contains_key("custom"));
        assert!(config.providers.contains_key("anthropic"));
        assert!(config.providers.contains_key("ollama"));
    }

    // No model_defaults entry exists for this model, so params are empty.
    #[test]
    fn test_model_params_empty() {
        let params = model_params("claude-sonnet-4-20250514");
        assert!(params.is_empty());
    }

    // User overrides can introduce a brand-new provider and alias that
    // resolve_model / provider_config then see.
    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        overlay.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        set_user_overrides(Some(overlay));

        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        assert_eq!(
            provider_config("acme").map(|provider| provider.base_url),
            Some("https://llm.acme.test/v1".to_string())
        );

        reset_overrides();
    }

    // Tool format falls back to the model/provider capability matrix:
    // llamacpp-hosted qwen gets native tools, local gemma gets text.
    #[test]
    fn test_default_tool_format_uses_capability_matrix() {
        reset_overrides();

        assert_eq!(
            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
            "native"
        );
        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
    }

    // User overrides can add catalog entries (with pricing converted from
    // per-Mtok to per-1k-token: 1.25/Mtok -> 0.00125/1k) and per-provider
    // QC default models.
    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                runtime_context_window: None,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(overlay));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        // NOTE(review): only "streaming" is expected even though the
        // overlay declared tools+streaming — presumably
        // model_catalog_entry filters capabilities; confirm.
        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }

    // Override-supplied inference rules take precedence over built-ins
    // (merge_from prepends them), so "internal-*" wins here.
    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        set_user_overrides(Some(overlay));

        assert_eq!(infer_provider("internal-foo"), "openai");

        reset_overrides();
    }
}