1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
// Process-wide base configuration, initialized exactly once by `load_config`.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
// Path of the external config file that was merged into CONFIG, if any
// (set inside `load_config`, read by `loaded_config_path`).
static CONFIG_PATH: OnceLock<String> = OnceLock::new();

thread_local! {
    // Per-thread overlay applied on top of CONFIG by `effective_config`;
    // installed via `set_user_overrides`, removed via `clear_user_overrides`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root of the providers configuration, deserialized from `providers.toml`.
/// Every field has a serde default so a partial (or empty) file is valid;
/// see `merge_from` for how overlays combine with the built-in defaults.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider chosen when nothing else selects one (see `default_provider_with_config`).
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider name -> connection/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias name -> concrete (model id, provider) binding.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id -> catalog entry (context window, pricing, capabilities).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lower-cased provider name -> default QC model id (see `qc_default_model`).
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered rules mapping model ids to providers; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered rules mapping model ids to tiers; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Fallback tier used when no tier rule or heuristic applies.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Model-id glob pattern -> extra request parameters (see `model_params`).
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
impl ProvidersConfig {
    /// True when every field still holds its deserialization default, i.e.
    /// the config carries no information worth merging or reporting.
    pub fn is_empty(&self) -> bool {
        self.default_provider.is_none()
            && self.providers.is_empty()
            && self.aliases.is_empty()
            && self.models.is_empty()
            && self.qc_defaults.is_empty()
            && self.inference_rules.is_empty()
            && self.tier_rules.is_empty()
            && self.model_defaults.is_empty()
            // tier_defaults has no "unset" state; the built-in "mid" counts as empty.
            && self.tier_defaults.default == default_mid()
    }

    /// Merge `overlay` into `self`, overlay taking precedence.
    ///
    /// - Maps (providers/aliases/models/qc_defaults): overlay entries replace
    ///   same-keyed base entries.
    /// - Rule lists: overlay rules are PREPENDED so they are evaluated before
    ///   the base rules (first match wins in the rule loops).
    /// - `default_provider` / `tier_defaults`: replaced only when the overlay
    ///   actually sets a non-default value.
    /// - `model_defaults`: merged per glob pattern, overlay keys winning.
    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
        self.providers.extend(overlay.providers.clone());
        self.aliases.extend(overlay.aliases.clone());
        self.models.extend(overlay.models.clone());
        self.qc_defaults.extend(overlay.qc_defaults.clone());

        if overlay.default_provider.is_some() {
            self.default_provider = overlay.default_provider.clone();
        }

        if !overlay.inference_rules.is_empty() {
            // Overlay rules first so they shadow base rules of equal scope.
            let mut merged = overlay.inference_rules.clone();
            merged.extend(self.inference_rules.clone());
            self.inference_rules = merged;
        }

        if !overlay.tier_rules.is_empty() {
            let mut merged = overlay.tier_rules.clone();
            merged.extend(self.tier_rules.clone());
            self.tier_rules = merged;
        }

        if overlay.tier_defaults.default != default_mid() {
            self.tier_defaults = overlay.tier_defaults.clone();
        }

        for (pattern, defaults) in &overlay.model_defaults {
            self.model_defaults
                .entry(pattern.clone())
                .or_default()
                .extend(defaults.clone());
        }
    }
}
86
/// Connection, authentication, and economics definition for one provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Human-readable name for UIs (falls back to the map key elsewhere — TODO confirm).
    #[serde(default)]
    pub display_name: Option<String>,
    #[serde(default)]
    pub icon: Option<String>,
    /// Static base URL; may be overridden at runtime via `base_url_env`
    /// (see `resolve_base_url`).
    pub base_url: String,
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme tag; "bearer" by default. "none" means keyless
    /// (see `provider_key_available`).
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name used when `auth_style` requires a custom header (e.g. "x-api-key").
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Environment variable(s) that may hold the credential.
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Extra headers sent verbatim with every request.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint path relative to `base_url`.
    #[serde(default)]
    pub chat_endpoint: String,
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Feature tags (e.g. "native_tools"); see `provider_has_feature`.
    #[serde(default)]
    pub features: Vec<String>,
    /// Name of a provider to fall back to — consumed elsewhere; TODO confirm semantics.
    #[serde(default)]
    pub fallback: Option<String>,
    #[serde(default)]
    pub retry_count: Option<u32>,
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Requests-per-minute budget, if rate limited.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// USD per 1K input tokens (provider-level default; see `provider_economics`).
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// USD per 1K output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Typical median latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
135impl Default for ProviderDef {
136 fn default() -> Self {
137 Self {
138 display_name: None,
139 icon: None,
140 base_url: String::new(),
141 base_url_env: None,
142 auth_style: default_bearer(),
143 auth_header: None,
144 auth_env: AuthEnv::None,
145 extra_headers: BTreeMap::new(),
146 chat_endpoint: String::new(),
147 completion_endpoint: None,
148 healthcheck: None,
149 features: Vec::new(),
150 fallback: None,
151 retry_count: None,
152 retry_delay_ms: None,
153 rpm: None,
154 cost_per_1k_in: None,
155 cost_per_1k_out: None,
156 latency_p50_ms: None,
157 }
158 }
159}
160
/// Serde default for `ProviderDef::auth_style`: the common "bearer" scheme.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Environment variable(s) that may carry a provider's API credential.
/// `untagged`: TOML may supply nothing, one string, or a list of strings.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No credential variable (keyless providers such as ollama/local).
    #[default]
    None,
    /// A single environment variable name.
    Single(String),
    /// Several candidate variable names, tried in order (see `provider_key_available`).
    Multiple(Vec<String>),
}
175
/// How to probe a provider for liveness.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method ("GET" or "POST" in the built-in table).
    pub method: String,
    /// Path relative to the provider's base URL; exclusive with `url` in practice.
    #[serde(default)]
    pub path: Option<String>,
    /// Absolute URL, used when the probe lives on a different host.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body (used by POST-style probes).
    #[serde(default)]
    pub body: Option<String>,
}
186
/// A short name bound to a concrete (model id, provider) pair.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Concrete model id the alias resolves to.
    pub id: String,
    /// Provider that serves the model.
    pub provider: String,
    /// Optional per-alias tool-format override ("native"/"text");
    /// when absent, `default_tool_format_with_config` decides.
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Per-model token pricing, expressed in USD per million tokens.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    /// Cache-read rate, if the provider bills cached prompt reads separately.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Cache-write rate, if billed separately.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog entry for one model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Display/model name.
    pub name: String,
    /// Provider that serves this model.
    pub provider: String,
    /// Context window size in tokens.
    pub context_window: u64,
    /// Streaming timeout — units not evident here (seconds, presumably); TODO confirm.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags; recomputed by `with_effective_capability_tags` when
    /// entries are served through the catalog accessors.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Optional per-model pricing; overrides provider-level economics
    /// (see `pricing_per_1k_for`).
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
221
/// Fully-resolved model selection, produced by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Concrete model id (after alias resolution / prefix normalization).
    pub id: String,
    /// Provider chosen for the model.
    pub provider: String,
    /// The alias the caller used, when resolution went through one.
    pub alias: Option<String>,
    /// Tool-call format ("native" or "text").
    pub tool_format: String,
    /// Capability tier (e.g. "small"/"mid"/"frontier").
    pub tier: String,
}
230
/// Rule mapping a model id to a provider. Matchers are optional; a rule fires
/// if any one of them matches (exact equality, glob `pattern`, or substring).
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider to route to when the rule matches.
    pub provider: String,
}
241
/// Rule mapping a model id to a tier; same matcher semantics as `InferenceRule`.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern matched via `glob_match`.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match against the model id.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact model id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier assigned when the rule matches.
    pub tier: String,
}
252
/// Fallback tier configuration used when no tier rule or heuristic matches.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Tier name; defaults to "mid" both here and in `Default`.
    #[serde(default = "default_mid")]
    pub default: String,
}
258
259impl Default for TierDefaults {
260 fn default() -> Self {
261 Self {
262 default: default_mid(),
263 }
264 }
265}
266
/// Serde/`Default` fallback tier name.
fn default_mid() -> String {
    String::from("mid")
}
270
/// Return the process-wide base configuration, loading it on first use.
///
/// Precedence: built-in defaults overlaid with the first external file found —
/// 1. the file named by `HARN_PROVIDERS_CONFIG` (if readable and parsable),
/// 2. otherwise `$HOME/.config/harn/providers.toml`.
/// `CONFIG_PATH` records which external file, if any, was merged.
/// Per-thread user overrides are NOT applied here; see `effective_config`.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        let mut config = default_config();
        // Verbose load logging is opt-in via either env toggle.
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            if let Some(overlay) = read_external_config(&path, verbose_config_logging) {
                config.merge_from(&overlay);
                let _ = CONFIG_PATH.set(path);
                return config;
            }
            // NOTE(review): when the explicitly-named file fails to load we fall
            // through to the home-dir config — confirm this is intended.
        }
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            // The optional home config never logs read errors (verbose = false).
            if let Some(overlay) = read_external_config(&path, false) {
                config.merge_from(&overlay);
                let _ = CONFIG_PATH.set(path);
                return config;
            }
        }
        config
    })
}
300
301fn read_external_config(path: &str, verbose: bool) -> Option<ProvidersConfig> {
302 match std::fs::read_to_string(path) {
303 Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
304 Ok(config) => {
305 if verbose {
306 eprintln!(
307 "[llm_config] Loaded {} providers, {} aliases from {}",
308 config.providers.len(),
309 config.aliases.len(),
310 path
311 );
312 }
313 Some(config)
314 }
315 Err(error) => {
316 eprintln!("[llm_config] TOML parse error in {}: {}", path, error);
317 None
318 }
319 },
320 Err(error) => {
321 if verbose {
322 eprintln!("[llm_config] Cannot read {}: {}", path, error);
323 }
324 None
325 }
326 }
327}
328
329pub fn loaded_config_path() -> Option<std::path::PathBuf> {
332 let _ = load_config();
334 CONFIG_PATH.get().map(std::path::PathBuf::from)
335}
336
337pub fn set_user_overrides(config: Option<ProvidersConfig>) {
341 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
342}
343
344pub fn clear_user_overrides() {
346 set_user_overrides(None);
347}
348
349fn effective_config() -> ProvidersConfig {
350 let mut merged = load_config().clone();
351 USER_OVERRIDES.with(|cell| {
352 if let Some(overlay) = cell.borrow().as_ref() {
353 merged.merge_from(overlay);
354 }
355 });
356 merged
357}
358
359pub fn resolve_model(alias: &str) -> (String, Option<String>) {
361 let config = effective_config();
362 if let Some(a) = config.aliases.get(alias) {
363 return (a.id.clone(), Some(a.provider.clone()));
364 }
365 (normalize_model_id(alias), None)
366}
367
/// Strip a recognized routing prefix ("ollama:", "local:", "huggingface:",
/// "hf:") from a raw model id; anything else is returned unchanged.
/// Only the first matching prefix is removed.
pub fn normalize_model_id(raw: &str) -> String {
    const PREFIXES: [&str; 4] = ["ollama:", "local:", "huggingface:", "hf:"];
    PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
380
381pub fn resolve_model_info(selector: &str) -> ResolvedModel {
384 let config = effective_config();
385 if let Some(alias) = config.aliases.get(selector) {
386 let id = alias.id.clone();
387 let provider = alias.provider.clone();
388 let tool_format = alias
389 .tool_format
390 .clone()
391 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
392 return ResolvedModel {
393 tier: model_tier_with_config(&config, &id),
394 id,
395 provider,
396 alias: Some(selector.to_string()),
397 tool_format,
398 };
399 }
400
401 let provider = infer_provider_with_config(&config, selector).provider;
402 let id = normalize_model_id(selector);
403 let tool_format = default_tool_format_with_config(&config, &id, &provider);
404 let tier = model_tier_with_config(&config, &id);
405 ResolvedModel {
406 id,
407 provider,
408 alias: None,
409 tool_format,
410 tier,
411 }
412}
413
414pub fn infer_provider(model_id: &str) -> String {
416 infer_provider_detail(model_id).provider
417}
418
419pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
421 let config = effective_config();
422 infer_provider_with_config(&config, model_id)
423}
424
425fn infer_provider_with_config(
426 config: &ProvidersConfig,
427 model_id: &str,
428) -> crate::llm::provider::ProviderInference {
429 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
430 return crate::llm::provider::ProviderInference::builtin("ollama");
431 }
432 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
433 return crate::llm::provider::ProviderInference::builtin("huggingface");
434 }
435 for rule in &config.inference_rules {
436 if let Some(exact) = &rule.exact {
437 if model_id == exact {
438 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
439 }
440 }
441 if let Some(pattern) = &rule.pattern {
442 if glob_match(pattern, model_id) {
443 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
444 }
445 }
446 if let Some(substr) = &rule.contains {
447 if model_id.contains(substr.as_str()) {
448 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
449 }
450 }
451 }
452 crate::llm::provider::infer_provider_from_model_id(
453 model_id,
454 &default_provider_with_config(config),
455 )
456}
457
458pub fn default_provider() -> String {
459 let config = effective_config();
460 default_provider_with_config(&config)
461}
462
463fn default_provider_with_config(config: &ProvidersConfig) -> String {
464 std::env::var("HARN_DEFAULT_PROVIDER")
465 .ok()
466 .map(|value| value.trim().to_string())
467 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
468 .or_else(|| {
469 config
470 .default_provider
471 .as_deref()
472 .map(str::trim)
473 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
474 .map(str::to_string)
475 })
476 .unwrap_or_else(|| "anthropic".to_string())
477}
478
479pub fn model_tier(model_id: &str) -> String {
481 let config = effective_config();
482 model_tier_with_config(&config, model_id)
483}
484
485fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
486 for rule in &config.tier_rules {
487 if let Some(exact) = &rule.exact {
488 if model_id == exact {
489 return rule.tier.clone();
490 }
491 }
492 if let Some(pattern) = &rule.pattern {
493 if glob_match(pattern, model_id) {
494 return rule.tier.clone();
495 }
496 }
497 if let Some(substr) = &rule.contains {
498 if model_id.contains(substr.as_str()) {
499 return rule.tier.clone();
500 }
501 }
502 }
503 let lower = model_id.to_lowercase();
504 if lower.contains("9b") || lower.contains("a3b") {
505 return "small".to_string();
506 }
507 if lower.starts_with("claude-") || lower == "gpt-4o" {
508 return "frontier".to_string();
509 }
510 config.tier_defaults.default.clone()
511}
512
513pub fn provider_config(name: &str) -> Option<ProviderDef> {
515 effective_config().providers.get(name).cloned()
516}
517
518pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
521 let config = effective_config();
522 let mut params = BTreeMap::new();
523 for (pattern, defaults) in &config.model_defaults {
524 if glob_match(pattern, model_id) {
525 for (k, v) in defaults {
526 params.insert(k.clone(), v.clone());
527 }
528 }
529 }
530 params
531}
532
533pub fn provider_names() -> Vec<String> {
535 effective_config().providers.keys().cloned().collect()
536}
537
538pub fn known_model_names() -> Vec<String> {
540 effective_config().aliases.keys().cloned().collect()
541}
542
543pub fn alias_entries() -> Vec<(String, AliasDef)> {
544 effective_config().aliases.into_iter().collect()
545}
546
547pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
549 let mut entries: Vec<_> = effective_config()
550 .models
551 .into_iter()
552 .map(|(id, model)| {
553 let provider = model.provider.clone();
554 (
555 id.clone(),
556 with_effective_capability_tags(id, provider, model),
557 )
558 })
559 .collect();
560 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
561 model_a
562 .provider
563 .cmp(&model_b.provider)
564 .then_with(|| id_a.cmp(id_b))
565 });
566 entries
567}
568
569pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
570 effective_config()
571 .models
572 .get(model_id)
573 .cloned()
574 .map(|model| {
575 let provider = model.provider.clone();
576 with_effective_capability_tags(model_id.to_string(), provider, model)
577 })
578}
579
580pub fn qc_default_model(provider: &str) -> Option<String> {
581 std::env::var("BURIN_QC_MODEL")
582 .ok()
583 .filter(|value| !value.trim().is_empty())
584 .or_else(|| {
585 effective_config()
586 .qc_defaults
587 .get(&provider.to_lowercase())
588 .cloned()
589 })
590}
591
/// Hard-coded default model id for a provider.
///
/// "local" honors `LOCAL_LLM_MODEL` then `HARN_LLM_MODEL`; "mlx" honors
/// `MLX_MODEL_ID`; everything else is a fixed constant, with Anthropic's
/// model as the catch-all.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        "local" => {
            let env_choice =
                std::env::var("LOCAL_LLM_MODEL").or_else(|_| std::env::var("HARN_LLM_MODEL"));
            env_choice.unwrap_or_else(|_| String::from("gpt-4o"))
        }
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| String::from("unsloth/Qwen3.6-27B-UD-MLX-4bit")),
        "openai" => String::from("gpt-4o"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-20250514"),
    }
}
605
606pub fn qc_defaults() -> BTreeMap<String, String> {
607 effective_config().qc_defaults
608}
609
610pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
611 effective_config()
612 .models
613 .get(model_id)
614 .and_then(|model| model.pricing.clone())
615}
616
617pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
618 model_pricing_per_mtok(model_id)
619 .map(|pricing| {
620 (
621 pricing.input_per_mtok / 1000.0,
622 pricing.output_per_mtok / 1000.0,
623 )
624 })
625 .or_else(|| {
626 let (input, output, _) = provider_economics(provider);
627 match (input, output) {
628 (Some(input), Some(output)) => Some((input, output)),
629 _ => None,
630 }
631 })
632}
633
634pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
635 match auth_env {
636 AuthEnv::None => Vec::new(),
637 AuthEnv::Single(name) => vec![name.clone()],
638 AuthEnv::Multiple(names) => names.clone(),
639 }
640}
641
642pub fn provider_key_available(provider: &str) -> bool {
643 let Some(pdef) = provider_config(provider) else {
644 return provider == "ollama";
645 };
646 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
647 return true;
648 }
649 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
650 std::env::var(env_name)
651 .ok()
652 .is_some_and(|value| !value.trim().is_empty())
653 })
654}
655
656pub fn available_provider_names() -> Vec<String> {
657 provider_names()
658 .into_iter()
659 .filter(|provider| provider_key_available(provider))
660 .collect()
661}
662
663pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
665 provider_config(provider)
666 .map(|p| p.features.iter().any(|f| f == feature))
667 .unwrap_or(false)
668}
669
670pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
674 provider_config(provider)
675 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
676 .unwrap_or((None, None, None))
677}
678
679pub fn default_tool_format(model: &str, provider: &str) -> String {
683 let config = effective_config();
684 default_tool_format_with_config(&config, model, provider)
685}
686
687fn default_tool_format_with_config(
688 config: &ProvidersConfig,
689 model: &str,
690 provider: &str,
691) -> String {
692 for (name, alias) in &config.aliases {
694 let matches = (alias.id == model && alias.provider == provider) || name == model;
695 if matches {
696 if let Some(ref fmt) = alias.tool_format {
697 return fmt.clone();
698 }
699 }
700 }
701 let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
702 let legacy_provider_native = config
703 .providers
704 .get(provider)
705 .map(|p| p.features.iter().any(|f| f == "native_tools"))
706 .unwrap_or(false);
707 if capability_matrix_native || legacy_provider_native {
708 "native".to_string()
709 } else {
710 "text".to_string()
711 }
712}
713
714fn with_effective_capability_tags(
715 model_id: String,
716 provider: String,
717 mut model: ModelDef,
718) -> ModelDef {
719 model.capabilities = effective_model_capability_tags(&provider, &model_id);
720 model
721}
722
723pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
727 let caps = crate::llm::capabilities::lookup(provider, model_id);
728 let mut tags = Vec::new();
729 tags.push("streaming".to_string());
732 if caps.native_tools {
733 tags.push("tools".to_string());
734 }
735 if !caps.tool_search.is_empty() {
736 tags.push("tool_search".to_string());
737 }
738 if caps.vision || caps.vision_supported {
739 tags.push("vision".to_string());
740 }
741 if caps.audio {
742 tags.push("audio".to_string());
743 }
744 if caps.pdf {
745 tags.push("pdf".to_string());
746 }
747 if caps.files_api_supported {
748 tags.push("files".to_string());
749 }
750 if caps.prompt_caching {
751 tags.push("prompt_caching".to_string());
752 }
753 if !caps.thinking_modes.is_empty() {
754 tags.push("thinking".to_string());
755 }
756 if caps.interleaved_thinking_supported
757 || caps
758 .thinking_modes
759 .iter()
760 .any(|mode| mode == "adaptive" || mode == "effort")
761 {
762 tags.push("extended_thinking".to_string());
763 }
764 if caps.json_schema.is_some() {
765 tags.push("structured_output".to_string());
766 }
767 tags
768}
769
770pub fn resolve_tier_model(
772 target: &str,
773 preferred_provider: Option<&str>,
774) -> Option<(String, String)> {
775 let config = effective_config();
776
777 if let Some(alias) = config.aliases.get(target) {
778 return Some((alias.id.clone(), alias.provider.clone()));
779 }
780
781 let candidate_aliases = if let Some(provider) = preferred_provider {
782 vec![
783 format!("{provider}/{target}"),
784 format!("{provider}:{target}"),
785 format!("tier/{target}"),
786 target.to_string(),
787 ]
788 } else {
789 vec![format!("tier/{target}"), target.to_string()]
790 };
791
792 for alias_name in candidate_aliases {
793 if let Some(alias) = config.aliases.get(&alias_name) {
794 return Some((alias.id.clone(), alias.provider.clone()));
795 }
796 }
797
798 None
799}
800
801pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
805 let config = effective_config();
806 let mut seen = std::collections::BTreeSet::new();
807 let mut candidates = Vec::new();
808
809 for alias in config.aliases.values() {
810 let pair = (alias.id.clone(), alias.provider.clone());
811 if seen.contains(&pair) {
812 continue;
813 }
814 if model_tier(&alias.id) == target {
815 seen.insert(pair.clone());
816 candidates.push(pair);
817 }
818 }
819
820 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
821 provider_a
822 .cmp(provider_b)
823 .then_with(|| model_a.cmp(model_b))
824 });
825 candidates
826}
827
828pub fn all_model_candidates() -> Vec<(String, String)> {
831 let config = effective_config();
832 let mut seen = std::collections::BTreeSet::new();
833 let mut candidates = Vec::new();
834
835 for alias in config.aliases.values() {
836 let pair = (alias.id.clone(), alias.provider.clone());
837 if seen.insert(pair.clone()) {
838 candidates.push(pair);
839 }
840 }
841
842 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
843 provider_a
844 .cmp(provider_b)
845 .then_with(|| model_a.cmp(model_b))
846 });
847 candidates
848}
849
/// Minimal glob matcher: `*` matches any (possibly empty) substring, literal
/// segments must appear in order, and the first/last segments anchor at the
/// input's start/end. A pattern without `*` requires exact equality.
///
/// Fixes over the previous version: patterns with two or more `*`s
/// (e.g. "a*b*c") now match properly instead of degenerating to literal
/// equality, and the prefix/suffix of a "a*b" pattern can no longer overlap
/// (previously "a*a" wrongly matched the single-character input "a").
fn glob_match(pattern: &str, input: &str) -> bool {
    if !pattern.contains('*') {
        return input == pattern;
    }

    let mut segments = pattern.split('*');
    // The text before the first `*` must anchor at the start of the input.
    let first = segments.next().unwrap_or("");
    let Some(mut rest) = input.strip_prefix(first) else {
        return false;
    };

    // At least one `*` exists, so there is at least one trailing segment.
    let tail: Vec<&str> = segments.collect();
    let last_index = tail.len() - 1;
    for (i, segment) in tail.iter().enumerate() {
        if i == last_index {
            // The text after the final `*` must anchor at the end.
            return segment.is_empty() || rest.ends_with(segment);
        }
        if segment.is_empty() {
            continue; // consecutive `*`s add no constraint
        }
        // Interior segments consume the earliest occurrence, left to right.
        match rest.find(segment) {
            Some(pos) => rest = &rest[pos + segment.len()..],
            None => return false,
        }
    }
    true
}
867
/// The user's home directory from `$HOME`, if set.
/// (Unix-centric; no Windows fallback is attempted.)
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
871
872pub fn resolve_base_url(pdef: &ProviderDef) -> String {
875 if let Some(env_name) = &pdef.base_url_env {
876 if let Ok(val) = std::env::var(env_name) {
877 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
879 if !trimmed.is_empty() {
880 return trimmed.to_string();
881 }
882 }
883 }
884 pdef.base_url.clone()
885}
886
887fn default_config() -> ProvidersConfig {
888 let mut config = ProvidersConfig {
889 default_provider: Some("anthropic".to_string()),
890 ..Default::default()
891 };
892
893 config.providers.insert(
894 "anthropic".to_string(),
895 ProviderDef {
896 base_url: "https://api.anthropic.com/v1".to_string(),
897 auth_style: "header".to_string(),
898 auth_header: Some("x-api-key".to_string()),
899 auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
900 extra_headers: BTreeMap::from([(
901 "anthropic-version".to_string(),
902 "2023-06-01".to_string(),
903 )]),
904 chat_endpoint: "/messages".to_string(),
905 completion_endpoint: None,
906 healthcheck: Some(HealthcheckDef {
907 method: "POST".to_string(),
908 path: Some("/messages/count_tokens".to_string()),
909 url: None,
910 body: Some(
911 r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
912 .to_string(),
913 ),
914 }),
915 features: vec!["prompt_caching".to_string(), "thinking".to_string()],
916 cost_per_1k_in: Some(0.003),
917 cost_per_1k_out: Some(0.015),
918 latency_p50_ms: Some(2500),
919 ..Default::default()
920 },
921 );
922
923 config.providers.insert(
925 "openai".to_string(),
926 ProviderDef {
927 base_url: "https://api.openai.com/v1".to_string(),
928 auth_style: "bearer".to_string(),
929 auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
930 chat_endpoint: "/chat/completions".to_string(),
931 completion_endpoint: Some("/completions".to_string()),
932 healthcheck: Some(HealthcheckDef {
933 method: "GET".to_string(),
934 path: Some("/models".to_string()),
935 url: None,
936 body: None,
937 }),
938 cost_per_1k_in: Some(0.0025),
939 cost_per_1k_out: Some(0.010),
940 latency_p50_ms: Some(1800),
941 ..Default::default()
942 },
943 );
944
945 config.providers.insert(
947 "openrouter".to_string(),
948 ProviderDef {
949 base_url: "https://openrouter.ai/api/v1".to_string(),
950 auth_style: "bearer".to_string(),
951 auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
952 chat_endpoint: "/chat/completions".to_string(),
953 completion_endpoint: Some("/completions".to_string()),
954 healthcheck: Some(HealthcheckDef {
955 method: "GET".to_string(),
956 path: Some("/auth/key".to_string()),
957 url: None,
958 body: None,
959 }),
960 cost_per_1k_in: Some(0.003),
961 cost_per_1k_out: Some(0.015),
962 latency_p50_ms: Some(2200),
963 ..Default::default()
964 },
965 );
966
967 config.providers.insert(
969 "huggingface".to_string(),
970 ProviderDef {
971 base_url: "https://router.huggingface.co/v1".to_string(),
972 auth_style: "bearer".to_string(),
973 auth_env: AuthEnv::Multiple(vec![
974 "HF_TOKEN".to_string(),
975 "HUGGINGFACE_API_KEY".to_string(),
976 ]),
977 chat_endpoint: "/chat/completions".to_string(),
978 completion_endpoint: Some("/completions".to_string()),
979 healthcheck: Some(HealthcheckDef {
980 method: "GET".to_string(),
981 url: Some("https://huggingface.co/api/whoami-v2".to_string()),
982 path: None,
983 body: None,
984 }),
985 cost_per_1k_in: Some(0.0002),
986 cost_per_1k_out: Some(0.0006),
987 latency_p50_ms: Some(2400),
988 ..Default::default()
989 },
990 );
991
992 config.providers.insert(
1001 "ollama".to_string(),
1002 ProviderDef {
1003 base_url: "http://localhost:11434".to_string(),
1004 base_url_env: Some("OLLAMA_HOST".to_string()),
1005 auth_style: "none".to_string(),
1006 chat_endpoint: "/api/chat".to_string(),
1007 completion_endpoint: Some("/api/generate".to_string()),
1008 healthcheck: Some(HealthcheckDef {
1009 method: "GET".to_string(),
1010 path: Some("/api/tags".to_string()),
1011 url: None,
1012 body: None,
1013 }),
1014 cost_per_1k_in: Some(0.0),
1015 cost_per_1k_out: Some(0.0),
1016 latency_p50_ms: Some(1200),
1017 ..Default::default()
1018 },
1019 );
1020
1021 config.providers.insert(
1023 "gemini".to_string(),
1024 ProviderDef {
1025 base_url: "https://generativelanguage.googleapis.com".to_string(),
1026 base_url_env: Some("GEMINI_BASE_URL".to_string()),
1027 auth_style: "header".to_string(),
1028 auth_header: Some("x-goog-api-key".to_string()),
1029 auth_env: AuthEnv::Multiple(vec![
1030 "GEMINI_API_KEY".to_string(),
1031 "GOOGLE_API_KEY".to_string(),
1032 ]),
1033 chat_endpoint: "/v1beta/models".to_string(),
1034 healthcheck: Some(HealthcheckDef {
1035 method: "GET".to_string(),
1036 path: Some("/v1beta/models".to_string()),
1037 url: None,
1038 body: None,
1039 }),
1040 cost_per_1k_in: Some(0.00125),
1041 cost_per_1k_out: Some(0.005),
1042 latency_p50_ms: Some(1800),
1043 ..Default::default()
1044 },
1045 );
1046
1047 config.providers.insert(
1049 "together".to_string(),
1050 ProviderDef {
1051 base_url: "https://api.together.xyz/v1".to_string(),
1052 base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
1053 auth_style: "bearer".to_string(),
1054 auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
1055 chat_endpoint: "/chat/completions".to_string(),
1056 completion_endpoint: Some("/completions".to_string()),
1057 healthcheck: Some(HealthcheckDef {
1058 method: "GET".to_string(),
1059 path: Some("/models".to_string()),
1060 url: None,
1061 body: None,
1062 }),
1063 cost_per_1k_in: Some(0.0002),
1064 cost_per_1k_out: Some(0.0006),
1065 latency_p50_ms: Some(1600),
1066 ..Default::default()
1067 },
1068 );
1069
1070 config.providers.insert(
1072 "groq".to_string(),
1073 ProviderDef {
1074 base_url: "https://api.groq.com/openai/v1".to_string(),
1075 base_url_env: Some("GROQ_BASE_URL".to_string()),
1076 auth_style: "bearer".to_string(),
1077 auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
1078 chat_endpoint: "/chat/completions".to_string(),
1079 completion_endpoint: Some("/completions".to_string()),
1080 healthcheck: Some(HealthcheckDef {
1081 method: "GET".to_string(),
1082 path: Some("/models".to_string()),
1083 url: None,
1084 body: None,
1085 }),
1086 cost_per_1k_in: Some(0.0001),
1087 cost_per_1k_out: Some(0.0003),
1088 latency_p50_ms: Some(450),
1089 ..Default::default()
1090 },
1091 );
1092
1093 config.providers.insert(
1095 "deepseek".to_string(),
1096 ProviderDef {
1097 base_url: "https://api.deepseek.com/v1".to_string(),
1098 base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
1099 auth_style: "bearer".to_string(),
1100 auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
1101 chat_endpoint: "/chat/completions".to_string(),
1102 completion_endpoint: Some("/completions".to_string()),
1103 healthcheck: Some(HealthcheckDef {
1104 method: "GET".to_string(),
1105 path: Some("/models".to_string()),
1106 url: None,
1107 body: None,
1108 }),
1109 cost_per_1k_in: Some(0.00014),
1110 cost_per_1k_out: Some(0.00028),
1111 latency_p50_ms: Some(1800),
1112 ..Default::default()
1113 },
1114 );
1115
1116 config.providers.insert(
1118 "fireworks".to_string(),
1119 ProviderDef {
1120 base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1121 base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1122 auth_style: "bearer".to_string(),
1123 auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1124 chat_endpoint: "/chat/completions".to_string(),
1125 completion_endpoint: Some("/completions".to_string()),
1126 healthcheck: Some(HealthcheckDef {
1127 method: "GET".to_string(),
1128 path: Some("/models".to_string()),
1129 url: None,
1130 body: None,
1131 }),
1132 cost_per_1k_in: Some(0.0002),
1133 cost_per_1k_out: Some(0.0006),
1134 latency_p50_ms: Some(1400),
1135 ..Default::default()
1136 },
1137 );
1138
1139 config.providers.insert(
1141 "dashscope".to_string(),
1142 ProviderDef {
1143 base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
1144 base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
1145 auth_style: "bearer".to_string(),
1146 auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
1147 chat_endpoint: "/chat/completions".to_string(),
1148 completion_endpoint: Some("/completions".to_string()),
1149 healthcheck: Some(HealthcheckDef {
1150 method: "GET".to_string(),
1151 path: Some("/models".to_string()),
1152 url: None,
1153 body: None,
1154 }),
1155 cost_per_1k_in: Some(0.0003),
1156 cost_per_1k_out: Some(0.0012),
1157 latency_p50_ms: Some(1600),
1158 ..Default::default()
1159 },
1160 );
1161
1162 config.providers.insert(
1166 "bedrock".to_string(),
1167 ProviderDef {
1168 base_url: String::new(),
1169 base_url_env: Some("BEDROCK_BASE_URL".to_string()),
1170 auth_style: "aws_sigv4".to_string(),
1171 auth_env: AuthEnv::None,
1172 chat_endpoint: "/model/{model}/converse".to_string(),
1173 features: vec!["native_tools".to_string()],
1174 latency_p50_ms: Some(2600),
1175 ..Default::default()
1176 },
1177 );
1178
1179 config.providers.insert(
1183 "azure_openai".to_string(),
1184 ProviderDef {
1185 base_url: "https://{resource}.openai.azure.com".to_string(),
1186 base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
1187 auth_style: "azure_openai".to_string(),
1188 auth_env: AuthEnv::Multiple(vec![
1189 "AZURE_OPENAI_API_KEY".to_string(),
1190 "AZURE_OPENAI_AD_TOKEN".to_string(),
1191 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1192 ]),
1193 chat_endpoint:
1194 "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
1195 .to_string(),
1196 features: vec!["native_tools".to_string()],
1197 cost_per_1k_in: Some(0.0025),
1198 cost_per_1k_out: Some(0.010),
1199 latency_p50_ms: Some(1900),
1200 ..Default::default()
1201 },
1202 );
1203
1204 config.providers.insert(
1206 "vertex".to_string(),
1207 ProviderDef {
1208 base_url: "https://aiplatform.googleapis.com/v1".to_string(),
1209 base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
1210 auth_style: "bearer".to_string(),
1211 auth_env: AuthEnv::Multiple(vec![
1212 "VERTEX_AI_ACCESS_TOKEN".to_string(),
1213 "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
1214 "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
1215 ]),
1216 chat_endpoint:
1217 "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
1218 .to_string(),
1219 features: vec!["native_tools".to_string()],
1220 cost_per_1k_in: Some(0.00125),
1221 cost_per_1k_out: Some(0.005),
1222 latency_p50_ms: Some(2100),
1223 ..Default::default()
1224 },
1225 );
1226
1227 config.providers.insert(
1229 "local".to_string(),
1230 ProviderDef {
1231 base_url: "http://localhost:8000".to_string(),
1232 base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1233 auth_style: "none".to_string(),
1234 chat_endpoint: "/v1/chat/completions".to_string(),
1235 completion_endpoint: Some("/v1/completions".to_string()),
1236 healthcheck: Some(HealthcheckDef {
1237 method: "GET".to_string(),
1238 path: Some("/v1/models".to_string()),
1239 url: None,
1240 body: None,
1241 }),
1242 cost_per_1k_in: Some(0.0),
1243 cost_per_1k_out: Some(0.0),
1244 latency_p50_ms: Some(900),
1245 ..Default::default()
1246 },
1247 );
1248
1249 config.providers.insert(
1253 "mlx".to_string(),
1254 ProviderDef {
1255 base_url: "http://127.0.0.1:8002".to_string(),
1256 base_url_env: Some("MLX_BASE_URL".to_string()),
1257 auth_style: "none".to_string(),
1258 chat_endpoint: "/v1/chat/completions".to_string(),
1259 completion_endpoint: Some("/v1/completions".to_string()),
1260 healthcheck: Some(HealthcheckDef {
1261 method: "GET".to_string(),
1262 path: Some("/v1/models".to_string()),
1263 url: None,
1264 body: None,
1265 }),
1266 cost_per_1k_in: Some(0.0),
1267 cost_per_1k_out: Some(0.0),
1268 latency_p50_ms: Some(900),
1269 ..Default::default()
1270 },
1271 );
1272
1273 config.providers.insert(
1275 "vllm".to_string(),
1276 ProviderDef {
1277 base_url: "http://localhost:8000".to_string(),
1278 base_url_env: Some("VLLM_BASE_URL".to_string()),
1279 auth_style: "none".to_string(),
1280 chat_endpoint: "/v1/chat/completions".to_string(),
1281 completion_endpoint: Some("/v1/completions".to_string()),
1282 healthcheck: Some(HealthcheckDef {
1283 method: "GET".to_string(),
1284 path: Some("/v1/models".to_string()),
1285 url: None,
1286 body: None,
1287 }),
1288 cost_per_1k_in: Some(0.0),
1289 cost_per_1k_out: Some(0.0),
1290 latency_p50_ms: Some(800),
1291 ..Default::default()
1292 },
1293 );
1294
1295 config.providers.insert(
1297 "tgi".to_string(),
1298 ProviderDef {
1299 base_url: "http://localhost:8080".to_string(),
1300 base_url_env: Some("TGI_BASE_URL".to_string()),
1301 auth_style: "none".to_string(),
1302 chat_endpoint: "/v1/chat/completions".to_string(),
1303 completion_endpoint: Some("/v1/completions".to_string()),
1304 healthcheck: Some(HealthcheckDef {
1305 method: "GET".to_string(),
1306 path: Some("/health".to_string()),
1307 url: None,
1308 body: None,
1309 }),
1310 cost_per_1k_in: Some(0.0),
1311 cost_per_1k_out: Some(0.0),
1312 latency_p50_ms: Some(950),
1313 ..Default::default()
1314 },
1315 );
1316
1317 config.inference_rules = vec![
1319 InferenceRule {
1320 pattern: Some("claude-*".to_string()),
1321 contains: None,
1322 exact: None,
1323 provider: "anthropic".to_string(),
1324 },
1325 InferenceRule {
1326 pattern: Some("gpt-*".to_string()),
1327 contains: None,
1328 exact: None,
1329 provider: "openai".to_string(),
1330 },
1331 InferenceRule {
1332 pattern: Some("o1*".to_string()),
1333 contains: None,
1334 exact: None,
1335 provider: "openai".to_string(),
1336 },
1337 InferenceRule {
1338 pattern: Some("o3*".to_string()),
1339 contains: None,
1340 exact: None,
1341 provider: "openai".to_string(),
1342 },
1343 InferenceRule {
1344 pattern: Some("o4*".to_string()),
1345 contains: None,
1346 exact: None,
1347 provider: "openai".to_string(),
1348 },
1349 InferenceRule {
1350 pattern: Some("anthropic.claude-*".to_string()),
1351 contains: None,
1352 exact: None,
1353 provider: "bedrock".to_string(),
1354 },
1355 InferenceRule {
1356 pattern: Some("meta.llama*".to_string()),
1357 contains: None,
1358 exact: None,
1359 provider: "bedrock".to_string(),
1360 },
1361 InferenceRule {
1362 pattern: Some("amazon.*".to_string()),
1363 contains: None,
1364 exact: None,
1365 provider: "bedrock".to_string(),
1366 },
1367 InferenceRule {
1368 pattern: Some("mistral.*".to_string()),
1369 contains: None,
1370 exact: None,
1371 provider: "bedrock".to_string(),
1372 },
1373 InferenceRule {
1374 pattern: Some("cohere.*".to_string()),
1375 contains: None,
1376 exact: None,
1377 provider: "bedrock".to_string(),
1378 },
1379 InferenceRule {
1380 pattern: Some("gemini-*".to_string()),
1381 contains: None,
1382 exact: None,
1383 provider: "gemini".to_string(),
1384 },
1385 ];
1386
1387 config.tier_rules = vec![
1389 TierRule {
1390 contains: Some("9b".to_string()),
1391 pattern: None,
1392 exact: None,
1393 tier: "small".to_string(),
1394 },
1395 TierRule {
1396 contains: Some("a3b".to_string()),
1397 pattern: None,
1398 exact: None,
1399 tier: "small".to_string(),
1400 },
1401 TierRule {
1402 contains: Some("gemma-4-e2b".to_string()),
1403 pattern: None,
1404 exact: None,
1405 tier: "small".to_string(),
1406 },
1407 TierRule {
1408 contains: Some("gemma-4-e4b".to_string()),
1409 pattern: None,
1410 exact: None,
1411 tier: "small".to_string(),
1412 },
1413 TierRule {
1414 contains: Some("gemma-4-26b".to_string()),
1415 pattern: None,
1416 exact: None,
1417 tier: "mid".to_string(),
1418 },
1419 TierRule {
1420 contains: Some("gemma-4-31b".to_string()),
1421 pattern: None,
1422 exact: None,
1423 tier: "frontier".to_string(),
1424 },
1425 TierRule {
1426 contains: Some("gemma4:26b".to_string()),
1427 pattern: None,
1428 exact: None,
1429 tier: "mid".to_string(),
1430 },
1431 TierRule {
1432 contains: Some("gemma4:31b".to_string()),
1433 pattern: None,
1434 exact: None,
1435 tier: "frontier".to_string(),
1436 },
1437 TierRule {
1438 pattern: Some("claude-*".to_string()),
1439 contains: None,
1440 exact: None,
1441 tier: "frontier".to_string(),
1442 },
1443 TierRule {
1444 exact: Some("gpt-4o".to_string()),
1445 contains: None,
1446 pattern: None,
1447 tier: "frontier".to_string(),
1448 },
1449 ];
1450
1451 config.tier_defaults = TierDefaults {
1452 default: "mid".to_string(),
1453 };
1454
1455 config.aliases.insert(
1456 "frontier".to_string(),
1457 AliasDef {
1458 id: "claude-sonnet-4-20250514".to_string(),
1459 provider: "anthropic".to_string(),
1460 tool_format: None,
1461 },
1462 );
1463 config.aliases.insert(
1464 "tier/frontier".to_string(),
1465 AliasDef {
1466 id: "claude-sonnet-4-20250514".to_string(),
1467 provider: "anthropic".to_string(),
1468 tool_format: None,
1469 },
1470 );
1471 config.aliases.insert(
1472 "mid".to_string(),
1473 AliasDef {
1474 id: "gpt-4o-mini".to_string(),
1475 provider: "openai".to_string(),
1476 tool_format: None,
1477 },
1478 );
1479 config.aliases.insert(
1480 "tier/mid".to_string(),
1481 AliasDef {
1482 id: "gpt-4o-mini".to_string(),
1483 provider: "openai".to_string(),
1484 tool_format: None,
1485 },
1486 );
1487 config.aliases.insert(
1488 "small".to_string(),
1489 AliasDef {
1490 id: "Qwen/Qwen3.5-9B".to_string(),
1491 provider: "openrouter".to_string(),
1492 tool_format: None,
1493 },
1494 );
1495 config.aliases.insert(
1496 "tier/small".to_string(),
1497 AliasDef {
1498 id: "Qwen/Qwen3.5-9B".to_string(),
1499 provider: "openrouter".to_string(),
1500 tool_format: None,
1501 },
1502 );
1503 config.aliases.insert(
1504 "local-gemma4".to_string(),
1505 AliasDef {
1506 id: "gemma-4-26b-a4b-it".to_string(),
1507 provider: "local".to_string(),
1508 tool_format: None,
1509 },
1510 );
1511 config.aliases.insert(
1512 "local-gemma4-26b".to_string(),
1513 AliasDef {
1514 id: "gemma-4-26b-a4b-it".to_string(),
1515 provider: "local".to_string(),
1516 tool_format: None,
1517 },
1518 );
1519 config.aliases.insert(
1520 "local-gemma4-31b".to_string(),
1521 AliasDef {
1522 id: "gemma-4-31b-it".to_string(),
1523 provider: "local".to_string(),
1524 tool_format: None,
1525 },
1526 );
1527 config.aliases.insert(
1528 "local-gemma4-e4b".to_string(),
1529 AliasDef {
1530 id: "gemma-4-e4b-it".to_string(),
1531 provider: "local".to_string(),
1532 tool_format: None,
1533 },
1534 );
1535 config.aliases.insert(
1536 "local-gemma4-e2b".to_string(),
1537 AliasDef {
1538 id: "gemma-4-e2b-it".to_string(),
1539 provider: "local".to_string(),
1540 tool_format: None,
1541 },
1542 );
1543 config.aliases.insert(
1544 "mlx-qwen36-27b".to_string(),
1545 AliasDef {
1546 id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
1547 provider: "mlx".to_string(),
1548 tool_format: None,
1549 },
1550 );
1551
1552 config.qc_defaults.extend(BTreeMap::from([
1553 (
1554 "anthropic".to_string(),
1555 "claude-3-5-haiku-20241022".to_string(),
1556 ),
1557 ("openai".to_string(), "gpt-4o-mini".to_string()),
1558 (
1559 "openrouter".to_string(),
1560 "google/gemini-2.5-flash".to_string(),
1561 ),
1562 ("ollama".to_string(), "llama3.2".to_string()),
1563 ("local".to_string(), "gpt-4o".to_string()),
1564 ]));
1565
1566 config.models.extend(BTreeMap::from([
1567 (
1568 "claude-sonnet-4-20250514".to_string(),
1569 ModelDef {
1570 name: "Claude Sonnet 4".to_string(),
1571 provider: "anthropic".to_string(),
1572 context_window: 200_000,
1573 stream_timeout: None,
1574 capabilities: vec![
1575 "tools".to_string(),
1576 "streaming".to_string(),
1577 "prompt_caching".to_string(),
1578 "thinking".to_string(),
1579 ],
1580 pricing: Some(ModelPricing {
1581 input_per_mtok: 3.0,
1582 output_per_mtok: 15.0,
1583 cache_read_per_mtok: Some(0.3),
1584 cache_write_per_mtok: Some(3.75),
1585 }),
1586 },
1587 ),
1588 (
1589 "gpt-4o-mini".to_string(),
1590 ModelDef {
1591 name: "GPT-4o Mini".to_string(),
1592 provider: "openai".to_string(),
1593 context_window: 128_000,
1594 stream_timeout: None,
1595 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1596 pricing: Some(ModelPricing {
1597 input_per_mtok: 0.15,
1598 output_per_mtok: 0.60,
1599 cache_read_per_mtok: None,
1600 cache_write_per_mtok: None,
1601 }),
1602 },
1603 ),
1604 (
1605 "Qwen/Qwen3.5-9B".to_string(),
1606 ModelDef {
1607 name: "Qwen3.5 9B".to_string(),
1608 provider: "openrouter".to_string(),
1609 context_window: 131_072,
1610 stream_timeout: None,
1611 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1612 pricing: None,
1613 },
1614 ),
1615 (
1616 "llama3.2".to_string(),
1617 ModelDef {
1618 name: "Llama 3.2".to_string(),
1619 provider: "ollama".to_string(),
1620 context_window: 32_000,
1621 stream_timeout: Some(300.0),
1622 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1623 pricing: None,
1624 },
1625 ),
1626 ]));
1627
1628 config
1629}
1630
/// Test helper: layers a parsed global-config overlay on top of the
/// built-in provider catalog, mirroring how an on-disk providers file
/// is applied over `default_config()` at load time.
#[cfg(test)]
fn merge_global_config(overlay: ProvidersConfig) -> ProvidersConfig {
    let mut base = default_config();
    base.merge_from(&overlay);
    base
}
1637
#[cfg(test)]
mod tests {
    //! Unit tests for the provider catalog: glob matching, provider/tier
    //! inference, alias resolution, and user-override layering. Several
    //! tests mutate process-global environment variables under
    //! `crate::llm::env_lock()` and install thread-local overrides, so
    //! statement ordering inside each test is significant.
    use super::*;

    // Drops any thread-local user-override config so a test starts from the
    // built-in defaults. Overrides are thread-local, but the test harness can
    // reuse threads, so tests that install overrides call this before and
    // after their body to avoid cross-test leakage.
    fn reset_overrides() {
        clear_user_overrides();
    }

    // `pat*` matches any id with that prefix and nothing else.
    #[test]
    fn test_glob_match_prefix() {
        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
        assert!(glob_match("gpt-*", "gpt-4o"));
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    // `*suffix` matches only ids that actually end with the suffix.
    #[test]
    fn test_glob_match_suffix() {
        assert!(glob_match("*-latest", "llama3.2-latest"));
        assert!(!glob_match("*-latest", "llama3.2"));
    }

    // A single interior `*` must match the middle segment; both the prefix
    // and suffix around it are still required.
    #[test]
    fn test_glob_match_middle() {
        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
    }

    // A pattern without `*` behaves as an exact string comparison — a longer
    // id sharing the prefix must not match.
    #[test]
    fn test_glob_match_exact() {
        assert!(glob_match("gpt-4o", "gpt-4o"));
        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
    }

    // Exercises the built-in inference rules with HARN_DEFAULT_PROVIDER
    // removed, so the final "unknown-model" case falls through to the
    // hard-coded anthropic fallback rather than an env override.
    #[test]
    fn test_infer_provider_from_defaults() {
        // Serialize env mutation across tests; set_var/remove_var are
        // process-global and unsafe to race.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        // NOTE(review): a panicking assertion here skips the env restore
        // below, leaking the removed var to later tests on this thread —
        // consider a drop guard if this ever flakes.
        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        // Restore the caller's env so other env-sensitive tests see the
        // original value.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // Scheme-style prefixes ("local:", "ollama:", "hf:") decide the provider
    // outright, regardless of what follows the colon.
    #[test]
    fn test_infer_provider_prefix_rules() {
        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
        assert_eq!(infer_provider("local:owner/model"), "ollama");
        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
    }

    // "org/model" (exactly one slash) infers openrouter; two slashes do not,
    // so "org/team/model" falls back to the default provider (anthropic once
    // HARN_DEFAULT_PROVIDER is cleared).
    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    // The provider prefix is stripped from the resolved model id; only the
    // part after the first colon remains (later colons are kept, as in
    // "qwen3:30b-a3b").
    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        let local = resolve_model_info("local:gemma-4-e4b-it");
        assert_eq!(local.id, "gemma-4-e4b-it");
        assert_eq!(local.provider, "ollama");

        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
        assert_eq!(ollama.id, "qwen3:30b-a3b");
        assert_eq!(ollama.provider, "ollama");

        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.provider, "huggingface");
    }

    // Tier classification: pattern/contains rules pick frontier/small; a model
    // matching no rule gets the configured default tier ("mid").
    #[test]
    fn test_model_tier_from_defaults() {
        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
        assert_eq!(model_tier("gpt-4o"), "frontier");
        assert_eq!(model_tier("Qwen3.5-9B"), "small");
        assert_eq!(model_tier("deepseek-v3"), "mid");
    }

    // An id with no alias entry resolves to itself with no explicit provider.
    #[test]
    fn test_resolve_model_unknown_alias() {
        let (id, provider) = resolve_model("gpt-4o");
        assert_eq!(id, "gpt-4o");
        assert!(provider.is_none());
    }

    // Sanity check that the default catalog registers the expected provider
    // set (entries inserted earlier in default_config(), outside this chunk).
    #[test]
    fn test_provider_names() {
        let names = provider_names();
        assert!(names.len() >= 7);
        assert!(names.contains(&"anthropic".to_string()));
        assert!(names.contains(&"together".to_string()));
        assert!(names.contains(&"local".to_string()));
        assert!(names.contains(&"mlx".to_string()));
        assert!(names.contains(&"openai".to_string()));
        assert!(names.contains(&"ollama".to_string()));
        assert!(names.contains(&"bedrock".to_string()));
        assert!(names.contains(&"azure_openai".to_string()));
        assert!(names.contains(&"vertex".to_string()));
    }

    // A global providers file is additive: its default_provider and aliases
    // win, while the built-in provider entries remain present.
    #[test]
    fn global_provider_file_is_an_overlay_on_builtin_defaults() {
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.aliases.insert(
            "quickstart".to_string(),
            AliasDef {
                id: "llama3.2".to_string(),
                provider: "ollama".to_string(),
                tool_format: None,
            },
        );

        let merged = merge_global_config(overlay);

        assert_eq!(merged.default_provider.as_deref(), Some("ollama"));
        assert!(merged.providers.contains_key("anthropic"));
        assert!(merged.providers.contains_key("ollama"));
        assert_eq!(merged.aliases["quickstart"].id, "llama3.2");
    }

    // With no provider hint, tier names resolve through the built-in
    // "frontier"/"small" aliases.
    #[test]
    fn test_resolve_tier_model_default_aliases() {
        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
        assert_eq!(model, "claude-sonnet-4-20250514");
        assert_eq!(provider, "anthropic");

        let (model, provider) = resolve_tier_model("small", None).unwrap();
        assert_eq!(model, "Qwen/Qwen3.5-9B");
        assert_eq!(provider, "openrouter");
    }

    // A provider hint selects the provider-scoped alias for the tier.
    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
        assert_eq!(model, "gpt-4o-mini");
        assert_eq!(provider, "openai");
    }

    // Anthropic authenticates via a custom header rather than a bearer token.
    #[test]
    fn test_provider_config_anthropic() {
        let pdef = provider_config("anthropic").unwrap();
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    // The mlx provider exposes its base URL, env override, and healthcheck
    // path; the "mlx-qwen36-27b" alias resolves onto it.
    #[test]
    fn test_provider_config_mlx() {
        let pdef = provider_config("mlx").unwrap();
        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
        assert_eq!(
            pdef.healthcheck.unwrap().path.as_deref(),
            Some("/v1/models")
        );

        let (model, provider) = resolve_model("mlx-qwen36-27b");
        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
        assert_eq!(provider.as_deref(), Some("mlx"));
    }

    // Enterprise providers: bedrock uses SigV4 auth, azure_openai accepts
    // multiple auth env vars in priority order, vertex has a fixed base URL.
    // Also covers the bedrock-style dotted-id inference rules.
    #[test]
    fn test_enterprise_provider_defaults_and_inference() {
        let bedrock = provider_config("bedrock").unwrap();
        assert_eq!(bedrock.auth_style, "aws_sigv4");
        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
        assert_eq!(
            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
            "bedrock"
        );
        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");

        let azure = provider_config("azure_openai").unwrap();
        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
        assert_eq!(
            auth_env_names(&azure.auth_env),
            vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]
        );

        let vertex = provider_config("vertex").unwrap();
        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
    }

    // HARN_DEFAULT_PROVIDER overrides the fallback provider for ids that
    // match no inference rule, and the inference detail records it as a
    // DefaultFallback. Env is restored before asserting so a failure cannot
    // leak the override.
    #[test]
    fn test_default_provider_env_override_for_unknown_model() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        unsafe {
            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
        }

        let inference = infer_provider_detail("unknown-model");

        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }

        assert_eq!(inference.provider, "openai");
        assert_eq!(
            inference.source,
            crate::llm::provider::ProviderInferenceSource::DefaultFallback
        );
    }

    // Without a base_url_env set, resolve_base_url returns the static
    // base_url unchanged.
    #[test]
    fn test_resolve_base_url_no_env() {
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    // The built-in config is non-trivially populated and defaults the tier
    // to "mid".
    #[test]
    fn test_default_config_roundtrip() {
        let config = default_config();
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
        assert_eq!(config.tier_defaults.default, "mid");
    }

    // merge_from adds overlay providers and replaces default_provider while
    // keeping the built-in catalog intact.
    #[test]
    fn test_external_config_overlays_default_catalog() {
        let mut config = default_config();
        let mut overlay = ProvidersConfig {
            default_provider: Some("ollama".to_string()),
            ..Default::default()
        };
        overlay.providers.insert(
            "custom".to_string(),
            ProviderDef {
                base_url: "https://llm.example.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );

        config.merge_from(&overlay);

        assert_eq!(config.default_provider.as_deref(), Some("ollama"));
        assert!(config.providers.contains_key("custom"));
        assert!(config.providers.contains_key("anthropic"));
        assert!(config.providers.contains_key("ollama"));
    }

    // No model_defaults entry exists for this id in the built-in config, so
    // the params map comes back empty.
    #[test]
    fn test_model_params_empty() {
        let params = model_params("claude-sonnet-4-20250514");
        assert!(params.is_empty());
    }

    // User overrides can introduce a brand-new provider plus an alias that
    // targets it; both become visible through the normal lookup APIs.
    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        overlay.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        set_user_overrides(Some(overlay));

        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        assert_eq!(
            provider_config("acme").map(|provider| provider.base_url),
            Some("https://llm.acme.test/v1".to_string())
        );

        // Tear down so later tests on this thread see pristine defaults.
        reset_overrides();
    }

    // The tool format depends on both model and provider: the same model
    // family can get "native" on one backend and "text" on another.
    #[test]
    fn test_default_tool_format_uses_capability_matrix() {
        reset_overrides();

        assert_eq!(
            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
            "native"
        );
        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
    }

    // User overrides can extend the model catalog (context window, pricing)
    // and set per-provider QC default models. Per-mtok pricing is surfaced
    // per-1k tokens (divided by 1000) via pricing_per_1k_for.
    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(overlay));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        // NOTE(review): the overlay declared ["tools", "streaming"] but only
        // "streaming" is expected back — presumably model_catalog_entry
        // filters capabilities (e.g. "tools" dropped for an unregistered
        // provider); confirm this is intentional.
        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }

    // Override inference rules are prepended (see merge_from), so a user rule
    // matching "internal-*" wins over the built-in fallback.
    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        set_user_overrides(Some(overlay));

        assert_eq!(infer_provider("internal-foo"), "openai");

        reset_overrides();
    }
}
2037}