1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
/// Process-wide providers configuration, loaded once on first access.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
/// Path the configuration was loaded from, when a file (not defaults) was used.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
8
thread_local! {
    // Per-thread overlay merged on top of the global CONFIG by effective_config().
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root schema of the providers TOML file.
///
/// Every section is optional (`#[serde(default)]`), so a partial file parses —
/// which also makes overlay/override files natural to express.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Provider used when none is specified; `None` behaves as "auto".
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider name -> connection/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias name -> concrete (model id, provider) mapping.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id -> catalog entry (context window, pricing, capabilities).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lower-cased provider name -> default QC model id.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered model-id -> provider rules; first match wins.
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered model-id -> tier rules; first match wins.
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Tier used when no tier rule or built-in heuristic matches.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Glob pattern -> extra request parameters applied to matching models.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
impl ProvidersConfig {
    /// True when the config carries no meaningful data. `tier_defaults`
    /// counts as empty while it still holds the serde default ("mid").
    pub fn is_empty(&self) -> bool {
        self.default_provider.is_none()
            && self.providers.is_empty()
            && self.aliases.is_empty()
            && self.models.is_empty()
            && self.qc_defaults.is_empty()
            && self.inference_rules.is_empty()
            && self.tier_rules.is_empty()
            && self.model_defaults.is_empty()
            && self.tier_defaults.default == default_mid()
    }

    /// Merges `overlay` into `self`, overlay values taking precedence.
    ///
    /// Maps: overlay entries replace same-keyed base entries. Rule lists:
    /// overlay rules are *prepended* so they match first at evaluation time.
    /// Scalars: overwritten only when the overlay actually sets them
    /// (non-None / non-default).
    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
        self.providers.extend(overlay.providers.clone());
        self.aliases.extend(overlay.aliases.clone());
        self.models.extend(overlay.models.clone());
        self.qc_defaults.extend(overlay.qc_defaults.clone());

        if overlay.default_provider.is_some() {
            self.default_provider = overlay.default_provider.clone();
        }

        // Prepend overlay rules so they take priority over base rules.
        if !overlay.inference_rules.is_empty() {
            let mut merged = overlay.inference_rules.clone();
            merged.extend(self.inference_rules.clone());
            self.inference_rules = merged;
        }

        if !overlay.tier_rules.is_empty() {
            let mut merged = overlay.tier_rules.clone();
            merged.extend(self.tier_rules.clone());
            self.tier_rules = merged;
        }

        // Only a non-default overlay tier default replaces the base value.
        if overlay.tier_defaults.default != default_mid() {
            self.tier_defaults = overlay.tier_defaults.clone();
        }

        // Per-pattern parameter maps are merged key-by-key, overlay winning.
        for (pattern, defaults) in &overlay.model_defaults {
            self.model_defaults
                .entry(pattern.clone())
                .or_default()
                .extend(defaults.clone());
        }
    }
}
86
/// Connection, authentication, and economics metadata for one provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Human-readable name for UIs.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon identifier for UIs.
    #[serde(default)]
    pub icon: Option<String>,
    /// API root, e.g. "https://api.openai.com/v1"; `base_url_env` may override it.
    pub base_url: String,
    /// Env var that, when set and non-empty, overrides `base_url` (see `resolve_base_url`).
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme: "bearer" (default), "header", "none", or provider-specific styles.
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name used when `auth_style` is "header" (e.g. "x-api-key").
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var name(s) holding the credential (see `auth_env_names`).
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Extra headers sent with requests (e.g. "anthropic-version").
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Chat endpoint path relative to `base_url`; may contain `{placeholders}`.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Optional plain-completion endpoint path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional liveness-probe definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Feature flags, e.g. "native_tools", "prompt_caching", "thinking".
    #[serde(default)]
    pub features: Vec<String>,
    /// Provider to fall back to on failure — presumably consumed by routing; verify against caller.
    #[serde(default)]
    pub fallback: Option<String>,
    /// Retry attempts on transient failure.
    #[serde(default)]
    pub retry_count: Option<u32>,
    /// Delay between retries, in milliseconds.
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Requests-per-minute budget.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Provider-level USD cost per 1k input tokens (fallback when a model has no pricing).
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    /// Provider-level USD cost per 1k output tokens.
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Typical median latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
135impl Default for ProviderDef {
136 fn default() -> Self {
137 Self {
138 display_name: None,
139 icon: None,
140 base_url: String::new(),
141 base_url_env: None,
142 auth_style: default_bearer(),
143 auth_header: None,
144 auth_env: AuthEnv::None,
145 extra_headers: BTreeMap::new(),
146 chat_endpoint: String::new(),
147 completion_endpoint: None,
148 healthcheck: None,
149 features: Vec::new(),
150 fallback: None,
151 retry_count: None,
152 retry_delay_ms: None,
153 rpm: None,
154 cost_per_1k_in: None,
155 cost_per_1k_out: None,
156 latency_p50_ms: None,
157 }
158 }
159}
160
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Environment-variable source(s) for a provider credential.
///
/// `untagged`: the TOML value may be a single string or an array of strings.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No credential env var configured.
    #[default]
    None,
    /// One env var name.
    Single(String),
    /// Several candidate env var names.
    Multiple(Vec<String>),
}
175
/// Liveness-probe request for a provider.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method, e.g. "GET" or "POST".
    pub method: String,
    /// Path relative to the provider base URL.
    #[serde(default)]
    pub path: Option<String>,
    /// Absolute URL; presumably used instead of `path` when set — confirm with caller.
    #[serde(default)]
    pub url: Option<String>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<String>,
}
186
/// A short model alias resolving to a concrete model id on a provider.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Concrete model id the alias resolves to.
    pub id: String,
    /// Provider the alias routes to.
    pub provider: String,
    /// Optional tool-call format override; inferred when absent
    /// (see `default_tool_format_with_config`).
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Per-million-token USD pricing for a model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    /// USD per 1M input tokens.
    pub input_per_mtok: f64,
    /// USD per 1M output tokens.
    pub output_per_mtok: f64,
    /// USD per 1M cache-read tokens, when billed separately.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// USD per 1M cache-write tokens, when billed separately.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog entry describing one model.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Model name.
    pub name: String,
    /// Provider that serves the model.
    pub provider: String,
    /// Maximum context window, in tokens.
    pub context_window: u64,
    /// Streaming timeout — assumes seconds; TODO confirm unit against callers.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Capability tags; recomputed from the capability matrix when served
    /// through the catalog helpers (see `with_effective_capability_tags`).
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Per-model pricing; overrides provider-level economics in `pricing_per_1k_for`.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
221
/// Fully resolved model selection: concrete id, provider, and derived metadata.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Concrete model id.
    pub id: String,
    /// Provider name.
    pub provider: String,
    /// Alias the caller supplied, when resolution went through one.
    pub alias: Option<String>,
    /// Tool-call format ("native" or "text").
    pub tool_format: String,
    /// Capability tier (e.g. "small", "mid", "frontier").
    pub tier: String,
}
230
/// Maps a model-id match to a provider; rules are evaluated in order and the
/// first hit wins. A rule may set any of `exact`, `pattern` (glob), `contains`.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    /// Glob pattern (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Provider chosen when the rule matches.
    pub provider: String,
}
241
/// Maps a model-id match to a tier; same matching semantics as `InferenceRule`.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    /// Glob pattern (see `glob_match`).
    #[serde(default)]
    pub pattern: Option<String>,
    /// Substring match.
    #[serde(default)]
    pub contains: Option<String>,
    /// Exact id match.
    #[serde(default)]
    pub exact: Option<String>,
    /// Tier assigned when the rule matches.
    pub tier: String,
}
252
/// Fallback tier configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    /// Tier assigned when no rule or heuristic matches; defaults to "mid".
    #[serde(default = "default_mid")]
    pub default: String,
}
258
259impl Default for TierDefaults {
260 fn default() -> Self {
261 Self {
262 default: default_mid(),
263 }
264 }
265}
266
/// Serde default for `TierDefaults::default`.
fn default_mid() -> String {
    String::from("mid")
}
270
/// Returns the process-wide providers configuration, loading it on first call.
///
/// Search order: `$HARN_PROVIDERS_CONFIG` (explicit path), then
/// `$HOME/.config/harn/providers.toml`, then the built-in `default_config()`.
/// When a file wins, its path is recorded in `CONFIG_PATH`.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        // Opt-in load diagnostics via either env flag.
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        // 1) Explicit path wins; read/parse errors are reported and fall through.
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            match std::fs::read_to_string(&path) {
                Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
                    Ok(config) => {
                        if verbose_config_logging {
                            eprintln!(
                                "[llm_config] Loaded {} providers, {} aliases from {}",
                                config.providers.len(),
                                config.aliases.len(),
                                path
                            );
                        }
                        let _ = CONFIG_PATH.set(path);
                        return config;
                    }
                    Err(e) => eprintln!("[llm_config] TOML parse error in {}: {}", path, e),
                },
                Err(e) => eprintln!("[llm_config] Cannot read {}: {}", path, e),
            }
        }
        // 2) Per-user config file; errors here are silently ignored (best-effort).
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            if let Ok(content) = std::fs::read_to_string(&path) {
                if let Ok(config) = toml::from_str::<ProvidersConfig>(&content) {
                    let _ = CONFIG_PATH.set(path);
                    return config;
                }
            }
        }
        // 3) Built-in defaults; no CONFIG_PATH is recorded in this case.
        default_config()
    })
}
313
314pub fn loaded_config_path() -> Option<std::path::PathBuf> {
317 let _ = load_config();
319 CONFIG_PATH.get().map(std::path::PathBuf::from)
320}
321
322pub fn set_user_overrides(config: Option<ProvidersConfig>) {
326 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
327}
328
329pub fn clear_user_overrides() {
331 set_user_overrides(None);
332}
333
334fn effective_config() -> ProvidersConfig {
335 let mut merged = load_config().clone();
336 USER_OVERRIDES.with(|cell| {
337 if let Some(overlay) = cell.borrow().as_ref() {
338 merged.merge_from(overlay);
339 }
340 });
341 merged
342}
343
344pub fn resolve_model(alias: &str) -> (String, Option<String>) {
346 let config = effective_config();
347 if let Some(a) = config.aliases.get(alias) {
348 return (a.id.clone(), Some(a.provider.clone()));
349 }
350 (normalize_model_id(alias), None)
351}
352
/// Strips a known routing prefix ("ollama:", "local:", "huggingface:", "hf:")
/// from a raw selector; anything else passes through unchanged.
pub fn normalize_model_id(raw: &str) -> String {
    ["ollama:", "local:", "huggingface:", "hf:"]
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
365
366pub fn resolve_model_info(selector: &str) -> ResolvedModel {
369 let config = effective_config();
370 if let Some(alias) = config.aliases.get(selector) {
371 let id = alias.id.clone();
372 let provider = alias.provider.clone();
373 let tool_format = alias
374 .tool_format
375 .clone()
376 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
377 return ResolvedModel {
378 tier: model_tier_with_config(&config, &id),
379 id,
380 provider,
381 alias: Some(selector.to_string()),
382 tool_format,
383 };
384 }
385
386 let provider = infer_provider_with_config(&config, selector).provider;
387 let id = normalize_model_id(selector);
388 let tool_format = default_tool_format_with_config(&config, &id, &provider);
389 let tier = model_tier_with_config(&config, &id);
390 ResolvedModel {
391 id,
392 provider,
393 alias: None,
394 tool_format,
395 tier,
396 }
397}
398
399pub fn infer_provider(model_id: &str) -> String {
401 infer_provider_detail(model_id).provider
402}
403
404pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
406 let config = effective_config();
407 infer_provider_with_config(&config, model_id)
408}
409
410fn infer_provider_with_config(
411 config: &ProvidersConfig,
412 model_id: &str,
413) -> crate::llm::provider::ProviderInference {
414 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
415 return crate::llm::provider::ProviderInference::builtin("ollama");
416 }
417 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
418 return crate::llm::provider::ProviderInference::builtin("huggingface");
419 }
420 for rule in &config.inference_rules {
421 if let Some(exact) = &rule.exact {
422 if model_id == exact {
423 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
424 }
425 }
426 if let Some(pattern) = &rule.pattern {
427 if glob_match(pattern, model_id) {
428 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
429 }
430 }
431 if let Some(substr) = &rule.contains {
432 if model_id.contains(substr.as_str()) {
433 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
434 }
435 }
436 }
437 crate::llm::provider::infer_provider_from_model_id(
438 model_id,
439 &default_provider_with_config(config),
440 )
441}
442
443pub fn default_provider() -> String {
444 let config = effective_config();
445 default_provider_with_config(&config)
446}
447
448fn default_provider_with_config(config: &ProvidersConfig) -> String {
449 std::env::var("HARN_DEFAULT_PROVIDER")
450 .ok()
451 .map(|value| value.trim().to_string())
452 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
453 .or_else(|| {
454 config
455 .default_provider
456 .as_deref()
457 .map(str::trim)
458 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
459 .map(str::to_string)
460 })
461 .unwrap_or_else(|| "anthropic".to_string())
462}
463
464pub fn model_tier(model_id: &str) -> String {
466 let config = effective_config();
467 model_tier_with_config(&config, model_id)
468}
469
470fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
471 for rule in &config.tier_rules {
472 if let Some(exact) = &rule.exact {
473 if model_id == exact {
474 return rule.tier.clone();
475 }
476 }
477 if let Some(pattern) = &rule.pattern {
478 if glob_match(pattern, model_id) {
479 return rule.tier.clone();
480 }
481 }
482 if let Some(substr) = &rule.contains {
483 if model_id.contains(substr.as_str()) {
484 return rule.tier.clone();
485 }
486 }
487 }
488 let lower = model_id.to_lowercase();
489 if lower.contains("9b") || lower.contains("a3b") {
490 return "small".to_string();
491 }
492 if lower.starts_with("claude-") || lower == "gpt-4o" {
493 return "frontier".to_string();
494 }
495 config.tier_defaults.default.clone()
496}
497
498pub fn provider_config(name: &str) -> Option<ProviderDef> {
500 effective_config().providers.get(name).cloned()
501}
502
503pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
506 let config = effective_config();
507 let mut params = BTreeMap::new();
508 for (pattern, defaults) in &config.model_defaults {
509 if glob_match(pattern, model_id) {
510 for (k, v) in defaults {
511 params.insert(k.clone(), v.clone());
512 }
513 }
514 }
515 params
516}
517
518pub fn provider_names() -> Vec<String> {
520 effective_config().providers.keys().cloned().collect()
521}
522
523pub fn known_model_names() -> Vec<String> {
525 effective_config().aliases.keys().cloned().collect()
526}
527
528pub fn alias_entries() -> Vec<(String, AliasDef)> {
529 effective_config().aliases.into_iter().collect()
530}
531
532pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
534 let mut entries: Vec<_> = effective_config()
535 .models
536 .into_iter()
537 .map(|(id, model)| {
538 let provider = model.provider.clone();
539 (
540 id.clone(),
541 with_effective_capability_tags(id, provider, model),
542 )
543 })
544 .collect();
545 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
546 model_a
547 .provider
548 .cmp(&model_b.provider)
549 .then_with(|| id_a.cmp(id_b))
550 });
551 entries
552}
553
554pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
555 effective_config()
556 .models
557 .get(model_id)
558 .cloned()
559 .map(|model| {
560 let provider = model.provider.clone();
561 with_effective_capability_tags(model_id.to_string(), provider, model)
562 })
563}
564
565pub fn qc_default_model(provider: &str) -> Option<String> {
566 std::env::var("BURIN_QC_MODEL")
567 .ok()
568 .filter(|value| !value.trim().is_empty())
569 .or_else(|| {
570 effective_config()
571 .qc_defaults
572 .get(&provider.to_lowercase())
573 .cloned()
574 })
575}
576
/// Hard-coded default model per provider; "local" and "mlx" consult env vars
/// first, everything unknown falls back to a Claude Sonnet id.
pub fn default_model_for_provider(provider: &str) -> String {
    match provider {
        "local" => std::env::var("LOCAL_LLM_MODEL")
            .or_else(|_| std::env::var("HARN_LLM_MODEL"))
            .unwrap_or_else(|_| String::from("gpt-4o")),
        "mlx" => std::env::var("MLX_MODEL_ID")
            .unwrap_or_else(|_| String::from("unsloth/Qwen3.6-27B-UD-MLX-4bit")),
        "openai" => String::from("gpt-4o"),
        "ollama" => String::from("llama3.2"),
        "openrouter" => String::from("anthropic/claude-sonnet-4.6"),
        _ => String::from("claude-sonnet-4-20250514"),
    }
}
590
591pub fn qc_defaults() -> BTreeMap<String, String> {
592 effective_config().qc_defaults
593}
594
595pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
596 effective_config()
597 .models
598 .get(model_id)
599 .and_then(|model| model.pricing.clone())
600}
601
602pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
603 model_pricing_per_mtok(model_id)
604 .map(|pricing| {
605 (
606 pricing.input_per_mtok / 1000.0,
607 pricing.output_per_mtok / 1000.0,
608 )
609 })
610 .or_else(|| {
611 let (input, output, _) = provider_economics(provider);
612 match (input, output) {
613 (Some(input), Some(output)) => Some((input, output)),
614 _ => None,
615 }
616 })
617}
618
619pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
620 match auth_env {
621 AuthEnv::None => Vec::new(),
622 AuthEnv::Single(name) => vec![name.clone()],
623 AuthEnv::Multiple(names) => names.clone(),
624 }
625}
626
627pub fn provider_key_available(provider: &str) -> bool {
628 let Some(pdef) = provider_config(provider) else {
629 return provider == "ollama";
630 };
631 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
632 return true;
633 }
634 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
635 std::env::var(env_name)
636 .ok()
637 .is_some_and(|value| !value.trim().is_empty())
638 })
639}
640
641pub fn available_provider_names() -> Vec<String> {
642 provider_names()
643 .into_iter()
644 .filter(|provider| provider_key_available(provider))
645 .collect()
646}
647
648pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
650 provider_config(provider)
651 .map(|p| p.features.iter().any(|f| f == feature))
652 .unwrap_or(false)
653}
654
655pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
659 provider_config(provider)
660 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
661 .unwrap_or((None, None, None))
662}
663
664pub fn default_tool_format(model: &str, provider: &str) -> String {
668 let config = effective_config();
669 default_tool_format_with_config(&config, model, provider)
670}
671
672fn default_tool_format_with_config(
673 config: &ProvidersConfig,
674 model: &str,
675 provider: &str,
676) -> String {
677 for (name, alias) in &config.aliases {
679 let matches = (alias.id == model && alias.provider == provider) || name == model;
680 if matches {
681 if let Some(ref fmt) = alias.tool_format {
682 return fmt.clone();
683 }
684 }
685 }
686 let capability_matrix_native = crate::llm::capabilities::lookup(provider, model).native_tools;
687 let legacy_provider_native = config
688 .providers
689 .get(provider)
690 .map(|p| p.features.iter().any(|f| f == "native_tools"))
691 .unwrap_or(false);
692 if capability_matrix_native || legacy_provider_native {
693 "native".to_string()
694 } else {
695 "text".to_string()
696 }
697}
698
699fn with_effective_capability_tags(
700 model_id: String,
701 provider: String,
702 mut model: ModelDef,
703) -> ModelDef {
704 model.capabilities = effective_model_capability_tags(&provider, &model_id);
705 model
706}
707
/// Translates the capability-matrix entry for `(provider, model_id)` into the
/// string tags shown in the model catalog.
///
/// NOTE(review): tags come solely from `crate::llm::capabilities::lookup`;
/// TOML-declared `ModelDef.capabilities` are overwritten by
/// `with_effective_capability_tags`.
pub fn effective_model_capability_tags(provider: &str, model_id: &str) -> Vec<String> {
    let caps = crate::llm::capabilities::lookup(provider, model_id);
    let mut tags = Vec::new();
    // Streaming is emitted unconditionally for every model.
    tags.push("streaming".to_string());
    if caps.native_tools {
        tags.push("tools".to_string());
    }
    if !caps.tool_search.is_empty() {
        tags.push("tool_search".to_string());
    }
    if caps.vision || caps.vision_supported {
        tags.push("vision".to_string());
    }
    if caps.audio {
        tags.push("audio".to_string());
    }
    if caps.pdf {
        tags.push("pdf".to_string());
    }
    if caps.files_api_supported {
        tags.push("files".to_string());
    }
    if caps.prompt_caching {
        tags.push("prompt_caching".to_string());
    }
    if !caps.thinking_modes.is_empty() {
        tags.push("thinking".to_string());
    }
    // "extended_thinking" when interleaved thinking is supported or an
    // "adaptive"/"effort" thinking mode exists.
    if caps.interleaved_thinking_supported
        || caps
            .thinking_modes
            .iter()
            .any(|mode| mode == "adaptive" || mode == "effort")
    {
        tags.push("extended_thinking".to_string());
    }
    if caps.json_schema.is_some() {
        tags.push("structured_output".to_string());
    }
    tags
}
754
755pub fn resolve_tier_model(
757 target: &str,
758 preferred_provider: Option<&str>,
759) -> Option<(String, String)> {
760 let config = effective_config();
761
762 if let Some(alias) = config.aliases.get(target) {
763 return Some((alias.id.clone(), alias.provider.clone()));
764 }
765
766 let candidate_aliases = if let Some(provider) = preferred_provider {
767 vec![
768 format!("{provider}/{target}"),
769 format!("{provider}:{target}"),
770 format!("tier/{target}"),
771 target.to_string(),
772 ]
773 } else {
774 vec![format!("tier/{target}"), target.to_string()]
775 };
776
777 for alias_name in candidate_aliases {
778 if let Some(alias) = config.aliases.get(&alias_name) {
779 return Some((alias.id.clone(), alias.provider.clone()));
780 }
781 }
782
783 None
784}
785
786pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
790 let config = effective_config();
791 let mut seen = std::collections::BTreeSet::new();
792 let mut candidates = Vec::new();
793
794 for alias in config.aliases.values() {
795 let pair = (alias.id.clone(), alias.provider.clone());
796 if seen.contains(&pair) {
797 continue;
798 }
799 if model_tier(&alias.id) == target {
800 seen.insert(pair.clone());
801 candidates.push(pair);
802 }
803 }
804
805 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
806 provider_a
807 .cmp(provider_b)
808 .then_with(|| model_a.cmp(model_b))
809 });
810 candidates
811}
812
813pub fn all_model_candidates() -> Vec<(String, String)> {
816 let config = effective_config();
817 let mut seen = std::collections::BTreeSet::new();
818 let mut candidates = Vec::new();
819
820 for alias in config.aliases.values() {
821 let pair = (alias.id.clone(), alias.provider.clone());
822 if seen.insert(pair.clone()) {
823 candidates.push(pair);
824 }
825 }
826
827 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
828 provider_a
829 .cmp(provider_b)
830 .then_with(|| model_a.cmp(model_b))
831 });
832 candidates
833}
834
/// Minimal glob matcher supporting a single `*`:
/// trailing `*` = prefix match, leading `*` = suffix match, one interior `*`
/// = prefix+suffix match. Anything with more stars falls back to equality.
fn glob_match(pattern: &str, input: &str) -> bool {
    // Trailing '*' is checked first, so e.g. "a*b*" acts as a prefix test for "a*b".
    if let Some(prefix) = pattern.strip_suffix('*') {
        return input.starts_with(prefix);
    }
    if let Some(suffix) = pattern.strip_prefix('*') {
        return input.ends_with(suffix);
    }
    let parts: Vec<&str> = pattern.split('*').collect();
    match parts.as_slice() {
        [head, tail] => input.starts_with(head) && input.ends_with(tail),
        // No '*' at all, or more than one interior '*': exact comparison.
        _ => input == pattern,
    }
}
852
/// Home directory from `$HOME` (avoids a `dirs`-crate dependency).
fn dirs_or_home() -> Option<String> {
    match std::env::var("HOME") {
        Ok(home) => Some(home),
        Err(_) => None,
    }
}
856
857pub fn resolve_base_url(pdef: &ProviderDef) -> String {
860 if let Some(env_name) = &pdef.base_url_env {
861 if let Ok(val) = std::env::var(env_name) {
862 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
864 if !trimmed.is_empty() {
865 return trimmed.to_string();
866 }
867 }
868 }
869 pdef.base_url.clone()
870}
871
872fn default_config() -> ProvidersConfig {
873 let mut config = ProvidersConfig {
874 default_provider: Some("anthropic".to_string()),
875 ..Default::default()
876 };
877
878 config.providers.insert(
879 "anthropic".to_string(),
880 ProviderDef {
881 base_url: "https://api.anthropic.com/v1".to_string(),
882 auth_style: "header".to_string(),
883 auth_header: Some("x-api-key".to_string()),
884 auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
885 extra_headers: BTreeMap::from([(
886 "anthropic-version".to_string(),
887 "2023-06-01".to_string(),
888 )]),
889 chat_endpoint: "/messages".to_string(),
890 completion_endpoint: None,
891 healthcheck: Some(HealthcheckDef {
892 method: "POST".to_string(),
893 path: Some("/messages/count_tokens".to_string()),
894 url: None,
895 body: Some(
896 r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
897 .to_string(),
898 ),
899 }),
900 features: vec!["prompt_caching".to_string(), "thinking".to_string()],
901 cost_per_1k_in: Some(0.003),
902 cost_per_1k_out: Some(0.015),
903 latency_p50_ms: Some(2500),
904 ..Default::default()
905 },
906 );
907
908 config.providers.insert(
910 "openai".to_string(),
911 ProviderDef {
912 base_url: "https://api.openai.com/v1".to_string(),
913 auth_style: "bearer".to_string(),
914 auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
915 chat_endpoint: "/chat/completions".to_string(),
916 completion_endpoint: Some("/completions".to_string()),
917 healthcheck: Some(HealthcheckDef {
918 method: "GET".to_string(),
919 path: Some("/models".to_string()),
920 url: None,
921 body: None,
922 }),
923 cost_per_1k_in: Some(0.0025),
924 cost_per_1k_out: Some(0.010),
925 latency_p50_ms: Some(1800),
926 ..Default::default()
927 },
928 );
929
930 config.providers.insert(
932 "openrouter".to_string(),
933 ProviderDef {
934 base_url: "https://openrouter.ai/api/v1".to_string(),
935 auth_style: "bearer".to_string(),
936 auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
937 chat_endpoint: "/chat/completions".to_string(),
938 completion_endpoint: Some("/completions".to_string()),
939 healthcheck: Some(HealthcheckDef {
940 method: "GET".to_string(),
941 path: Some("/auth/key".to_string()),
942 url: None,
943 body: None,
944 }),
945 cost_per_1k_in: Some(0.003),
946 cost_per_1k_out: Some(0.015),
947 latency_p50_ms: Some(2200),
948 ..Default::default()
949 },
950 );
951
952 config.providers.insert(
954 "huggingface".to_string(),
955 ProviderDef {
956 base_url: "https://router.huggingface.co/v1".to_string(),
957 auth_style: "bearer".to_string(),
958 auth_env: AuthEnv::Multiple(vec![
959 "HF_TOKEN".to_string(),
960 "HUGGINGFACE_API_KEY".to_string(),
961 ]),
962 chat_endpoint: "/chat/completions".to_string(),
963 completion_endpoint: Some("/completions".to_string()),
964 healthcheck: Some(HealthcheckDef {
965 method: "GET".to_string(),
966 url: Some("https://huggingface.co/api/whoami-v2".to_string()),
967 path: None,
968 body: None,
969 }),
970 cost_per_1k_in: Some(0.0002),
971 cost_per_1k_out: Some(0.0006),
972 latency_p50_ms: Some(2400),
973 ..Default::default()
974 },
975 );
976
977 config.providers.insert(
986 "ollama".to_string(),
987 ProviderDef {
988 base_url: "http://localhost:11434".to_string(),
989 base_url_env: Some("OLLAMA_HOST".to_string()),
990 auth_style: "none".to_string(),
991 chat_endpoint: "/api/chat".to_string(),
992 completion_endpoint: Some("/api/generate".to_string()),
993 healthcheck: Some(HealthcheckDef {
994 method: "GET".to_string(),
995 path: Some("/api/tags".to_string()),
996 url: None,
997 body: None,
998 }),
999 cost_per_1k_in: Some(0.0),
1000 cost_per_1k_out: Some(0.0),
1001 latency_p50_ms: Some(1200),
1002 ..Default::default()
1003 },
1004 );
1005
1006 config.providers.insert(
1008 "gemini".to_string(),
1009 ProviderDef {
1010 base_url: "https://generativelanguage.googleapis.com".to_string(),
1011 base_url_env: Some("GEMINI_BASE_URL".to_string()),
1012 auth_style: "header".to_string(),
1013 auth_header: Some("x-goog-api-key".to_string()),
1014 auth_env: AuthEnv::Multiple(vec![
1015 "GEMINI_API_KEY".to_string(),
1016 "GOOGLE_API_KEY".to_string(),
1017 ]),
1018 chat_endpoint: "/v1beta/models".to_string(),
1019 healthcheck: Some(HealthcheckDef {
1020 method: "GET".to_string(),
1021 path: Some("/v1beta/models".to_string()),
1022 url: None,
1023 body: None,
1024 }),
1025 cost_per_1k_in: Some(0.00125),
1026 cost_per_1k_out: Some(0.005),
1027 latency_p50_ms: Some(1800),
1028 ..Default::default()
1029 },
1030 );
1031
1032 config.providers.insert(
1034 "together".to_string(),
1035 ProviderDef {
1036 base_url: "https://api.together.xyz/v1".to_string(),
1037 base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
1038 auth_style: "bearer".to_string(),
1039 auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
1040 chat_endpoint: "/chat/completions".to_string(),
1041 completion_endpoint: Some("/completions".to_string()),
1042 healthcheck: Some(HealthcheckDef {
1043 method: "GET".to_string(),
1044 path: Some("/models".to_string()),
1045 url: None,
1046 body: None,
1047 }),
1048 cost_per_1k_in: Some(0.0002),
1049 cost_per_1k_out: Some(0.0006),
1050 latency_p50_ms: Some(1600),
1051 ..Default::default()
1052 },
1053 );
1054
1055 config.providers.insert(
1057 "groq".to_string(),
1058 ProviderDef {
1059 base_url: "https://api.groq.com/openai/v1".to_string(),
1060 base_url_env: Some("GROQ_BASE_URL".to_string()),
1061 auth_style: "bearer".to_string(),
1062 auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
1063 chat_endpoint: "/chat/completions".to_string(),
1064 completion_endpoint: Some("/completions".to_string()),
1065 healthcheck: Some(HealthcheckDef {
1066 method: "GET".to_string(),
1067 path: Some("/models".to_string()),
1068 url: None,
1069 body: None,
1070 }),
1071 cost_per_1k_in: Some(0.0001),
1072 cost_per_1k_out: Some(0.0003),
1073 latency_p50_ms: Some(450),
1074 ..Default::default()
1075 },
1076 );
1077
1078 config.providers.insert(
1080 "deepseek".to_string(),
1081 ProviderDef {
1082 base_url: "https://api.deepseek.com/v1".to_string(),
1083 base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
1084 auth_style: "bearer".to_string(),
1085 auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
1086 chat_endpoint: "/chat/completions".to_string(),
1087 completion_endpoint: Some("/completions".to_string()),
1088 healthcheck: Some(HealthcheckDef {
1089 method: "GET".to_string(),
1090 path: Some("/models".to_string()),
1091 url: None,
1092 body: None,
1093 }),
1094 cost_per_1k_in: Some(0.00014),
1095 cost_per_1k_out: Some(0.00028),
1096 latency_p50_ms: Some(1800),
1097 ..Default::default()
1098 },
1099 );
1100
1101 config.providers.insert(
1103 "fireworks".to_string(),
1104 ProviderDef {
1105 base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1106 base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1107 auth_style: "bearer".to_string(),
1108 auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1109 chat_endpoint: "/chat/completions".to_string(),
1110 completion_endpoint: Some("/completions".to_string()),
1111 healthcheck: Some(HealthcheckDef {
1112 method: "GET".to_string(),
1113 path: Some("/models".to_string()),
1114 url: None,
1115 body: None,
1116 }),
1117 cost_per_1k_in: Some(0.0002),
1118 cost_per_1k_out: Some(0.0006),
1119 latency_p50_ms: Some(1400),
1120 ..Default::default()
1121 },
1122 );
1123
1124 config.providers.insert(
1126 "dashscope".to_string(),
1127 ProviderDef {
1128 base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
1129 base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
1130 auth_style: "bearer".to_string(),
1131 auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
1132 chat_endpoint: "/chat/completions".to_string(),
1133 completion_endpoint: Some("/completions".to_string()),
1134 healthcheck: Some(HealthcheckDef {
1135 method: "GET".to_string(),
1136 path: Some("/models".to_string()),
1137 url: None,
1138 body: None,
1139 }),
1140 cost_per_1k_in: Some(0.0003),
1141 cost_per_1k_out: Some(0.0012),
1142 latency_p50_ms: Some(1600),
1143 ..Default::default()
1144 },
1145 );
1146
1147 config.providers.insert(
1151 "bedrock".to_string(),
1152 ProviderDef {
1153 base_url: String::new(),
1154 base_url_env: Some("BEDROCK_BASE_URL".to_string()),
1155 auth_style: "aws_sigv4".to_string(),
1156 auth_env: AuthEnv::None,
1157 chat_endpoint: "/model/{model}/converse".to_string(),
1158 features: vec!["native_tools".to_string()],
1159 latency_p50_ms: Some(2600),
1160 ..Default::default()
1161 },
1162 );
1163
1164 config.providers.insert(
1168 "azure_openai".to_string(),
1169 ProviderDef {
1170 base_url: "https://{resource}.openai.azure.com".to_string(),
1171 base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
1172 auth_style: "azure_openai".to_string(),
1173 auth_env: AuthEnv::Multiple(vec![
1174 "AZURE_OPENAI_API_KEY".to_string(),
1175 "AZURE_OPENAI_AD_TOKEN".to_string(),
1176 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1177 ]),
1178 chat_endpoint:
1179 "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
1180 .to_string(),
1181 features: vec!["native_tools".to_string()],
1182 cost_per_1k_in: Some(0.0025),
1183 cost_per_1k_out: Some(0.010),
1184 latency_p50_ms: Some(1900),
1185 ..Default::default()
1186 },
1187 );
1188
1189 config.providers.insert(
1191 "vertex".to_string(),
1192 ProviderDef {
1193 base_url: "https://aiplatform.googleapis.com/v1".to_string(),
1194 base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
1195 auth_style: "bearer".to_string(),
1196 auth_env: AuthEnv::Multiple(vec![
1197 "VERTEX_AI_ACCESS_TOKEN".to_string(),
1198 "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
1199 "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
1200 ]),
1201 chat_endpoint:
1202 "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
1203 .to_string(),
1204 features: vec!["native_tools".to_string()],
1205 cost_per_1k_in: Some(0.00125),
1206 cost_per_1k_out: Some(0.005),
1207 latency_p50_ms: Some(2100),
1208 ..Default::default()
1209 },
1210 );
1211
1212 config.providers.insert(
1214 "local".to_string(),
1215 ProviderDef {
1216 base_url: "http://localhost:8000".to_string(),
1217 base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1218 auth_style: "none".to_string(),
1219 chat_endpoint: "/v1/chat/completions".to_string(),
1220 completion_endpoint: Some("/v1/completions".to_string()),
1221 healthcheck: Some(HealthcheckDef {
1222 method: "GET".to_string(),
1223 path: Some("/v1/models".to_string()),
1224 url: None,
1225 body: None,
1226 }),
1227 cost_per_1k_in: Some(0.0),
1228 cost_per_1k_out: Some(0.0),
1229 latency_p50_ms: Some(900),
1230 ..Default::default()
1231 },
1232 );
1233
1234 config.providers.insert(
1238 "mlx".to_string(),
1239 ProviderDef {
1240 base_url: "http://127.0.0.1:8002".to_string(),
1241 base_url_env: Some("MLX_BASE_URL".to_string()),
1242 auth_style: "none".to_string(),
1243 chat_endpoint: "/v1/chat/completions".to_string(),
1244 completion_endpoint: Some("/v1/completions".to_string()),
1245 healthcheck: Some(HealthcheckDef {
1246 method: "GET".to_string(),
1247 path: Some("/v1/models".to_string()),
1248 url: None,
1249 body: None,
1250 }),
1251 cost_per_1k_in: Some(0.0),
1252 cost_per_1k_out: Some(0.0),
1253 latency_p50_ms: Some(900),
1254 ..Default::default()
1255 },
1256 );
1257
1258 config.providers.insert(
1260 "vllm".to_string(),
1261 ProviderDef {
1262 base_url: "http://localhost:8000".to_string(),
1263 base_url_env: Some("VLLM_BASE_URL".to_string()),
1264 auth_style: "none".to_string(),
1265 chat_endpoint: "/v1/chat/completions".to_string(),
1266 completion_endpoint: Some("/v1/completions".to_string()),
1267 healthcheck: Some(HealthcheckDef {
1268 method: "GET".to_string(),
1269 path: Some("/v1/models".to_string()),
1270 url: None,
1271 body: None,
1272 }),
1273 cost_per_1k_in: Some(0.0),
1274 cost_per_1k_out: Some(0.0),
1275 latency_p50_ms: Some(800),
1276 ..Default::default()
1277 },
1278 );
1279
1280 config.providers.insert(
1282 "tgi".to_string(),
1283 ProviderDef {
1284 base_url: "http://localhost:8080".to_string(),
1285 base_url_env: Some("TGI_BASE_URL".to_string()),
1286 auth_style: "none".to_string(),
1287 chat_endpoint: "/v1/chat/completions".to_string(),
1288 completion_endpoint: Some("/v1/completions".to_string()),
1289 healthcheck: Some(HealthcheckDef {
1290 method: "GET".to_string(),
1291 path: Some("/health".to_string()),
1292 url: None,
1293 body: None,
1294 }),
1295 cost_per_1k_in: Some(0.0),
1296 cost_per_1k_out: Some(0.0),
1297 latency_p50_ms: Some(950),
1298 ..Default::default()
1299 },
1300 );
1301
1302 config.inference_rules = vec![
1304 InferenceRule {
1305 pattern: Some("claude-*".to_string()),
1306 contains: None,
1307 exact: None,
1308 provider: "anthropic".to_string(),
1309 },
1310 InferenceRule {
1311 pattern: Some("gpt-*".to_string()),
1312 contains: None,
1313 exact: None,
1314 provider: "openai".to_string(),
1315 },
1316 InferenceRule {
1317 pattern: Some("o1*".to_string()),
1318 contains: None,
1319 exact: None,
1320 provider: "openai".to_string(),
1321 },
1322 InferenceRule {
1323 pattern: Some("o3*".to_string()),
1324 contains: None,
1325 exact: None,
1326 provider: "openai".to_string(),
1327 },
1328 InferenceRule {
1329 pattern: Some("o4*".to_string()),
1330 contains: None,
1331 exact: None,
1332 provider: "openai".to_string(),
1333 },
1334 InferenceRule {
1335 pattern: Some("anthropic.claude-*".to_string()),
1336 contains: None,
1337 exact: None,
1338 provider: "bedrock".to_string(),
1339 },
1340 InferenceRule {
1341 pattern: Some("meta.llama*".to_string()),
1342 contains: None,
1343 exact: None,
1344 provider: "bedrock".to_string(),
1345 },
1346 InferenceRule {
1347 pattern: Some("amazon.*".to_string()),
1348 contains: None,
1349 exact: None,
1350 provider: "bedrock".to_string(),
1351 },
1352 InferenceRule {
1353 pattern: Some("mistral.*".to_string()),
1354 contains: None,
1355 exact: None,
1356 provider: "bedrock".to_string(),
1357 },
1358 InferenceRule {
1359 pattern: Some("cohere.*".to_string()),
1360 contains: None,
1361 exact: None,
1362 provider: "bedrock".to_string(),
1363 },
1364 InferenceRule {
1365 pattern: Some("gemini-*".to_string()),
1366 contains: None,
1367 exact: None,
1368 provider: "gemini".to_string(),
1369 },
1370 ];
1371
1372 config.tier_rules = vec![
1374 TierRule {
1375 contains: Some("9b".to_string()),
1376 pattern: None,
1377 exact: None,
1378 tier: "small".to_string(),
1379 },
1380 TierRule {
1381 contains: Some("a3b".to_string()),
1382 pattern: None,
1383 exact: None,
1384 tier: "small".to_string(),
1385 },
1386 TierRule {
1387 contains: Some("gemma-4-e2b".to_string()),
1388 pattern: None,
1389 exact: None,
1390 tier: "small".to_string(),
1391 },
1392 TierRule {
1393 contains: Some("gemma-4-e4b".to_string()),
1394 pattern: None,
1395 exact: None,
1396 tier: "small".to_string(),
1397 },
1398 TierRule {
1399 contains: Some("gemma-4-26b".to_string()),
1400 pattern: None,
1401 exact: None,
1402 tier: "mid".to_string(),
1403 },
1404 TierRule {
1405 contains: Some("gemma-4-31b".to_string()),
1406 pattern: None,
1407 exact: None,
1408 tier: "frontier".to_string(),
1409 },
1410 TierRule {
1411 contains: Some("gemma4:26b".to_string()),
1412 pattern: None,
1413 exact: None,
1414 tier: "mid".to_string(),
1415 },
1416 TierRule {
1417 contains: Some("gemma4:31b".to_string()),
1418 pattern: None,
1419 exact: None,
1420 tier: "frontier".to_string(),
1421 },
1422 TierRule {
1423 pattern: Some("claude-*".to_string()),
1424 contains: None,
1425 exact: None,
1426 tier: "frontier".to_string(),
1427 },
1428 TierRule {
1429 exact: Some("gpt-4o".to_string()),
1430 contains: None,
1431 pattern: None,
1432 tier: "frontier".to_string(),
1433 },
1434 ];
1435
1436 config.tier_defaults = TierDefaults {
1437 default: "mid".to_string(),
1438 };
1439
1440 config.aliases.insert(
1441 "frontier".to_string(),
1442 AliasDef {
1443 id: "claude-sonnet-4-20250514".to_string(),
1444 provider: "anthropic".to_string(),
1445 tool_format: None,
1446 },
1447 );
1448 config.aliases.insert(
1449 "tier/frontier".to_string(),
1450 AliasDef {
1451 id: "claude-sonnet-4-20250514".to_string(),
1452 provider: "anthropic".to_string(),
1453 tool_format: None,
1454 },
1455 );
1456 config.aliases.insert(
1457 "mid".to_string(),
1458 AliasDef {
1459 id: "gpt-4o-mini".to_string(),
1460 provider: "openai".to_string(),
1461 tool_format: None,
1462 },
1463 );
1464 config.aliases.insert(
1465 "tier/mid".to_string(),
1466 AliasDef {
1467 id: "gpt-4o-mini".to_string(),
1468 provider: "openai".to_string(),
1469 tool_format: None,
1470 },
1471 );
1472 config.aliases.insert(
1473 "small".to_string(),
1474 AliasDef {
1475 id: "Qwen/Qwen3.5-9B".to_string(),
1476 provider: "openrouter".to_string(),
1477 tool_format: None,
1478 },
1479 );
1480 config.aliases.insert(
1481 "tier/small".to_string(),
1482 AliasDef {
1483 id: "Qwen/Qwen3.5-9B".to_string(),
1484 provider: "openrouter".to_string(),
1485 tool_format: None,
1486 },
1487 );
1488 config.aliases.insert(
1489 "local-gemma4".to_string(),
1490 AliasDef {
1491 id: "gemma-4-26b-a4b-it".to_string(),
1492 provider: "local".to_string(),
1493 tool_format: None,
1494 },
1495 );
1496 config.aliases.insert(
1497 "local-gemma4-26b".to_string(),
1498 AliasDef {
1499 id: "gemma-4-26b-a4b-it".to_string(),
1500 provider: "local".to_string(),
1501 tool_format: None,
1502 },
1503 );
1504 config.aliases.insert(
1505 "local-gemma4-31b".to_string(),
1506 AliasDef {
1507 id: "gemma-4-31b-it".to_string(),
1508 provider: "local".to_string(),
1509 tool_format: None,
1510 },
1511 );
1512 config.aliases.insert(
1513 "local-gemma4-e4b".to_string(),
1514 AliasDef {
1515 id: "gemma-4-e4b-it".to_string(),
1516 provider: "local".to_string(),
1517 tool_format: None,
1518 },
1519 );
1520 config.aliases.insert(
1521 "local-gemma4-e2b".to_string(),
1522 AliasDef {
1523 id: "gemma-4-e2b-it".to_string(),
1524 provider: "local".to_string(),
1525 tool_format: None,
1526 },
1527 );
1528 config.aliases.insert(
1529 "mlx-qwen36-27b".to_string(),
1530 AliasDef {
1531 id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
1532 provider: "mlx".to_string(),
1533 tool_format: None,
1534 },
1535 );
1536
1537 config.qc_defaults.extend(BTreeMap::from([
1538 (
1539 "anthropic".to_string(),
1540 "claude-3-5-haiku-20241022".to_string(),
1541 ),
1542 ("openai".to_string(), "gpt-4o-mini".to_string()),
1543 (
1544 "openrouter".to_string(),
1545 "google/gemini-2.5-flash".to_string(),
1546 ),
1547 ("ollama".to_string(), "llama3.2".to_string()),
1548 ("local".to_string(), "gpt-4o".to_string()),
1549 ]));
1550
1551 config.models.extend(BTreeMap::from([
1552 (
1553 "claude-sonnet-4-20250514".to_string(),
1554 ModelDef {
1555 name: "Claude Sonnet 4".to_string(),
1556 provider: "anthropic".to_string(),
1557 context_window: 200_000,
1558 stream_timeout: None,
1559 capabilities: vec![
1560 "tools".to_string(),
1561 "streaming".to_string(),
1562 "prompt_caching".to_string(),
1563 "thinking".to_string(),
1564 ],
1565 pricing: Some(ModelPricing {
1566 input_per_mtok: 3.0,
1567 output_per_mtok: 15.0,
1568 cache_read_per_mtok: Some(0.3),
1569 cache_write_per_mtok: Some(3.75),
1570 }),
1571 },
1572 ),
1573 (
1574 "gpt-4o-mini".to_string(),
1575 ModelDef {
1576 name: "GPT-4o Mini".to_string(),
1577 provider: "openai".to_string(),
1578 context_window: 128_000,
1579 stream_timeout: None,
1580 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1581 pricing: Some(ModelPricing {
1582 input_per_mtok: 0.15,
1583 output_per_mtok: 0.60,
1584 cache_read_per_mtok: None,
1585 cache_write_per_mtok: None,
1586 }),
1587 },
1588 ),
1589 (
1590 "Qwen/Qwen3.5-9B".to_string(),
1591 ModelDef {
1592 name: "Qwen3.5 9B".to_string(),
1593 provider: "openrouter".to_string(),
1594 context_window: 131_072,
1595 stream_timeout: None,
1596 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1597 pricing: None,
1598 },
1599 ),
1600 (
1601 "llama3.2".to_string(),
1602 ModelDef {
1603 name: "Llama 3.2".to_string(),
1604 provider: "ollama".to_string(),
1605 context_window: 32_000,
1606 stream_timeout: Some(300.0),
1607 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1608 pricing: None,
1609 },
1610 ),
1611 ]));
1612
1613 config
1614}
1615
#[cfg(test)]
mod tests {
    //! Unit tests for the provider configuration defaults: glob matching,
    //! provider inference, tier resolution, alias lookup, and thread-local
    //! user overrides.
    use super::*;

    /// Clears thread-local user overrides so a test observes only the
    /// built-in defaults. Overrides are thread-local, so this only affects
    /// the current test thread.
    fn reset_overrides() {
        clear_user_overrides();
    }

    #[test]
    fn test_glob_match_prefix() {
        // A trailing `*` matches any suffix after the literal prefix.
        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
        assert!(glob_match("gpt-*", "gpt-4o"));
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    #[test]
    fn test_glob_match_suffix() {
        // A leading `*` matches any prefix before the literal suffix.
        assert!(glob_match("*-latest", "llama3.2-latest"));
        assert!(!glob_match("*-latest", "llama3.2"));
    }

    #[test]
    fn test_glob_match_middle() {
        // A `*` between literals requires both surrounding pieces to match.
        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
    }

    #[test]
    fn test_glob_match_exact() {
        // Without `*` the pattern must equal the whole candidate string.
        assert!(glob_match("gpt-4o", "gpt-4o"));
        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
    }

    #[test]
    fn test_infer_provider_from_defaults() {
        // Serialize process-env access across tests, then clear any ambient
        // default so the built-in inference rules (not the env fallback)
        // are exercised.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        // SAFETY: env mutation is guarded by the process-wide env lock held
        // above, so no other test mutates the environment concurrently.
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        // No rule matches, so inference falls back to the built-in default
        // provider (anthropic when HARN_DEFAULT_PROVIDER is unset).
        assert_eq!(infer_provider("unknown-model"), "anthropic");

        // Restore the variable to its pre-test state.
        // SAFETY: still holding the env lock.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    #[test]
    fn test_infer_provider_prefix_rules() {
        // Explicit "scheme:" prefixes override pattern-based inference.
        // NOTE(review): `local:` mapping to "ollama" is implemented by a
        // prefix rule outside this view — confirm against infer_provider.
        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
        assert_eq!(infer_provider("local:owner/model"), "ollama");
        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
    }

    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        // Env lock + cleared default, as in test_infer_provider_from_defaults,
        // so the fallback path is deterministic.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        // SAFETY: env mutation guarded by the env lock held above.
        unsafe {
            std::env::remove_var("HARN_DEFAULT_PROVIDER");
        }

        // Exactly one slash looks like an OpenRouter "org/model" id; more
        // than one slash does not, so it falls back to the default provider.
        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");

        // SAFETY: still holding the env lock.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }
    }

    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        // "scheme:" prefixes are stripped from the id and become the provider.
        let local = resolve_model_info("local:gemma-4-e4b-it");
        assert_eq!(local.id, "gemma-4-e4b-it");
        assert_eq!(local.provider, "ollama");

        // Only the leading "ollama:" is stripped; the model's own ':' tag
        // (ollama size tag) is preserved.
        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
        assert_eq!(ollama.id, "qwen3:30b-a3b");
        assert_eq!(ollama.provider, "ollama");

        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.provider, "huggingface");
    }

    #[test]
    fn test_model_tier_from_defaults() {
        // Pattern rule: claude-* => frontier. Exact rule: gpt-4o => frontier.
        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
        assert_eq!(model_tier("gpt-4o"), "frontier");
        // Contains rule: "9b" => small.
        assert_eq!(model_tier("Qwen3.5-9B"), "small");
        // No rule matches => tier_defaults.default ("mid").
        assert_eq!(model_tier("deepseek-v3"), "mid");
    }

    #[test]
    fn test_resolve_model_unknown_alias() {
        // A name with no alias entry resolves to itself with no provider.
        let (id, provider) = resolve_model("gpt-4o");
        assert_eq!(id, "gpt-4o");
        assert!(provider.is_none());
    }

    #[test]
    fn test_provider_names() {
        let names = provider_names();
        // Nine providers are asserted individually below, so the count must
        // be at least nine (fixed from a stale `>= 7`; user overlays may
        // add more, hence `>=` rather than `==`).
        assert!(names.len() >= 9);
        assert!(names.contains(&"anthropic".to_string()));
        assert!(names.contains(&"together".to_string()));
        assert!(names.contains(&"local".to_string()));
        assert!(names.contains(&"mlx".to_string()));
        assert!(names.contains(&"openai".to_string()));
        assert!(names.contains(&"ollama".to_string()));
        assert!(names.contains(&"bedrock".to_string()));
        assert!(names.contains(&"azure_openai".to_string()));
        assert!(names.contains(&"vertex".to_string()));
    }

    #[test]
    fn test_resolve_tier_model_default_aliases() {
        // Tier names resolve through the built-in "frontier"/"small" aliases.
        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
        assert_eq!(model, "claude-sonnet-4-20250514");
        assert_eq!(provider, "anthropic");

        let (model, provider) = resolve_tier_model("small", None).unwrap();
        assert_eq!(model, "Qwen/Qwen3.5-9B");
        assert_eq!(provider, "openrouter");
    }

    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        // When a provider hint is given, the provider-scoped alias wins.
        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
        assert_eq!(model, "gpt-4o-mini");
        assert_eq!(provider, "openai");
    }

    #[test]
    fn test_provider_config_anthropic() {
        // Anthropic authenticates with a custom header, not a bearer token.
        let pdef = provider_config("anthropic").unwrap();
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    #[test]
    fn test_provider_config_mlx() {
        // The built-in mlx provider points at a local server with a
        // /v1/models healthcheck.
        let pdef = provider_config("mlx").unwrap();
        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
        assert_eq!(
            pdef.healthcheck.unwrap().path.as_deref(),
            Some("/v1/models")
        );

        // The default mlx alias resolves to the bundled quantized model.
        let (model, provider) = resolve_model("mlx-qwen36-27b");
        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
        assert_eq!(provider.as_deref(), Some("mlx"));
    }

    #[test]
    fn test_enterprise_provider_defaults_and_inference() {
        // Bedrock: SigV4 auth, env-overridable base URL, and "vendor.model"
        // id patterns inferred to bedrock.
        let bedrock = provider_config("bedrock").unwrap();
        assert_eq!(bedrock.auth_style, "aws_sigv4");
        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
        assert_eq!(
            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
            "bedrock"
        );
        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");

        // Azure OpenAI: multiple credential env vars, tried in order.
        let azure = provider_config("azure_openai").unwrap();
        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
        assert_eq!(
            auth_env_names(&azure.auth_env),
            vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]
        );

        // Vertex base URL is fixed; gemini-* ids infer to the gemini
        // provider, not vertex.
        let vertex = provider_config("vertex").unwrap();
        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
    }

    #[test]
    fn test_default_provider_env_override_for_unknown_model() {
        // HARN_DEFAULT_PROVIDER overrides the built-in fallback for models
        // no inference rule matches.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let prev_default_provider = std::env::var("HARN_DEFAULT_PROVIDER").ok();
        // SAFETY: env mutation guarded by the env lock held above.
        unsafe {
            std::env::set_var("HARN_DEFAULT_PROVIDER", "openai");
        }

        let inference = infer_provider_detail("unknown-model");

        // Restore before asserting so a failed assert can't leak the
        // override into other tests.
        // SAFETY: still holding the env lock.
        unsafe {
            match prev_default_provider {
                Some(value) => std::env::set_var("HARN_DEFAULT_PROVIDER", value),
                None => std::env::remove_var("HARN_DEFAULT_PROVIDER"),
            }
        }

        assert_eq!(inference.provider, "openai");
        assert_eq!(
            inference.source,
            crate::llm::provider::ProviderInferenceSource::DefaultFallback
        );
    }

    #[test]
    fn test_resolve_base_url_no_env() {
        // Without a base_url_env, the static base_url is used verbatim.
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    #[test]
    fn test_default_config_roundtrip() {
        // Sanity-check the built-in defaults are populated and the tier
        // default matches what default_config() sets.
        let config = default_config();
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
        assert_eq!(config.tier_defaults.default, "mid");
    }

    #[test]
    fn test_model_params_empty() {
        // No model_defaults entry exists for this model in the defaults.
        let params = model_params("claude-sonnet-4-20250514");
        assert!(params.is_empty());
    }

    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();
        // Overlay a brand-new provider plus an alias pointing at it, and
        // verify both are visible through the public lookup functions.
        let mut overlay = ProvidersConfig::default();
        overlay.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        overlay.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        set_user_overrides(Some(overlay));

        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        assert_eq!(
            provider_config("acme").map(|provider| provider.base_url),
            Some("https://llm.acme.test/v1".to_string())
        );

        reset_overrides();
    }

    #[test]
    fn test_default_tool_format_uses_capability_matrix() {
        reset_overrides();

        // NOTE(review): expected values come from a capability matrix defined
        // outside this view — confirm against default_tool_format.
        assert_eq!(
            default_tool_format("qwen3.6-35b-a3b-ud-q4-k-xl", "llamacpp"),
            "native"
        );
        assert_eq!(default_tool_format("gemma-4-26b-a4b-it", "local"), "text");
    }

    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();
        // Overlay a catalog entry with pricing and a qc default, then check
        // catalog lookup, per-1k pricing conversion, and qc default.
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(overlay));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        // NOTE(review): the catalog entry surfaces only "streaming" even
        // though the overlay declared "tools" too — presumably the catalog
        // filters capabilities; confirm against model_catalog_entry.
        assert_eq!(entry.capabilities, vec!["streaming".to_string()]);
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        // Per-mtok pricing (1.25 / 2.5) divided by 1000 => per-1k values.
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }

    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();
        // Overlay rules are prepended (see ProvidersConfig::merge_from), so
        // a user rule wins over the built-ins.
        let mut overlay = ProvidersConfig::default();
        overlay.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        set_user_overrides(Some(overlay));

        assert_eq!(infer_provider("internal-foo"), "openai");

        reset_overrides();
    }
}