1use serde::{Deserialize, Serialize};
2use std::cell::RefCell;
3use std::collections::BTreeMap;
4use std::sync::OnceLock;
5
// Process-wide cached providers configuration, loaded at most once.
static CONFIG: OnceLock<ProvidersConfig> = OnceLock::new();
// Filesystem path the cached config was loaded from; unset when built-in defaults are used.
static CONFIG_PATH: OnceLock<String> = OnceLock::new();
8
thread_local! {
    // Per-thread overlay merged on top of the global config by `effective_config`.
    static USER_OVERRIDES: RefCell<Option<ProvidersConfig>> = const { RefCell::new(None) };
}
16
/// Root of the providers TOML configuration. Every section is optional so a
/// partial file (or an empty overlay) deserializes cleanly via `#[serde(default)]`.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ProvidersConfig {
    /// Explicit default provider name; `None`/"auto" falls back to inference.
    #[serde(default)]
    pub default_provider: Option<String>,
    /// Provider name -> endpoint/auth definition.
    #[serde(default)]
    pub providers: BTreeMap<String, ProviderDef>,
    /// Alias name -> (model id, provider) shortcut.
    #[serde(default)]
    pub aliases: BTreeMap<String, AliasDef>,
    /// Model id -> catalog entry (context window, pricing, ...).
    #[serde(default)]
    pub models: BTreeMap<String, ModelDef>,
    /// Lower-cased provider name -> default QC model id.
    #[serde(default)]
    pub qc_defaults: BTreeMap<String, String>,
    /// Ordered rules mapping model ids to providers (first match wins).
    #[serde(default)]
    pub inference_rules: Vec<InferenceRule>,
    /// Ordered rules mapping model ids to tiers (first match wins).
    #[serde(default)]
    pub tier_rules: Vec<TierRule>,
    /// Fallback tier used when no tier rule matches.
    #[serde(default)]
    pub tier_defaults: TierDefaults,
    /// Glob pattern -> extra request parameters applied to matching models.
    #[serde(default)]
    pub model_defaults: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
38
impl ProvidersConfig {
    /// True when every section is unset/empty — i.e. the config is
    /// indistinguishable from a freshly-defaulted one (tier default still "mid").
    pub fn is_empty(&self) -> bool {
        self.default_provider.is_none()
            && self.providers.is_empty()
            && self.aliases.is_empty()
            && self.models.is_empty()
            && self.qc_defaults.is_empty()
            && self.inference_rules.is_empty()
            && self.tier_rules.is_empty()
            && self.model_defaults.is_empty()
            && self.tier_defaults.default == default_mid()
    }

    /// Merges `overlay` on top of `self`. Precedence rules:
    /// - map sections (providers/aliases/models/qc_defaults): overlay entries
    ///   replace same-keyed entries in `self`;
    /// - `default_provider`: overlay wins only when it is `Some`;
    /// - rule lists: overlay rules are PREPENDED so they match before base rules;
    /// - `tier_defaults`: overlay wins only when it differs from the "mid" default;
    /// - `model_defaults`: merged per pattern, overlay keys replacing existing ones.
    pub fn merge_from(&mut self, overlay: &ProvidersConfig) {
        self.providers.extend(overlay.providers.clone());
        self.aliases.extend(overlay.aliases.clone());
        self.models.extend(overlay.models.clone());
        self.qc_defaults.extend(overlay.qc_defaults.clone());

        if overlay.default_provider.is_some() {
            self.default_provider = overlay.default_provider.clone();
        }

        if !overlay.inference_rules.is_empty() {
            let mut merged = overlay.inference_rules.clone();
            merged.extend(self.inference_rules.clone());
            self.inference_rules = merged;
        }

        if !overlay.tier_rules.is_empty() {
            let mut merged = overlay.tier_rules.clone();
            merged.extend(self.tier_rules.clone());
            self.tier_rules = merged;
        }

        if overlay.tier_defaults.default != default_mid() {
            self.tier_defaults = overlay.tier_defaults.clone();
        }

        for (pattern, defaults) in &overlay.model_defaults {
            self.model_defaults
                .entry(pattern.clone())
                .or_default()
                .extend(defaults.clone());
        }
    }
}
86
/// Endpoint, authentication, and economics description of a single provider.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderDef {
    /// Human-readable name for UIs.
    #[serde(default)]
    pub display_name: Option<String>,
    /// Optional icon identifier for UIs.
    #[serde(default)]
    pub icon: Option<String>,
    /// Base API URL; may be overridden at runtime via `base_url_env` (see `resolve_base_url`).
    pub base_url: String,
    #[serde(default)]
    pub base_url_env: Option<String>,
    /// Auth scheme tag ("bearer", "header", "none", ...); defaults to "bearer".
    #[serde(default = "default_bearer")]
    pub auth_style: String,
    /// Header name used when `auth_style` is header-based (e.g. "x-api-key").
    #[serde(default)]
    pub auth_header: Option<String>,
    /// Env var(s) that may hold the API key; see `provider_key_available`.
    #[serde(default)]
    pub auth_env: AuthEnv,
    /// Additional static headers to send with every request.
    #[serde(default)]
    pub extra_headers: BTreeMap<String, String>,
    /// Path of the chat endpoint relative to `base_url`.
    #[serde(default)]
    pub chat_endpoint: String,
    /// Optional legacy/completions endpoint path.
    #[serde(default)]
    pub completion_endpoint: Option<String>,
    /// Optional liveness probe definition.
    #[serde(default)]
    pub healthcheck: Option<HealthcheckDef>,
    /// Feature tags (e.g. "native_tools", "prompt_caching").
    #[serde(default)]
    pub features: Vec<String>,
    /// Name of another provider to fall back to — presumably on failure; confirm against caller.
    #[serde(default)]
    pub fallback: Option<String>,
    #[serde(default)]
    pub retry_count: Option<u32>,
    #[serde(default)]
    pub retry_delay_ms: Option<u64>,
    /// Requests-per-minute budget — TODO confirm enforcement site.
    #[serde(default)]
    pub rpm: Option<u32>,
    /// Fallback per-1k-token costs used when the model catalog has no pricing.
    #[serde(default)]
    pub cost_per_1k_in: Option<f64>,
    #[serde(default)]
    pub cost_per_1k_out: Option<f64>,
    /// Typical (p50) latency in milliseconds.
    #[serde(default)]
    pub latency_p50_ms: Option<u64>,
}
134
/// Manual `Default` (instead of `#[derive(Default)]`) because `auth_style`
/// must start as "bearer" to match the serde field default.
impl Default for ProviderDef {
    fn default() -> Self {
        Self {
            display_name: None,
            icon: None,
            base_url: String::new(),
            base_url_env: None,
            auth_style: default_bearer(),
            auth_header: None,
            auth_env: AuthEnv::None,
            extra_headers: BTreeMap::new(),
            chat_endpoint: String::new(),
            completion_endpoint: None,
            healthcheck: None,
            features: Vec::new(),
            fallback: None,
            retry_count: None,
            retry_delay_ms: None,
            rpm: None,
            cost_per_1k_in: None,
            cost_per_1k_out: None,
            latency_p50_ms: None,
        }
    }
}
160
/// Serde default for `ProviderDef::auth_style`.
fn default_bearer() -> String {
    String::from("bearer")
}
164
/// Environment variable(s) that may carry a provider's API key.
/// `untagged` lets the TOML value be absent, a bare string, or a string list.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(untagged)]
pub enum AuthEnv {
    /// No key env var configured.
    #[default]
    None,
    /// A single candidate env var name.
    Single(String),
    /// Several candidate env var names; any non-empty one counts (see `provider_key_available`).
    Multiple(Vec<String>),
}
175
/// HTTP liveness probe: either a `path` relative to the provider base URL or
/// an absolute `url`, with an optional request `body`.
#[derive(Debug, Clone, Deserialize)]
pub struct HealthcheckDef {
    /// HTTP method, e.g. "GET" or "POST".
    pub method: String,
    #[serde(default)]
    pub path: Option<String>,
    #[serde(default)]
    pub url: Option<String>,
    #[serde(default)]
    pub body: Option<String>,
}
186
/// Maps a short alias name to a concrete (model id, provider) pair.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct AliasDef {
    /// Concrete model identifier the alias expands to.
    pub id: String,
    /// Provider key the alias is pinned to.
    pub provider: String,
    /// Explicit tool-call format; when absent it is derived from provider features.
    #[serde(default)]
    pub tool_format: Option<String>,
}
198
/// Token pricing expressed per million tokens (see `pricing_per_1k_for` for
/// the per-1k conversion).
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelPricing {
    pub input_per_mtok: f64,
    pub output_per_mtok: f64,
    /// Optional discounted rate for cache reads.
    #[serde(default)]
    pub cache_read_per_mtok: Option<f64>,
    /// Optional surcharge rate for cache writes.
    #[serde(default)]
    pub cache_write_per_mtok: Option<f64>,
}
208
/// Catalog entry describing a concrete model offering.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ModelDef {
    /// Provider-facing model identifier.
    pub name: String,
    /// Key into `ProvidersConfig::providers`.
    pub provider: String,
    /// Maximum context length in tokens.
    pub context_window: u64,
    /// Streaming timeout — presumably seconds; TODO confirm units against callers.
    #[serde(default)]
    pub stream_timeout: Option<f64>,
    /// Free-form capability tags.
    #[serde(default)]
    pub capabilities: Vec<String>,
    /// Optional pricing, surfaced by `model_pricing_per_mtok`.
    #[serde(default)]
    pub pricing: Option<ModelPricing>,
}
221
/// Fully-resolved model selection produced by `resolve_model_info`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct ResolvedModel {
    /// Normalized concrete model id.
    pub id: String,
    /// Provider name (from the alias or inferred).
    pub provider: String,
    /// The alias the selector matched, if any.
    pub alias: Option<String>,
    /// Tool-call format ("native"/"text"/explicit alias override).
    pub tool_format: String,
    /// Capability tier (e.g. "small"/"mid"/"frontier").
    pub tier: String,
}
230
/// Rule mapping model ids to a provider. Within a rule, `exact`, `pattern`
/// (glob) and `contains` are each checked; any hit selects `provider`.
#[derive(Debug, Clone, Deserialize)]
pub struct InferenceRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub provider: String,
}
241
/// Rule mapping model ids to a tier; same matching semantics as `InferenceRule`.
#[derive(Debug, Clone, Deserialize)]
pub struct TierRule {
    #[serde(default)]
    pub pattern: Option<String>,
    #[serde(default)]
    pub contains: Option<String>,
    #[serde(default)]
    pub exact: Option<String>,
    pub tier: String,
}
252
/// Fallback tier configuration; `default` applies when no tier rule or
/// built-in heuristic matches.
#[derive(Debug, Clone, Deserialize)]
pub struct TierDefaults {
    #[serde(default = "default_mid")]
    pub default: String,
}
258
259impl Default for TierDefaults {
260 fn default() -> Self {
261 Self {
262 default: default_mid(),
263 }
264 }
265}
266
/// Serde and `Default` fallback for the tier name.
fn default_mid() -> String {
    String::from("mid")
}
270
/// Loads the global providers configuration exactly once and caches it.
///
/// Resolution order:
/// 1. path in `HARN_PROVIDERS_CONFIG` (read/parse errors are logged, then fall through);
/// 2. `~/.config/harn/providers.toml` (silently skipped on failure);
/// 3. built-in `default_config()`.
///
/// Extra logging is enabled by `HARN_VERBOSE_CONFIG` or `HARN_ACP_VERBOSE`
/// set to one of the truthy spellings matched below.
pub fn load_config() -> &'static ProvidersConfig {
    CONFIG.get_or_init(|| {
        let verbose_config_logging = matches!(
            std::env::var("HARN_VERBOSE_CONFIG").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        ) || matches!(
            std::env::var("HARN_ACP_VERBOSE").ok().as_deref(),
            Some("1" | "true" | "TRUE" | "yes" | "YES")
        );
        if let Ok(path) = std::env::var("HARN_PROVIDERS_CONFIG") {
            match std::fs::read_to_string(&path) {
                Ok(content) => match toml::from_str::<ProvidersConfig>(&content) {
                    Ok(config) => {
                        if verbose_config_logging {
                            eprintln!(
                                "[llm_config] Loaded {} providers, {} aliases from {}",
                                config.providers.len(),
                                config.aliases.len(),
                                path
                            );
                        }
                        // Record where the config came from; `set` only fails if already set.
                        let _ = CONFIG_PATH.set(path);
                        return config;
                    }
                    Err(e) => eprintln!("[llm_config] TOML parse error in {}: {}", path, e),
                },
                Err(e) => eprintln!("[llm_config] Cannot read {}: {}", path, e),
            }
        }
        if let Some(home) = dirs_or_home() {
            let path = format!("{home}/.config/harn/providers.toml");
            if let Ok(content) = std::fs::read_to_string(&path) {
                if let Ok(config) = toml::from_str::<ProvidersConfig>(&content) {
                    let _ = CONFIG_PATH.set(path);
                    return config;
                }
            }
        }
        default_config()
    })
}
313
314pub fn loaded_config_path() -> Option<std::path::PathBuf> {
317 let _ = load_config();
319 CONFIG_PATH.get().map(std::path::PathBuf::from)
320}
321
322pub fn set_user_overrides(config: Option<ProvidersConfig>) {
326 USER_OVERRIDES.with(|cell| *cell.borrow_mut() = config);
327}
328
329pub fn clear_user_overrides() {
331 set_user_overrides(None);
332}
333
334fn effective_config() -> ProvidersConfig {
335 let mut merged = load_config().clone();
336 USER_OVERRIDES.with(|cell| {
337 if let Some(overlay) = cell.borrow().as_ref() {
338 merged.merge_from(overlay);
339 }
340 });
341 merged
342}
343
344pub fn resolve_model(alias: &str) -> (String, Option<String>) {
346 let config = effective_config();
347 if let Some(a) = config.aliases.get(alias) {
348 return (a.id.clone(), Some(a.provider.clone()));
349 }
350 (normalize_model_id(alias), None)
351}
352
/// Strips a recognized scheme prefix ("ollama:", "local:", "huggingface:",
/// "hf:") from a raw selector, returning the bare model id.
pub fn normalize_model_id(raw: &str) -> String {
    const PREFIXES: [&str; 4] = ["ollama:", "local:", "huggingface:", "hf:"];
    PREFIXES
        .iter()
        .find_map(|prefix| raw.strip_prefix(prefix))
        .unwrap_or(raw)
        .to_string()
}
365
366pub fn resolve_model_info(selector: &str) -> ResolvedModel {
369 let config = effective_config();
370 if let Some(alias) = config.aliases.get(selector) {
371 let id = alias.id.clone();
372 let provider = alias.provider.clone();
373 let tool_format = alias
374 .tool_format
375 .clone()
376 .unwrap_or_else(|| default_tool_format_with_config(&config, &id, &provider));
377 return ResolvedModel {
378 tier: model_tier_with_config(&config, &id),
379 id,
380 provider,
381 alias: Some(selector.to_string()),
382 tool_format,
383 };
384 }
385
386 let provider = infer_provider_with_config(&config, selector).provider;
387 let id = normalize_model_id(selector);
388 let tool_format = default_tool_format_with_config(&config, &id, &provider);
389 let tier = model_tier_with_config(&config, &id);
390 ResolvedModel {
391 id,
392 provider,
393 alias: None,
394 tool_format,
395 tier,
396 }
397}
398
399pub fn infer_provider(model_id: &str) -> String {
401 infer_provider_detail(model_id).provider
402}
403
404pub(crate) fn infer_provider_detail(model_id: &str) -> crate::llm::provider::ProviderInference {
406 let config = effective_config();
407 infer_provider_with_config(&config, model_id)
408}
409
410fn infer_provider_with_config(
411 config: &ProvidersConfig,
412 model_id: &str,
413) -> crate::llm::provider::ProviderInference {
414 if model_id.starts_with("local:") || model_id.starts_with("ollama:") {
415 return crate::llm::provider::ProviderInference::builtin("ollama");
416 }
417 if model_id.starts_with("huggingface:") || model_id.starts_with("hf:") {
418 return crate::llm::provider::ProviderInference::builtin("huggingface");
419 }
420 for rule in &config.inference_rules {
421 if let Some(exact) = &rule.exact {
422 if model_id == exact {
423 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
424 }
425 }
426 if let Some(pattern) = &rule.pattern {
427 if glob_match(pattern, model_id) {
428 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
429 }
430 }
431 if let Some(substr) = &rule.contains {
432 if model_id.contains(substr.as_str()) {
433 return crate::llm::provider::ProviderInference::builtin(rule.provider.clone());
434 }
435 }
436 }
437 crate::llm::provider::infer_provider_from_model_id(
438 model_id,
439 &default_provider_with_config(config),
440 )
441}
442
443pub fn default_provider() -> String {
444 let config = effective_config();
445 default_provider_with_config(&config)
446}
447
448fn default_provider_with_config(config: &ProvidersConfig) -> String {
449 std::env::var("HARN_DEFAULT_PROVIDER")
450 .ok()
451 .map(|value| value.trim().to_string())
452 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
453 .or_else(|| {
454 config
455 .default_provider
456 .as_deref()
457 .map(str::trim)
458 .filter(|value| !value.is_empty() && !value.eq_ignore_ascii_case("auto"))
459 .map(str::to_string)
460 })
461 .unwrap_or_else(|| "anthropic".to_string())
462}
463
464pub fn model_tier(model_id: &str) -> String {
466 let config = effective_config();
467 model_tier_with_config(&config, model_id)
468}
469
470fn model_tier_with_config(config: &ProvidersConfig, model_id: &str) -> String {
471 for rule in &config.tier_rules {
472 if let Some(exact) = &rule.exact {
473 if model_id == exact {
474 return rule.tier.clone();
475 }
476 }
477 if let Some(pattern) = &rule.pattern {
478 if glob_match(pattern, model_id) {
479 return rule.tier.clone();
480 }
481 }
482 if let Some(substr) = &rule.contains {
483 if model_id.contains(substr.as_str()) {
484 return rule.tier.clone();
485 }
486 }
487 }
488 let lower = model_id.to_lowercase();
489 if lower.contains("9b") || lower.contains("a3b") {
490 return "small".to_string();
491 }
492 if lower.starts_with("claude-") || lower == "gpt-4o" {
493 return "frontier".to_string();
494 }
495 config.tier_defaults.default.clone()
496}
497
/// Returns a clone of the provider definition for `name`, if configured.
pub fn provider_config(name: &str) -> Option<ProviderDef> {
    effective_config().providers.get(name).cloned()
}
502
503pub fn model_params(model_id: &str) -> BTreeMap<String, toml::Value> {
506 let config = effective_config();
507 let mut params = BTreeMap::new();
508 for (pattern, defaults) in &config.model_defaults {
509 if glob_match(pattern, model_id) {
510 for (k, v) in defaults {
511 params.insert(k.clone(), v.clone());
512 }
513 }
514 }
515 params
516}
517
/// All configured provider names, in BTreeMap (sorted) order.
pub fn provider_names() -> Vec<String> {
    effective_config().providers.keys().cloned().collect()
}
522
/// All configured alias names (despite the name, these are aliases, not
/// entries from the `models` catalog), in sorted order.
pub fn known_model_names() -> Vec<String> {
    effective_config().aliases.keys().cloned().collect()
}
527
/// All `(alias name, definition)` pairs, in sorted order.
pub fn alias_entries() -> Vec<(String, AliasDef)> {
    effective_config().aliases.into_iter().collect()
}
531
532pub fn model_catalog_entries() -> Vec<(String, ModelDef)> {
534 let mut entries: Vec<_> = effective_config().models.into_iter().collect();
535 entries.sort_by(|(id_a, model_a), (id_b, model_b)| {
536 model_a
537 .provider
538 .cmp(&model_b.provider)
539 .then_with(|| id_a.cmp(id_b))
540 });
541 entries
542}
543
/// Catalog entry for `model_id`, if one is configured.
pub fn model_catalog_entry(model_id: &str) -> Option<ModelDef> {
    effective_config().models.get(model_id).cloned()
}
547
/// Default QC model for `provider` (matched case-insensitively). A non-blank
/// `BURIN_QC_MODEL` env var overrides the configured table.
pub fn qc_default_model(provider: &str) -> Option<String> {
    std::env::var("BURIN_QC_MODEL")
        .ok()
        .filter(|value| !value.trim().is_empty())
        .or_else(|| {
            effective_config()
                .qc_defaults
                .get(&provider.to_lowercase())
                .cloned()
        })
}
559
/// The full provider -> QC-model defaults table from the effective config.
pub fn qc_defaults() -> BTreeMap<String, String> {
    effective_config().qc_defaults
}
563
/// Per-million-token pricing for `model_id` from the model catalog, if listed.
pub fn model_pricing_per_mtok(model_id: &str) -> Option<ModelPricing> {
    effective_config()
        .models
        .get(model_id)
        .and_then(|model| model.pricing.clone())
}
570
571pub fn pricing_per_1k_for(provider: &str, model_id: &str) -> Option<(f64, f64)> {
572 model_pricing_per_mtok(model_id)
573 .map(|pricing| {
574 (
575 pricing.input_per_mtok / 1000.0,
576 pricing.output_per_mtok / 1000.0,
577 )
578 })
579 .or_else(|| {
580 let (input, output, _) = provider_economics(provider);
581 match (input, output) {
582 (Some(input), Some(output)) => Some((input, output)),
583 _ => None,
584 }
585 })
586}
587
588pub fn auth_env_names(auth_env: &AuthEnv) -> Vec<String> {
589 match auth_env {
590 AuthEnv::None => Vec::new(),
591 AuthEnv::Single(name) => vec![name.clone()],
592 AuthEnv::Multiple(names) => names.clone(),
593 }
594}
595
596pub fn provider_key_available(provider: &str) -> bool {
597 let Some(pdef) = provider_config(provider) else {
598 return provider == "ollama";
599 };
600 if pdef.auth_style == "none" || matches!(pdef.auth_env, AuthEnv::None) {
601 return true;
602 }
603 auth_env_names(&pdef.auth_env).into_iter().any(|env_name| {
604 std::env::var(env_name)
605 .ok()
606 .is_some_and(|value| !value.trim().is_empty())
607 })
608}
609
610pub fn available_provider_names() -> Vec<String> {
611 provider_names()
612 .into_iter()
613 .filter(|provider| provider_key_available(provider))
614 .collect()
615}
616
617pub fn provider_has_feature(provider: &str, feature: &str) -> bool {
619 provider_config(provider)
620 .map(|p| p.features.iter().any(|f| f == feature))
621 .unwrap_or(false)
622}
623
624pub fn provider_economics(provider: &str) -> (Option<f64>, Option<f64>, Option<u64>) {
628 provider_config(provider)
629 .map(|p| (p.cost_per_1k_in, p.cost_per_1k_out, p.latency_p50_ms))
630 .unwrap_or((None, None, None))
631}
632
633pub fn default_tool_format(model: &str, provider: &str) -> String {
636 let config = effective_config();
637 default_tool_format_with_config(&config, model, provider)
638}
639
640fn default_tool_format_with_config(
641 config: &ProvidersConfig,
642 model: &str,
643 provider: &str,
644) -> String {
645 for (name, alias) in &config.aliases {
647 let matches = (alias.id == model && alias.provider == provider) || name == model;
648 if matches {
649 if let Some(ref fmt) = alias.tool_format {
650 return fmt.clone();
651 }
652 }
653 }
654 if config
655 .providers
656 .get(provider)
657 .map(|p| p.features.iter().any(|f| f == "native_tools"))
658 .unwrap_or(false)
659 {
660 "native".to_string()
661 } else {
662 "text".to_string()
663 }
664}
665
666pub fn resolve_tier_model(
668 target: &str,
669 preferred_provider: Option<&str>,
670) -> Option<(String, String)> {
671 let config = effective_config();
672
673 if let Some(alias) = config.aliases.get(target) {
674 return Some((alias.id.clone(), alias.provider.clone()));
675 }
676
677 let candidate_aliases = if let Some(provider) = preferred_provider {
678 vec![
679 format!("{provider}/{target}"),
680 format!("{provider}:{target}"),
681 format!("tier/{target}"),
682 target.to_string(),
683 ]
684 } else {
685 vec![format!("tier/{target}"), target.to_string()]
686 };
687
688 for alias_name in candidate_aliases {
689 if let Some(alias) = config.aliases.get(&alias_name) {
690 return Some((alias.id.clone(), alias.provider.clone()));
691 }
692 }
693
694 None
695}
696
697pub fn tier_candidates(target: &str) -> Vec<(String, String)> {
701 let config = effective_config();
702 let mut seen = std::collections::BTreeSet::new();
703 let mut candidates = Vec::new();
704
705 for alias in config.aliases.values() {
706 let pair = (alias.id.clone(), alias.provider.clone());
707 if seen.contains(&pair) {
708 continue;
709 }
710 if model_tier(&alias.id) == target {
711 seen.insert(pair.clone());
712 candidates.push(pair);
713 }
714 }
715
716 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
717 provider_a
718 .cmp(provider_b)
719 .then_with(|| model_a.cmp(model_b))
720 });
721 candidates
722}
723
724pub fn all_model_candidates() -> Vec<(String, String)> {
727 let config = effective_config();
728 let mut seen = std::collections::BTreeSet::new();
729 let mut candidates = Vec::new();
730
731 for alias in config.aliases.values() {
732 let pair = (alias.id.clone(), alias.provider.clone());
733 if seen.insert(pair.clone()) {
734 candidates.push(pair);
735 }
736 }
737
738 candidates.sort_by(|(model_a, provider_a), (model_b, provider_b)| {
739 provider_a
740 .cmp(provider_b)
741 .then_with(|| model_a.cmp(model_b))
742 });
743 candidates
744}
745
/// Matches `input` against a glob `pattern` where each `*` matches any
/// (possibly empty) substring. A pattern without `*` requires exact equality.
///
/// Unlike the previous implementation, this handles any number of wildcards
/// (e.g. `*abc*`, `a*b*c`) and does not let the prefix and suffix literals
/// overlap (so `a*a` no longer matches the single-character input `a`).
fn glob_match(pattern: &str, input: &str) -> bool {
    if !pattern.contains('*') {
        return pattern == input;
    }
    let parts: Vec<&str> = pattern.split('*').collect();
    // Anchor the literal text before the first `*`.
    let Some(mut rest) = input.strip_prefix(parts[0]) else {
        return false;
    };
    // Each interior literal must occur after the previous one, without overlap.
    for part in &parts[1..parts.len() - 1] {
        match rest.find(part) {
            Some(idx) => rest = &rest[idx + part.len()..],
            None => return false,
        }
    }
    // Anchor the literal text after the last `*`.
    rest.ends_with(parts[parts.len() - 1])
}
763
/// Best-effort home directory lookup without the `dirs` crate: `HOME`
/// (Unix-style), falling back to `USERPROFILE` for Windows environments
/// where `HOME` is typically unset.
fn dirs_or_home() -> Option<String> {
    std::env::var("HOME")
        .or_else(|_| std::env::var("USERPROFILE"))
        .ok()
}
767
768pub fn resolve_base_url(pdef: &ProviderDef) -> String {
771 if let Some(env_name) = &pdef.base_url_env {
772 if let Ok(val) = std::env::var(env_name) {
773 let trimmed = val.trim().trim_matches('"').trim_matches('\'');
775 if !trimmed.is_empty() {
776 return trimmed.to_string();
777 }
778 }
779 }
780 pdef.base_url.clone()
781}
782
783fn default_config() -> ProvidersConfig {
784 let mut config = ProvidersConfig {
785 default_provider: Some("anthropic".to_string()),
786 ..Default::default()
787 };
788
789 config.providers.insert(
790 "anthropic".to_string(),
791 ProviderDef {
792 base_url: "https://api.anthropic.com/v1".to_string(),
793 auth_style: "header".to_string(),
794 auth_header: Some("x-api-key".to_string()),
795 auth_env: AuthEnv::Single("ANTHROPIC_API_KEY".to_string()),
796 extra_headers: BTreeMap::from([(
797 "anthropic-version".to_string(),
798 "2023-06-01".to_string(),
799 )]),
800 chat_endpoint: "/messages".to_string(),
801 completion_endpoint: None,
802 healthcheck: Some(HealthcheckDef {
803 method: "POST".to_string(),
804 path: Some("/messages/count_tokens".to_string()),
805 url: None,
806 body: Some(
807 r#"{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":"x"}]}"#
808 .to_string(),
809 ),
810 }),
811 features: vec!["prompt_caching".to_string(), "thinking".to_string()],
812 cost_per_1k_in: Some(0.003),
813 cost_per_1k_out: Some(0.015),
814 latency_p50_ms: Some(2500),
815 ..Default::default()
816 },
817 );
818
819 config.providers.insert(
821 "openai".to_string(),
822 ProviderDef {
823 base_url: "https://api.openai.com/v1".to_string(),
824 auth_style: "bearer".to_string(),
825 auth_env: AuthEnv::Single("OPENAI_API_KEY".to_string()),
826 chat_endpoint: "/chat/completions".to_string(),
827 completion_endpoint: Some("/completions".to_string()),
828 healthcheck: Some(HealthcheckDef {
829 method: "GET".to_string(),
830 path: Some("/models".to_string()),
831 url: None,
832 body: None,
833 }),
834 cost_per_1k_in: Some(0.0025),
835 cost_per_1k_out: Some(0.010),
836 latency_p50_ms: Some(1800),
837 ..Default::default()
838 },
839 );
840
841 config.providers.insert(
843 "openrouter".to_string(),
844 ProviderDef {
845 base_url: "https://openrouter.ai/api/v1".to_string(),
846 auth_style: "bearer".to_string(),
847 auth_env: AuthEnv::Single("OPENROUTER_API_KEY".to_string()),
848 chat_endpoint: "/chat/completions".to_string(),
849 completion_endpoint: Some("/completions".to_string()),
850 healthcheck: Some(HealthcheckDef {
851 method: "GET".to_string(),
852 path: Some("/auth/key".to_string()),
853 url: None,
854 body: None,
855 }),
856 cost_per_1k_in: Some(0.003),
857 cost_per_1k_out: Some(0.015),
858 latency_p50_ms: Some(2200),
859 ..Default::default()
860 },
861 );
862
863 config.providers.insert(
865 "huggingface".to_string(),
866 ProviderDef {
867 base_url: "https://router.huggingface.co/v1".to_string(),
868 auth_style: "bearer".to_string(),
869 auth_env: AuthEnv::Multiple(vec![
870 "HF_TOKEN".to_string(),
871 "HUGGINGFACE_API_KEY".to_string(),
872 ]),
873 chat_endpoint: "/chat/completions".to_string(),
874 completion_endpoint: Some("/completions".to_string()),
875 healthcheck: Some(HealthcheckDef {
876 method: "GET".to_string(),
877 url: Some("https://huggingface.co/api/whoami-v2".to_string()),
878 path: None,
879 body: None,
880 }),
881 cost_per_1k_in: Some(0.0002),
882 cost_per_1k_out: Some(0.0006),
883 latency_p50_ms: Some(2400),
884 ..Default::default()
885 },
886 );
887
888 config.providers.insert(
897 "ollama".to_string(),
898 ProviderDef {
899 base_url: "http://localhost:11434".to_string(),
900 base_url_env: Some("OLLAMA_HOST".to_string()),
901 auth_style: "none".to_string(),
902 chat_endpoint: "/api/chat".to_string(),
903 completion_endpoint: Some("/api/generate".to_string()),
904 healthcheck: Some(HealthcheckDef {
905 method: "GET".to_string(),
906 path: Some("/api/tags".to_string()),
907 url: None,
908 body: None,
909 }),
910 cost_per_1k_in: Some(0.0),
911 cost_per_1k_out: Some(0.0),
912 latency_p50_ms: Some(1200),
913 ..Default::default()
914 },
915 );
916
917 config.providers.insert(
919 "gemini".to_string(),
920 ProviderDef {
921 base_url: "https://generativelanguage.googleapis.com".to_string(),
922 base_url_env: Some("GEMINI_BASE_URL".to_string()),
923 auth_style: "header".to_string(),
924 auth_header: Some("x-goog-api-key".to_string()),
925 auth_env: AuthEnv::Multiple(vec![
926 "GEMINI_API_KEY".to_string(),
927 "GOOGLE_API_KEY".to_string(),
928 ]),
929 chat_endpoint: "/v1beta/models".to_string(),
930 healthcheck: Some(HealthcheckDef {
931 method: "GET".to_string(),
932 path: Some("/v1beta/models".to_string()),
933 url: None,
934 body: None,
935 }),
936 cost_per_1k_in: Some(0.00125),
937 cost_per_1k_out: Some(0.005),
938 latency_p50_ms: Some(1800),
939 ..Default::default()
940 },
941 );
942
943 config.providers.insert(
945 "together".to_string(),
946 ProviderDef {
947 base_url: "https://api.together.xyz/v1".to_string(),
948 base_url_env: Some("TOGETHER_AI_BASE_URL".to_string()),
949 auth_style: "bearer".to_string(),
950 auth_env: AuthEnv::Single("TOGETHER_AI_API_KEY".to_string()),
951 chat_endpoint: "/chat/completions".to_string(),
952 completion_endpoint: Some("/completions".to_string()),
953 healthcheck: Some(HealthcheckDef {
954 method: "GET".to_string(),
955 path: Some("/models".to_string()),
956 url: None,
957 body: None,
958 }),
959 cost_per_1k_in: Some(0.0002),
960 cost_per_1k_out: Some(0.0006),
961 latency_p50_ms: Some(1600),
962 ..Default::default()
963 },
964 );
965
966 config.providers.insert(
968 "groq".to_string(),
969 ProviderDef {
970 base_url: "https://api.groq.com/openai/v1".to_string(),
971 base_url_env: Some("GROQ_BASE_URL".to_string()),
972 auth_style: "bearer".to_string(),
973 auth_env: AuthEnv::Single("GROQ_API_KEY".to_string()),
974 chat_endpoint: "/chat/completions".to_string(),
975 completion_endpoint: Some("/completions".to_string()),
976 healthcheck: Some(HealthcheckDef {
977 method: "GET".to_string(),
978 path: Some("/models".to_string()),
979 url: None,
980 body: None,
981 }),
982 cost_per_1k_in: Some(0.0001),
983 cost_per_1k_out: Some(0.0003),
984 latency_p50_ms: Some(450),
985 ..Default::default()
986 },
987 );
988
989 config.providers.insert(
991 "deepseek".to_string(),
992 ProviderDef {
993 base_url: "https://api.deepseek.com/v1".to_string(),
994 base_url_env: Some("DEEPSEEK_BASE_URL".to_string()),
995 auth_style: "bearer".to_string(),
996 auth_env: AuthEnv::Single("DEEPSEEK_API_KEY".to_string()),
997 chat_endpoint: "/chat/completions".to_string(),
998 completion_endpoint: Some("/completions".to_string()),
999 healthcheck: Some(HealthcheckDef {
1000 method: "GET".to_string(),
1001 path: Some("/models".to_string()),
1002 url: None,
1003 body: None,
1004 }),
1005 cost_per_1k_in: Some(0.00014),
1006 cost_per_1k_out: Some(0.00028),
1007 latency_p50_ms: Some(1800),
1008 ..Default::default()
1009 },
1010 );
1011
1012 config.providers.insert(
1014 "fireworks".to_string(),
1015 ProviderDef {
1016 base_url: "https://api.fireworks.ai/inference/v1".to_string(),
1017 base_url_env: Some("FIREWORKS_BASE_URL".to_string()),
1018 auth_style: "bearer".to_string(),
1019 auth_env: AuthEnv::Single("FIREWORKS_API_KEY".to_string()),
1020 chat_endpoint: "/chat/completions".to_string(),
1021 completion_endpoint: Some("/completions".to_string()),
1022 healthcheck: Some(HealthcheckDef {
1023 method: "GET".to_string(),
1024 path: Some("/models".to_string()),
1025 url: None,
1026 body: None,
1027 }),
1028 cost_per_1k_in: Some(0.0002),
1029 cost_per_1k_out: Some(0.0006),
1030 latency_p50_ms: Some(1400),
1031 ..Default::default()
1032 },
1033 );
1034
1035 config.providers.insert(
1037 "dashscope".to_string(),
1038 ProviderDef {
1039 base_url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1".to_string(),
1040 base_url_env: Some("DASHSCOPE_BASE_URL".to_string()),
1041 auth_style: "bearer".to_string(),
1042 auth_env: AuthEnv::Single("DASHSCOPE_API_KEY".to_string()),
1043 chat_endpoint: "/chat/completions".to_string(),
1044 completion_endpoint: Some("/completions".to_string()),
1045 healthcheck: Some(HealthcheckDef {
1046 method: "GET".to_string(),
1047 path: Some("/models".to_string()),
1048 url: None,
1049 body: None,
1050 }),
1051 cost_per_1k_in: Some(0.0003),
1052 cost_per_1k_out: Some(0.0012),
1053 latency_p50_ms: Some(1600),
1054 ..Default::default()
1055 },
1056 );
1057
1058 config.providers.insert(
1062 "bedrock".to_string(),
1063 ProviderDef {
1064 base_url: String::new(),
1065 base_url_env: Some("BEDROCK_BASE_URL".to_string()),
1066 auth_style: "aws_sigv4".to_string(),
1067 auth_env: AuthEnv::None,
1068 chat_endpoint: "/model/{model}/converse".to_string(),
1069 features: vec!["native_tools".to_string()],
1070 latency_p50_ms: Some(2600),
1071 ..Default::default()
1072 },
1073 );
1074
1075 config.providers.insert(
1079 "azure_openai".to_string(),
1080 ProviderDef {
1081 base_url: "https://{resource}.openai.azure.com".to_string(),
1082 base_url_env: Some("AZURE_OPENAI_ENDPOINT".to_string()),
1083 auth_style: "azure_openai".to_string(),
1084 auth_env: AuthEnv::Multiple(vec![
1085 "AZURE_OPENAI_API_KEY".to_string(),
1086 "AZURE_OPENAI_AD_TOKEN".to_string(),
1087 "AZURE_OPENAI_BEARER_TOKEN".to_string(),
1088 ]),
1089 chat_endpoint:
1090 "/openai/deployments/{deployment}/chat/completions?api-version={api_version}"
1091 .to_string(),
1092 features: vec!["native_tools".to_string()],
1093 cost_per_1k_in: Some(0.0025),
1094 cost_per_1k_out: Some(0.010),
1095 latency_p50_ms: Some(1900),
1096 ..Default::default()
1097 },
1098 );
1099
1100 config.providers.insert(
1102 "vertex".to_string(),
1103 ProviderDef {
1104 base_url: "https://aiplatform.googleapis.com/v1".to_string(),
1105 base_url_env: Some("VERTEX_AI_BASE_URL".to_string()),
1106 auth_style: "bearer".to_string(),
1107 auth_env: AuthEnv::Multiple(vec![
1108 "VERTEX_AI_ACCESS_TOKEN".to_string(),
1109 "GOOGLE_OAUTH_ACCESS_TOKEN".to_string(),
1110 "GOOGLE_APPLICATION_CREDENTIALS".to_string(),
1111 ]),
1112 chat_endpoint:
1113 "/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent"
1114 .to_string(),
1115 features: vec!["native_tools".to_string()],
1116 cost_per_1k_in: Some(0.00125),
1117 cost_per_1k_out: Some(0.005),
1118 latency_p50_ms: Some(2100),
1119 ..Default::default()
1120 },
1121 );
1122
1123 config.providers.insert(
1125 "local".to_string(),
1126 ProviderDef {
1127 base_url: "http://localhost:8000".to_string(),
1128 base_url_env: Some("LOCAL_LLM_BASE_URL".to_string()),
1129 auth_style: "none".to_string(),
1130 chat_endpoint: "/v1/chat/completions".to_string(),
1131 completion_endpoint: Some("/v1/completions".to_string()),
1132 healthcheck: Some(HealthcheckDef {
1133 method: "GET".to_string(),
1134 path: Some("/v1/models".to_string()),
1135 url: None,
1136 body: None,
1137 }),
1138 cost_per_1k_in: Some(0.0),
1139 cost_per_1k_out: Some(0.0),
1140 latency_p50_ms: Some(900),
1141 ..Default::default()
1142 },
1143 );
1144
1145 config.providers.insert(
1149 "mlx".to_string(),
1150 ProviderDef {
1151 base_url: "http://127.0.0.1:8002".to_string(),
1152 base_url_env: Some("MLX_BASE_URL".to_string()),
1153 auth_style: "none".to_string(),
1154 chat_endpoint: "/v1/chat/completions".to_string(),
1155 completion_endpoint: Some("/v1/completions".to_string()),
1156 healthcheck: Some(HealthcheckDef {
1157 method: "GET".to_string(),
1158 path: Some("/v1/models".to_string()),
1159 url: None,
1160 body: None,
1161 }),
1162 cost_per_1k_in: Some(0.0),
1163 cost_per_1k_out: Some(0.0),
1164 latency_p50_ms: Some(900),
1165 ..Default::default()
1166 },
1167 );
1168
1169 config.providers.insert(
1171 "vllm".to_string(),
1172 ProviderDef {
1173 base_url: "http://localhost:8000".to_string(),
1174 base_url_env: Some("VLLM_BASE_URL".to_string()),
1175 auth_style: "none".to_string(),
1176 chat_endpoint: "/v1/chat/completions".to_string(),
1177 completion_endpoint: Some("/v1/completions".to_string()),
1178 healthcheck: Some(HealthcheckDef {
1179 method: "GET".to_string(),
1180 path: Some("/v1/models".to_string()),
1181 url: None,
1182 body: None,
1183 }),
1184 cost_per_1k_in: Some(0.0),
1185 cost_per_1k_out: Some(0.0),
1186 latency_p50_ms: Some(800),
1187 ..Default::default()
1188 },
1189 );
1190
1191 config.providers.insert(
1193 "tgi".to_string(),
1194 ProviderDef {
1195 base_url: "http://localhost:8080".to_string(),
1196 base_url_env: Some("TGI_BASE_URL".to_string()),
1197 auth_style: "none".to_string(),
1198 chat_endpoint: "/v1/chat/completions".to_string(),
1199 completion_endpoint: Some("/v1/completions".to_string()),
1200 healthcheck: Some(HealthcheckDef {
1201 method: "GET".to_string(),
1202 path: Some("/health".to_string()),
1203 url: None,
1204 body: None,
1205 }),
1206 cost_per_1k_in: Some(0.0),
1207 cost_per_1k_out: Some(0.0),
1208 latency_p50_ms: Some(950),
1209 ..Default::default()
1210 },
1211 );
1212
1213 config.inference_rules = vec![
1215 InferenceRule {
1216 pattern: Some("claude-*".to_string()),
1217 contains: None,
1218 exact: None,
1219 provider: "anthropic".to_string(),
1220 },
1221 InferenceRule {
1222 pattern: Some("gpt-*".to_string()),
1223 contains: None,
1224 exact: None,
1225 provider: "openai".to_string(),
1226 },
1227 InferenceRule {
1228 pattern: Some("o1*".to_string()),
1229 contains: None,
1230 exact: None,
1231 provider: "openai".to_string(),
1232 },
1233 InferenceRule {
1234 pattern: Some("o3*".to_string()),
1235 contains: None,
1236 exact: None,
1237 provider: "openai".to_string(),
1238 },
1239 InferenceRule {
1240 pattern: Some("o4*".to_string()),
1241 contains: None,
1242 exact: None,
1243 provider: "openai".to_string(),
1244 },
1245 InferenceRule {
1246 pattern: Some("anthropic.claude-*".to_string()),
1247 contains: None,
1248 exact: None,
1249 provider: "bedrock".to_string(),
1250 },
1251 InferenceRule {
1252 pattern: Some("meta.llama*".to_string()),
1253 contains: None,
1254 exact: None,
1255 provider: "bedrock".to_string(),
1256 },
1257 InferenceRule {
1258 pattern: Some("amazon.*".to_string()),
1259 contains: None,
1260 exact: None,
1261 provider: "bedrock".to_string(),
1262 },
1263 InferenceRule {
1264 pattern: Some("mistral.*".to_string()),
1265 contains: None,
1266 exact: None,
1267 provider: "bedrock".to_string(),
1268 },
1269 InferenceRule {
1270 pattern: Some("cohere.*".to_string()),
1271 contains: None,
1272 exact: None,
1273 provider: "bedrock".to_string(),
1274 },
1275 InferenceRule {
1276 pattern: Some("gemini-*".to_string()),
1277 contains: None,
1278 exact: None,
1279 provider: "gemini".to_string(),
1280 },
1281 ];
1282
1283 config.tier_rules = vec![
1285 TierRule {
1286 contains: Some("9b".to_string()),
1287 pattern: None,
1288 exact: None,
1289 tier: "small".to_string(),
1290 },
1291 TierRule {
1292 contains: Some("a3b".to_string()),
1293 pattern: None,
1294 exact: None,
1295 tier: "small".to_string(),
1296 },
1297 TierRule {
1298 contains: Some("gemma-4-e2b".to_string()),
1299 pattern: None,
1300 exact: None,
1301 tier: "small".to_string(),
1302 },
1303 TierRule {
1304 contains: Some("gemma-4-e4b".to_string()),
1305 pattern: None,
1306 exact: None,
1307 tier: "small".to_string(),
1308 },
1309 TierRule {
1310 contains: Some("gemma-4-26b".to_string()),
1311 pattern: None,
1312 exact: None,
1313 tier: "mid".to_string(),
1314 },
1315 TierRule {
1316 contains: Some("gemma-4-31b".to_string()),
1317 pattern: None,
1318 exact: None,
1319 tier: "frontier".to_string(),
1320 },
1321 TierRule {
1322 contains: Some("gemma4:26b".to_string()),
1323 pattern: None,
1324 exact: None,
1325 tier: "mid".to_string(),
1326 },
1327 TierRule {
1328 contains: Some("gemma4:31b".to_string()),
1329 pattern: None,
1330 exact: None,
1331 tier: "frontier".to_string(),
1332 },
1333 TierRule {
1334 pattern: Some("claude-*".to_string()),
1335 contains: None,
1336 exact: None,
1337 tier: "frontier".to_string(),
1338 },
1339 TierRule {
1340 exact: Some("gpt-4o".to_string()),
1341 contains: None,
1342 pattern: None,
1343 tier: "frontier".to_string(),
1344 },
1345 ];
1346
1347 config.tier_defaults = TierDefaults {
1348 default: "mid".to_string(),
1349 };
1350
1351 config.aliases.insert(
1352 "frontier".to_string(),
1353 AliasDef {
1354 id: "claude-sonnet-4-20250514".to_string(),
1355 provider: "anthropic".to_string(),
1356 tool_format: None,
1357 },
1358 );
1359 config.aliases.insert(
1360 "tier/frontier".to_string(),
1361 AliasDef {
1362 id: "claude-sonnet-4-20250514".to_string(),
1363 provider: "anthropic".to_string(),
1364 tool_format: None,
1365 },
1366 );
1367 config.aliases.insert(
1368 "mid".to_string(),
1369 AliasDef {
1370 id: "gpt-4o-mini".to_string(),
1371 provider: "openai".to_string(),
1372 tool_format: None,
1373 },
1374 );
1375 config.aliases.insert(
1376 "tier/mid".to_string(),
1377 AliasDef {
1378 id: "gpt-4o-mini".to_string(),
1379 provider: "openai".to_string(),
1380 tool_format: None,
1381 },
1382 );
1383 config.aliases.insert(
1384 "small".to_string(),
1385 AliasDef {
1386 id: "Qwen/Qwen3.5-9B".to_string(),
1387 provider: "openrouter".to_string(),
1388 tool_format: None,
1389 },
1390 );
1391 config.aliases.insert(
1392 "tier/small".to_string(),
1393 AliasDef {
1394 id: "Qwen/Qwen3.5-9B".to_string(),
1395 provider: "openrouter".to_string(),
1396 tool_format: None,
1397 },
1398 );
1399 config.aliases.insert(
1400 "local-gemma4".to_string(),
1401 AliasDef {
1402 id: "gemma-4-26b-a4b-it".to_string(),
1403 provider: "local".to_string(),
1404 tool_format: None,
1405 },
1406 );
1407 config.aliases.insert(
1408 "local-gemma4-26b".to_string(),
1409 AliasDef {
1410 id: "gemma-4-26b-a4b-it".to_string(),
1411 provider: "local".to_string(),
1412 tool_format: None,
1413 },
1414 );
1415 config.aliases.insert(
1416 "local-gemma4-31b".to_string(),
1417 AliasDef {
1418 id: "gemma-4-31b-it".to_string(),
1419 provider: "local".to_string(),
1420 tool_format: None,
1421 },
1422 );
1423 config.aliases.insert(
1424 "local-gemma4-e4b".to_string(),
1425 AliasDef {
1426 id: "gemma-4-e4b-it".to_string(),
1427 provider: "local".to_string(),
1428 tool_format: None,
1429 },
1430 );
1431 config.aliases.insert(
1432 "local-gemma4-e2b".to_string(),
1433 AliasDef {
1434 id: "gemma-4-e2b-it".to_string(),
1435 provider: "local".to_string(),
1436 tool_format: None,
1437 },
1438 );
1439 config.aliases.insert(
1440 "mlx-qwen36-27b".to_string(),
1441 AliasDef {
1442 id: "unsloth/Qwen3.6-27B-UD-MLX-4bit".to_string(),
1443 provider: "mlx".to_string(),
1444 tool_format: None,
1445 },
1446 );
1447
1448 config.qc_defaults.extend(BTreeMap::from([
1449 (
1450 "anthropic".to_string(),
1451 "claude-3-5-haiku-20241022".to_string(),
1452 ),
1453 ("openai".to_string(), "gpt-4o-mini".to_string()),
1454 (
1455 "openrouter".to_string(),
1456 "google/gemini-2.5-flash".to_string(),
1457 ),
1458 ("ollama".to_string(), "llama3.2".to_string()),
1459 ("local".to_string(), "gpt-4o".to_string()),
1460 ]));
1461
1462 config.models.extend(BTreeMap::from([
1463 (
1464 "claude-sonnet-4-20250514".to_string(),
1465 ModelDef {
1466 name: "Claude Sonnet 4".to_string(),
1467 provider: "anthropic".to_string(),
1468 context_window: 200_000,
1469 stream_timeout: None,
1470 capabilities: vec![
1471 "tools".to_string(),
1472 "streaming".to_string(),
1473 "prompt_caching".to_string(),
1474 "thinking".to_string(),
1475 ],
1476 pricing: Some(ModelPricing {
1477 input_per_mtok: 3.0,
1478 output_per_mtok: 15.0,
1479 cache_read_per_mtok: Some(0.3),
1480 cache_write_per_mtok: Some(3.75),
1481 }),
1482 },
1483 ),
1484 (
1485 "gpt-4o-mini".to_string(),
1486 ModelDef {
1487 name: "GPT-4o Mini".to_string(),
1488 provider: "openai".to_string(),
1489 context_window: 128_000,
1490 stream_timeout: None,
1491 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1492 pricing: Some(ModelPricing {
1493 input_per_mtok: 0.15,
1494 output_per_mtok: 0.60,
1495 cache_read_per_mtok: None,
1496 cache_write_per_mtok: None,
1497 }),
1498 },
1499 ),
1500 (
1501 "Qwen/Qwen3.5-9B".to_string(),
1502 ModelDef {
1503 name: "Qwen3.5 9B".to_string(),
1504 provider: "openrouter".to_string(),
1505 context_window: 131_072,
1506 stream_timeout: None,
1507 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1508 pricing: None,
1509 },
1510 ),
1511 (
1512 "llama3.2".to_string(),
1513 ModelDef {
1514 name: "Llama 3.2".to_string(),
1515 provider: "ollama".to_string(),
1516 context_window: 32_000,
1517 stream_timeout: Some(300.0),
1518 capabilities: vec!["tools".to_string(), "streaming".to_string()],
1519 pricing: None,
1520 },
1521 ),
1522 ]));
1523
1524 config
1525}
1526
#[cfg(test)]
mod tests {
    use super::*;

    /// Clears any thread-local user overrides so each test starts from the
    /// built-in default configuration.
    fn reset_overrides() {
        clear_user_overrides();
    }

    /// RAII snapshot of a single environment variable.
    ///
    /// Captures the variable's prior value on construction and restores it
    /// (or removes the variable if it was unset) on drop — including when the
    /// test panics mid-assertion, which the previous hand-rolled
    /// save/mutate/restore sequences did not guarantee.
    ///
    /// Callers must hold `crate::llm::env_lock()` for the guard's entire
    /// lifetime so concurrent tests never observe the mutated environment.
    /// Declare the guard *after* the lock guard: locals drop in reverse
    /// declaration order, so the variable is restored before the lock is
    /// released.
    struct EnvVarSnapshot {
        key: &'static str,
        prev: Option<String>,
    }

    impl EnvVarSnapshot {
        /// Snapshots `key`, then removes it from the environment.
        fn removed(key: &'static str) -> Self {
            let prev = std::env::var(key).ok();
            // SAFETY: env mutation is serialized by the env lock held by the caller.
            unsafe {
                std::env::remove_var(key);
            }
            Self { key, prev }
        }

        /// Snapshots `key`, then sets it to `value`.
        fn set_to(key: &'static str, value: &str) -> Self {
            let prev = std::env::var(key).ok();
            // SAFETY: env mutation is serialized by the env lock held by the caller.
            unsafe {
                std::env::set_var(key, value);
            }
            Self { key, prev }
        }
    }

    impl Drop for EnvVarSnapshot {
        fn drop(&mut self) {
            // SAFETY: env mutation is serialized by the env lock, which the
            // creating test still holds at this point (see declaration-order
            // note on the type docs).
            unsafe {
                match self.prev.take() {
                    Some(value) => std::env::set_var(self.key, value),
                    None => std::env::remove_var(self.key),
                }
            }
        }
    }

    // --- glob_match -------------------------------------------------------

    #[test]
    fn test_glob_match_prefix() {
        // Trailing `*` matches any suffix after the literal prefix.
        assert!(glob_match("claude-*", "claude-sonnet-4-20250514"));
        assert!(glob_match("gpt-*", "gpt-4o"));
        assert!(!glob_match("claude-*", "gpt-4o"));
    }

    #[test]
    fn test_glob_match_suffix() {
        // Leading `*` matches any prefix before the literal suffix.
        assert!(glob_match("*-latest", "llama3.2-latest"));
        assert!(!glob_match("*-latest", "llama3.2"));
    }

    #[test]
    fn test_glob_match_middle() {
        // A `*` between two literals must be bracketed by both.
        assert!(glob_match("claude-*-latest", "claude-sonnet-latest"));
        assert!(!glob_match("claude-*-latest", "claude-sonnet-beta"));
    }

    #[test]
    fn test_glob_match_exact() {
        // No wildcard means a full, exact comparison.
        assert!(glob_match("gpt-4o", "gpt-4o"));
        assert!(!glob_match("gpt-4o", "gpt-4o-mini"));
    }

    // --- provider inference -----------------------------------------------

    #[test]
    fn test_infer_provider_from_defaults() {
        // Hold the env lock and clear HARN_DEFAULT_PROVIDER so the built-in
        // inference rules (not an env override) decide every case below.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let _env = EnvVarSnapshot::removed("HARN_DEFAULT_PROVIDER");

        assert_eq!(infer_provider("claude-sonnet-4-20250514"), "anthropic");
        assert_eq!(infer_provider("gpt-4o"), "openai");
        assert_eq!(infer_provider("o1-preview"), "openai");
        assert_eq!(infer_provider("o3-mini"), "openai");
        assert_eq!(infer_provider("o4-mini"), "openai");
        assert_eq!(infer_provider("gemini-2.5-pro"), "gemini");
        assert_eq!(infer_provider("qwen/qwen3-coder"), "openrouter");
        assert_eq!(infer_provider("llama3.2:latest"), "ollama");
        // With no env override, unknown models fall back to "anthropic".
        assert_eq!(infer_provider("unknown-model"), "anthropic");
    }

    #[test]
    fn test_infer_provider_prefix_rules() {
        // Explicit `scheme:` prefixes take precedence over pattern rules.
        assert_eq!(infer_provider("local:gemma-4-e4b-it"), "ollama");
        assert_eq!(infer_provider("ollama:qwen3:30b-a3b"), "ollama");
        assert_eq!(infer_provider("local:owner/model"), "ollama");
        assert_eq!(infer_provider("hf:Qwen/Qwen3.6-35B-A3B"), "huggingface");
    }

    #[test]
    fn test_openrouter_inference_requires_one_slash() {
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let _env = EnvVarSnapshot::removed("HARN_DEFAULT_PROVIDER");

        // Exactly one slash looks like an openrouter "org/model" id; more
        // slashes do not, so the default fallback applies instead.
        assert_eq!(infer_provider("org/model"), "openrouter");
        assert_eq!(infer_provider("org/team/model"), "anthropic");
    }

    #[test]
    fn test_resolve_model_info_normalizes_provider_prefixes() {
        // The `scheme:` prefix is stripped from the id and mapped to a provider.
        let local = resolve_model_info("local:gemma-4-e4b-it");
        assert_eq!(local.id, "gemma-4-e4b-it");
        assert_eq!(local.provider, "ollama");

        // Only the leading prefix is stripped; later colons stay in the id.
        let ollama = resolve_model_info("ollama:qwen3:30b-a3b");
        assert_eq!(ollama.id, "qwen3:30b-a3b");
        assert_eq!(ollama.provider, "ollama");

        let hf = resolve_model_info("hf:Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.id, "Qwen/Qwen3.6-35B-A3B");
        assert_eq!(hf.provider, "huggingface");
    }

    // --- tiers -------------------------------------------------------------

    #[test]
    fn test_model_tier_from_defaults() {
        assert_eq!(model_tier("claude-sonnet-4-20250514"), "frontier");
        assert_eq!(model_tier("gpt-4o"), "frontier");
        assert_eq!(model_tier("Qwen3.5-9B"), "small");
        // No rule matches, so the configured tier default ("mid") applies.
        assert_eq!(model_tier("deepseek-v3"), "mid");
    }

    #[test]
    fn test_resolve_model_unknown_alias() {
        // A name with no alias entry passes through unchanged, provider-less.
        let (id, provider) = resolve_model("gpt-4o");
        assert_eq!(id, "gpt-4o");
        assert!(provider.is_none());
    }

    #[test]
    fn test_provider_names() {
        let names = provider_names();
        // We assert membership of 9 distinct providers below, so the length
        // bound must be at least 9 (the previous `>= 7` was weaker than the
        // test itself proved).
        assert!(names.len() >= 9);
        assert!(names.contains(&"anthropic".to_string()));
        assert!(names.contains(&"together".to_string()));
        assert!(names.contains(&"local".to_string()));
        assert!(names.contains(&"mlx".to_string()));
        assert!(names.contains(&"openai".to_string()));
        assert!(names.contains(&"ollama".to_string()));
        assert!(names.contains(&"bedrock".to_string()));
        assert!(names.contains(&"azure_openai".to_string()));
        assert!(names.contains(&"vertex".to_string()));
    }

    #[test]
    fn test_resolve_tier_model_default_aliases() {
        // Bare tier names resolve through the default alias table.
        let (model, provider) = resolve_tier_model("frontier", None).unwrap();
        assert_eq!(model, "claude-sonnet-4-20250514");
        assert_eq!(provider, "anthropic");

        let (model, provider) = resolve_tier_model("small", None).unwrap();
        assert_eq!(model, "Qwen/Qwen3.5-9B");
        assert_eq!(provider, "openrouter");
    }

    #[test]
    fn test_resolve_tier_model_prefers_provider_scoped_aliases() {
        let (model, provider) = resolve_tier_model("mid", Some("openai")).unwrap();
        assert_eq!(model, "gpt-4o-mini");
        assert_eq!(provider, "openai");
    }

    // --- provider definitions ----------------------------------------------

    #[test]
    fn test_provider_config_anthropic() {
        let pdef = provider_config("anthropic").unwrap();
        assert_eq!(pdef.auth_style, "header");
        assert_eq!(pdef.auth_header.as_deref(), Some("x-api-key"));
    }

    #[test]
    fn test_provider_config_mlx() {
        let pdef = provider_config("mlx").unwrap();
        assert_eq!(pdef.base_url, "http://127.0.0.1:8002");
        assert_eq!(pdef.base_url_env.as_deref(), Some("MLX_BASE_URL"));
        assert_eq!(
            pdef.healthcheck.unwrap().path.as_deref(),
            Some("/v1/models")
        );

        // The built-in mlx alias resolves to a concrete model + provider.
        let (model, provider) = resolve_model("mlx-qwen36-27b");
        assert_eq!(model, "unsloth/Qwen3.6-27B-UD-MLX-4bit");
        assert_eq!(provider.as_deref(), Some("mlx"));
    }

    #[test]
    fn test_enterprise_provider_defaults_and_inference() {
        let bedrock = provider_config("bedrock").unwrap();
        assert_eq!(bedrock.auth_style, "aws_sigv4");
        assert_eq!(bedrock.base_url_env.as_deref(), Some("BEDROCK_BASE_URL"));
        assert_eq!(
            infer_provider("anthropic.claude-3-5-sonnet-20240620-v1:0"),
            "bedrock"
        );
        assert_eq!(infer_provider("meta.llama3-70b-instruct-v1:0"), "bedrock");

        let azure = provider_config("azure_openai").unwrap();
        assert_eq!(azure.base_url_env.as_deref(), Some("AZURE_OPENAI_ENDPOINT"));
        assert_eq!(
            auth_env_names(&azure.auth_env),
            vec![
                "AZURE_OPENAI_API_KEY".to_string(),
                "AZURE_OPENAI_AD_TOKEN".to_string(),
                "AZURE_OPENAI_BEARER_TOKEN".to_string(),
            ]
        );

        let vertex = provider_config("vertex").unwrap();
        assert_eq!(vertex.base_url, "https://aiplatform.googleapis.com/v1");
        assert_eq!(infer_provider("gemini-1.5-pro-002"), "gemini");
    }

    #[test]
    fn test_default_provider_env_override_for_unknown_model() {
        // Force the default-fallback path by setting the env override; the
        // guard restores the previous value even if an assertion panics.
        let _guard = crate::llm::env_lock().lock().expect("env lock");
        let _env = EnvVarSnapshot::set_to("HARN_DEFAULT_PROVIDER", "openai");

        let inference = infer_provider_detail("unknown-model");

        assert_eq!(inference.provider, "openai");
        assert_eq!(
            inference.source,
            crate::llm::provider::ProviderInferenceSource::DefaultFallback
        );
    }

    #[test]
    fn test_resolve_base_url_no_env() {
        // With no base_url_env configured, the static base_url is returned.
        let pdef = ProviderDef {
            base_url: "https://example.com".to_string(),
            ..Default::default()
        };
        assert_eq!(resolve_base_url(&pdef), "https://example.com");
    }

    #[test]
    fn test_default_config_roundtrip() {
        // Sanity-check that the built-in config is populated.
        let config = default_config();
        assert!(!config.providers.is_empty());
        assert!(!config.inference_rules.is_empty());
        assert!(!config.tier_rules.is_empty());
        assert_eq!(config.tier_defaults.default, "mid");
    }

    #[test]
    fn test_model_params_empty() {
        // No model_defaults entry exists for this model in the default config.
        let params = model_params("claude-sonnet-4-20250514");
        assert!(params.is_empty());
    }

    // --- user overrides (thread-local) -------------------------------------

    #[test]
    fn test_user_overrides_add_provider_and_alias() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.providers.insert(
            "acme".to_string(),
            ProviderDef {
                base_url: "https://llm.acme.test/v1".to_string(),
                chat_endpoint: "/chat/completions".to_string(),
                ..Default::default()
            },
        );
        overlay.aliases.insert(
            "acme-fast".to_string(),
            AliasDef {
                id: "acme/model-fast".to_string(),
                provider: "acme".to_string(),
                tool_format: Some("native".to_string()),
            },
        );
        set_user_overrides(Some(overlay));

        // Overlay providers/aliases are visible through the normal lookups.
        let (model, provider) = resolve_model("acme-fast");
        assert_eq!(model, "acme/model-fast");
        assert_eq!(provider.as_deref(), Some("acme"));
        assert!(provider_names().contains(&"acme".to_string()));
        assert_eq!(
            provider_config("acme").map(|provider| provider.base_url),
            Some("https://llm.acme.test/v1".to_string())
        );

        reset_overrides();
    }

    #[test]
    fn test_user_overrides_add_model_catalog_pricing_and_qc_defaults() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.models.insert(
            "acme/model-fast".to_string(),
            ModelDef {
                name: "Acme Fast".to_string(),
                provider: "acme".to_string(),
                context_window: 65_536,
                stream_timeout: Some(42.0),
                capabilities: vec!["tools".to_string(), "streaming".to_string()],
                pricing: Some(ModelPricing {
                    input_per_mtok: 1.25,
                    output_per_mtok: 2.5,
                    cache_read_per_mtok: Some(0.25),
                    cache_write_per_mtok: None,
                }),
            },
        );
        overlay
            .qc_defaults
            .insert("acme".to_string(), "acme/model-cheap".to_string());
        set_user_overrides(Some(overlay));

        let entry = model_catalog_entry("acme/model-fast").expect("catalog entry");
        assert_eq!(entry.context_window, 65_536);
        assert_eq!(
            entry.pricing.as_ref().map(|pricing| pricing.input_per_mtok),
            Some(1.25)
        );
        // Per-mtok pricing is scaled down to per-1k-token pricing.
        assert_eq!(
            pricing_per_1k_for("acme", "acme/model-fast"),
            Some((0.00125, 0.0025))
        );
        assert_eq!(
            qc_default_model("acme").as_deref(),
            Some("acme/model-cheap")
        );

        reset_overrides();
    }

    #[test]
    fn test_user_overrides_prepend_inference_rules() {
        reset_overrides();
        let mut overlay = ProvidersConfig::default();
        overlay.inference_rules.push(InferenceRule {
            pattern: Some("internal-*".to_string()),
            contains: None,
            exact: None,
            provider: "openai".to_string(),
        });
        set_user_overrides(Some(overlay));

        // Overlay rules are prepended, so they win over the built-in rules.
        assert_eq!(infer_provider("internal-foo"), "openai");

        reset_overrides();
    }
}