1use serde::{Deserialize, Serialize};
5use zeph_llm::{GeminiThinkingLevel, ThinkingConfig};
6
/// Serde default for `LlmConfig::response_cache_ttl_secs`: one hour, in seconds.
fn default_response_cache_ttl_secs() -> u64 {
    60 * 60
}
10
/// Serde default for `LlmConfig::semantic_cache_threshold`.
fn default_semantic_cache_threshold() -> f32 {
    const THRESHOLD: f32 = 0.95;
    THRESHOLD
}
14
/// Serde default for `LlmConfig::semantic_cache_max_candidates`.
fn default_semantic_cache_max_candidates() -> u32 {
    const MAX_CANDIDATES: u32 = 10;
    MAX_CANDIDATES
}
18
/// Serde default for `LlmConfig::router_ema_alpha` (EMA smoothing factor).
fn default_router_ema_alpha() -> f64 {
    const ALPHA: f64 = 0.1;
    ALPHA
}
22
/// Serde default for `LlmConfig::router_reorder_interval`.
fn default_router_reorder_interval() -> u64 {
    const INTERVAL: u64 = 10;
    INTERVAL
}
26
/// Serde default for the embedding model name.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}
30
/// Serde default for the candle model source.
fn default_candle_source() -> String {
    "huggingface".to_owned()
}
34
/// Serde default for the candle chat template name.
fn default_chat_template() -> String {
    String::from("chatml")
}
38
/// Serde default for the candle compute device.
fn default_candle_device() -> String {
    "cpu".to_owned()
}
42
/// Serde default for `GenerationParams::temperature`.
fn default_temperature() -> f64 {
    const TEMPERATURE: f64 = 0.7;
    TEMPERATURE
}
46
/// Serde default for `GenerationParams::max_tokens`.
fn default_max_tokens() -> usize {
    const MAX_TOKENS: usize = 2048;
    MAX_TOKENS
}
50
/// Serde default for `GenerationParams::seed` (fixed for reproducibility).
fn default_seed() -> u64 {
    const SEED: u64 = 42;
    SEED
}
54
/// Serde default for `GenerationParams::repeat_penalty`.
fn default_repeat_penalty() -> f32 {
    const PENALTY: f32 = 1.1;
    PENALTY
}
58
/// Serde default for `GenerationParams::repeat_last_n`.
fn default_repeat_last_n() -> usize {
    const LAST_N: usize = 64;
    LAST_N
}
62
/// Serde default for `CascadeConfig::quality_threshold`.
fn default_cascade_quality_threshold() -> f64 {
    const THRESHOLD: f64 = 0.5;
    THRESHOLD
}
66
/// Serde default for `CascadeConfig::max_escalations`.
fn default_cascade_max_escalations() -> u8 {
    const MAX_ESCALATIONS: u8 = 2;
    MAX_ESCALATIONS
}
70
/// Serde default for `CascadeConfig::window_size`.
fn default_cascade_window_size() -> usize {
    const WINDOW: usize = 50;
    WINDOW
}
74
/// Serde default for `ReputationConfig::decay_factor`.
fn default_reputation_decay_factor() -> f64 {
    const DECAY: f64 = 0.95;
    DECAY
}
78
/// Serde default for `ReputationConfig::weight`.
fn default_reputation_weight() -> f64 {
    const WEIGHT: f64 = 0.3;
    WEIGHT
}
82
/// Serde default for `ReputationConfig::min_observations`.
fn default_reputation_min_observations() -> u64 {
    const MIN_OBSERVATIONS: u64 = 5;
    MIN_OBSERVATIONS
}
86
/// Serde default for `SttConfig::provider`: empty, meaning "auto-detect the
/// first provider entry that carries an `stt_model`".
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}
91
/// Serde default for `SttConfig::language`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
96
/// Public accessor for the embedding-model default, for callers outside this
/// module (the serde default fn itself is private).
#[must_use]
pub fn get_default_embedding_model() -> String {
    default_embedding_model()
}
101
/// Public accessor for the response-cache TTL default (seconds).
#[must_use]
pub fn get_default_response_cache_ttl_secs() -> u64 {
    default_response_cache_ttl_secs()
}
106
/// Public accessor for the router EMA alpha default.
#[must_use]
pub fn get_default_router_ema_alpha() -> f64 {
    default_router_ema_alpha()
}
111
/// Public accessor for the router reorder-interval default.
#[must_use]
pub fn get_default_router_reorder_interval() -> u64 {
    default_router_reorder_interval()
}
116
/// Supported LLM backend kinds; (de)serialized in lowercase, e.g.
/// `type = "ollama"` in `[[llm.providers]]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
    /// Local Ollama server.
    Ollama,
    /// Anthropic Claude API.
    Claude,
    /// OpenAI API.
    OpenAi,
    /// Google Gemini API.
    Gemini,
    /// In-process candle runtime.
    Candle,
    /// OpenAI-compatible endpoint; requires an explicit `name` (see
    /// `ProviderEntry::validate`).
    Compatible,
}
128
129impl ProviderKind {
130 #[must_use]
131 pub fn as_str(self) -> &'static str {
132 match self {
133 Self::Ollama => "ollama",
134 Self::Claude => "claude",
135 Self::OpenAi => "openai",
136 Self::Gemini => "gemini",
137 Self::Candle => "candle",
138 Self::Compatible => "compatible",
139 }
140 }
141}
142
143impl std::fmt::Display for ProviderKind {
144 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
145 f.write_str(self.as_str())
146 }
147}
148
/// The `[llm]` configuration section: provider pool, routing strategy,
/// caching knobs, and optional sub-sections (STT, candle, complexity routing).
#[derive(Debug, Deserialize, Serialize)]
pub struct LlmConfig {
    /// Provider pool from `[[llm.providers]]` entries.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub providers: Vec<ProviderEntry>,

    /// Routing strategy across the pool; `None` (the default) is skipped on
    /// serialization.
    #[serde(default, skip_serializing_if = "is_routing_none")]
    pub routing: LlmRoutingStrategy,

    /// Named routes: route name -> ordered list of provider names.
    /// NOTE(review): consumers are not visible in this file; confirm exact
    /// semantics against the router code.
    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
    pub routes: std::collections::HashMap<String, Vec<String>>,

    /// Embedding model name (defaults to "qwen3-embedding").
    #[serde(default = "default_embedding_model_opt")]
    pub embedding_model: String,
    /// Optional `[llm.candle]` local-inference backend settings.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candle: Option<CandleConfig>,
    /// Optional `[llm.stt]` speech-to-text section.
    #[serde(default)]
    pub stt: Option<SttConfig>,
    /// Enables the response cache (off by default).
    #[serde(default)]
    pub response_cache_enabled: bool,
    /// Response-cache TTL in seconds (default 3600).
    #[serde(default = "default_response_cache_ttl_secs")]
    pub response_cache_ttl_secs: u64,
    /// Enables the semantic (similarity-based) cache (off by default).
    #[serde(default)]
    pub semantic_cache_enabled: bool,
    /// Similarity threshold for a semantic-cache hit (default 0.95) —
    /// presumably a cosine-similarity cutoff; confirm against the cache impl.
    #[serde(default = "default_semantic_cache_threshold")]
    pub semantic_cache_threshold: f32,
    /// Maximum candidates inspected per semantic-cache lookup (default 10).
    #[serde(default = "default_semantic_cache_max_candidates")]
    pub semantic_cache_max_candidates: u32,
    /// Enables EMA-based router behavior (off by default).
    #[serde(default)]
    pub router_ema_enabled: bool,
    /// EMA smoothing factor (default 0.1).
    #[serde(default = "default_router_ema_alpha")]
    pub router_ema_alpha: f64,
    /// Router reorder interval (default 10) — TODO confirm unit (requests?).
    #[serde(default = "default_router_reorder_interval")]
    pub router_reorder_interval: u64,
    /// Optional `[llm.router]` section with strategy-specific settings.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub router: Option<RouterConfig>,
    /// Optional path to a system-instruction file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instruction_file: Option<std::path::PathBuf>,
    /// Optional model override used for summarization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_model: Option<String>,
    /// Optional dedicated provider entry used for summarization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_provider: Option<ProviderEntry>,

    /// Optional `[llm.complexity_routing]` section (triage-based routing).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub complexity_routing: Option<ComplexityRoutingConfig>,
}
223
/// Serde default for `LlmConfig::embedding_model`; delegates to the shared
/// embedding-model default.
fn default_embedding_model_opt() -> String {
    default_embedding_model()
}
227
228#[allow(clippy::trivially_copy_pass_by_ref)]
229fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
230 *s == LlmRoutingStrategy::None
231}
232
233impl LlmConfig {
234 #[must_use]
236 pub fn effective_provider(&self) -> ProviderKind {
237 self.providers
238 .first()
239 .map_or(ProviderKind::Ollama, |e| e.provider_type)
240 }
241
242 #[must_use]
244 pub fn effective_base_url(&self) -> &str {
245 self.providers
246 .first()
247 .and_then(|e| e.base_url.as_deref())
248 .unwrap_or("http://localhost:11434")
249 }
250
251 #[must_use]
253 pub fn effective_model(&self) -> &str {
254 self.providers
255 .first()
256 .and_then(|e| e.model.as_deref())
257 .unwrap_or("qwen3:8b")
258 }
259
260 #[must_use]
268 pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
269 let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
270 if name_hint.is_empty() {
271 self.providers.iter().find(|p| p.stt_model.is_some())
272 } else {
273 self.providers
274 .iter()
275 .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
276 }
277 }
278
279 pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
285 Ok(())
286 }
287
288 pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
294 use crate::error::ConfigError;
295
296 let Some(stt) = &self.stt else {
297 return Ok(());
298 };
299 if stt.provider.is_empty() {
300 return Ok(());
301 }
302 let found = self
303 .providers
304 .iter()
305 .find(|p| p.effective_name() == stt.provider);
306 match found {
307 None => {
308 return Err(ConfigError::Validation(format!(
309 "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
310 stt.provider
311 )));
312 }
313 Some(entry) if entry.stt_model.is_none() => {
314 tracing::warn!(
315 provider = stt.provider,
316 "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
317 );
318 }
319 _ => {}
320 }
321 Ok(())
322 }
323}
324
/// The `[llm.stt]` speech-to-text section.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SttConfig {
    /// Name of the `[[llm.providers]]` entry to use for STT; empty (the
    /// default) means auto-detect the first entry with an `stt_model`.
    #[serde(default = "default_stt_provider")]
    pub provider: String,
    /// Transcription language hint (default "auto").
    #[serde(default = "default_stt_language")]
    pub language: String,
}
335
/// Router strategy selected in `[llm.router].strategy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RouterStrategyConfig {
    /// EMA-based strategy (the default).
    #[default]
    Ema,
    /// Thompson-sampling strategy; may persist state (see
    /// `RouterConfig::thompson_state_path`).
    Thompson,
    /// Cascade strategy, tuned via `RouterConfig::cascade`.
    Cascade,
}
348
/// The `[llm.router]` section.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RouterConfig {
    /// Strategy to run (defaults to `ema`).
    #[serde(default)]
    pub strategy: RouterStrategyConfig,
    /// Optional path where Thompson-sampling state is persisted.
    #[serde(default)]
    pub thompson_state_path: Option<String>,
    /// Cascade tuning, relevant when `strategy = "cascade"`.
    #[serde(default)]
    pub cascade: Option<CascadeConfig>,
    /// Optional provider-reputation tracking settings.
    #[serde(default)]
    pub reputation: Option<ReputationConfig>,
}
371
/// Provider-reputation tracking settings (`[llm.router.reputation]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ReputationConfig {
    /// Master switch; off by default.
    #[serde(default)]
    pub enabled: bool,
    /// Decay factor applied to past observations (default 0.95) —
    /// NOTE(review): exact decay schedule lives in the router; confirm.
    #[serde(default = "default_reputation_decay_factor")]
    pub decay_factor: f64,
    /// Weight of reputation in routing decisions (default 0.3) —
    /// NOTE(review): blending formula not visible here; confirm.
    #[serde(default = "default_reputation_weight")]
    pub weight: f64,
    /// Observations required before reputation is used (default 5).
    #[serde(default = "default_reputation_min_observations")]
    pub min_observations: u64,
    /// Optional path for persisting reputation state.
    #[serde(default)]
    pub state_path: Option<String>,
}
402
/// Cascade-routing tuning (`[llm.router.cascade]`).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Quality score below which a response escalates (default 0.5).
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Maximum escalations (default 2) — presumably per request; confirm
    /// against the cascade router.
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// How response quality is classified (default: heuristic).
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Window size (default 50) — NOTE(review): consumer not visible in this
    /// file; confirm what the window measures.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Optional hard token cap for cascade traffic.
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Optional explicit provider ordering by cost tier.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,
}
449
450impl Default for CascadeConfig {
451 fn default() -> Self {
452 Self {
453 quality_threshold: default_cascade_quality_threshold(),
454 max_escalations: default_cascade_max_escalations(),
455 classifier_mode: CascadeClassifierMode::default(),
456 window_size: default_cascade_window_size(),
457 max_cascade_tokens: None,
458 cost_tiers: None,
459 }
460 }
461}
462
/// How the cascade judges whether a response is good enough.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// Heuristic scoring (the default).
    #[default]
    Heuristic,
    /// Judge-based scoring — presumably an LLM-as-judge; confirm against the
    /// classifier implementation.
    Judge,
}
475
/// Candle local-inference backend settings (`[llm.candle]`).
///
/// NOTE(review): field-for-field identical to `CandleInlineConfig`; consider
/// unifying the two types once config compatibility allows.
#[derive(Debug, Deserialize, Serialize)]
pub struct CandleConfig {
    /// Model source (default "huggingface").
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local model path; empty when unused.
    #[serde(default)]
    pub local_path: String,
    /// Optional specific model filename within the source.
    #[serde(default)]
    pub filename: Option<String>,
    /// Prompt chat template (default "chatml").
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Compute device (default "cpu").
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional separate repo for the embedding model.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Sampling/generation parameters.
    #[serde(default)]
    pub generation: GenerationParams,
}
493
/// Sampling parameters for candle text generation.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GenerationParams {
    /// Sampling temperature (default 0.7).
    #[serde(default = "default_temperature")]
    pub temperature: f64,
    /// Nucleus-sampling cutoff; `None` disables top-p.
    #[serde(default)]
    pub top_p: Option<f64>,
    /// Top-k cutoff; `None` disables top-k.
    #[serde(default)]
    pub top_k: Option<usize>,
    /// Maximum tokens to generate (default 2048; clamped by
    /// `MAX_TOKENS_CAP` via `capped_max_tokens`).
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// RNG seed (default 42) for reproducible sampling.
    #[serde(default = "default_seed")]
    pub seed: u64,
    /// Repetition penalty (default 1.1).
    #[serde(default = "default_repeat_penalty")]
    pub repeat_penalty: f32,
    /// How many trailing tokens the repeat penalty considers (default 64).
    #[serde(default = "default_repeat_last_n")]
    pub repeat_last_n: usize,
}
511
/// Hard upper bound applied to `GenerationParams::max_tokens` by
/// `GenerationParams::capped_max_tokens`.
pub const MAX_TOKENS_CAP: usize = 32768;
513
514impl GenerationParams {
515 #[must_use]
516 pub fn capped_max_tokens(&self) -> usize {
517 self.max_tokens.min(MAX_TOKENS_CAP)
518 }
519}
520
521impl Default for GenerationParams {
522 fn default() -> Self {
523 Self {
524 temperature: default_temperature(),
525 top_p: None,
526 top_k: None,
527 max_tokens: default_max_tokens(),
528 seed: default_seed(),
529 repeat_penalty: default_repeat_penalty(),
530 repeat_last_n: default_repeat_last_n(),
531 }
532 }
533}
534
/// Top-level routing strategy for `[llm].routing`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LlmRoutingStrategy {
    /// Routing disabled (the default; skipped on serialization).
    #[default]
    None,
    /// EMA-based strategy.
    Ema,
    /// Thompson-sampling strategy.
    Thompson,
    /// Cascade strategy.
    Cascade,
    /// Task-based routing — NOTE(review): semantics not visible in this file.
    Task,
    /// Complexity triage, configured via `[llm.complexity_routing]`.
    Triage,
}
555
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`.
fn default_triage_timeout_secs() -> u64 {
    const TIMEOUT_SECS: u64 = 5;
    TIMEOUT_SECS
}
559
/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`.
fn default_max_triage_tokens() -> u32 {
    const MAX_TOKENS: u32 = 50;
    MAX_TOKENS
}
563
/// Serde default helper for boolean fields that should default to `true`
/// (plain `#[serde(default)]` would yield `false`).
fn default_true() -> bool {
    true
}
567
/// Maps each complexity tier to a provider name; any tier may be left unset.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct TierMapping {
    // One optional provider name per complexity tier, simplest to hardest.
    pub simple: Option<String>,
    pub medium: Option<String>,
    pub complex: Option<String>,
    pub expert: Option<String>,
}
576
/// The `[llm.complexity_routing]` section used by triage routing.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ComplexityRoutingConfig {
    /// Provider that classifies request complexity; `None` presumably falls
    /// back to a pool entry — confirm against the triage router.
    #[serde(default)]
    pub triage_provider: Option<String>,

    /// Skip triage when only a single provider is configured (default true).
    #[serde(default = "default_true")]
    pub bypass_single_provider: bool,

    /// Per-tier provider mapping.
    #[serde(default)]
    pub tiers: TierMapping,

    /// Token budget for the triage call (default 50).
    #[serde(default = "default_max_triage_tokens")]
    pub max_triage_tokens: u32,

    /// Timeout for the triage call, in seconds (default 5).
    #[serde(default = "default_triage_timeout_secs")]
    pub triage_timeout_secs: u64,

    /// Strategy name used when triage fails — NOTE(review): accepted values
    /// are not visible in this file.
    #[serde(default)]
    pub fallback_strategy: Option<String>,
}
625
626impl Default for ComplexityRoutingConfig {
627 fn default() -> Self {
628 Self {
629 triage_provider: None,
630 bypass_single_provider: true,
631 tiers: TierMapping::default(),
632 max_triage_tokens: default_max_triage_tokens(),
633 triage_timeout_secs: default_triage_timeout_secs(),
634 fallback_strategy: None,
635 }
636 }
637}
638
/// Per-provider candle settings, inlined in a `[[llm.providers]]` entry.
///
/// NOTE(review): field-for-field identical to `CandleConfig`; consider
/// unifying the two types once config compatibility allows.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandleInlineConfig {
    /// Model source (default "huggingface").
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local model path; empty when unused.
    #[serde(default)]
    pub local_path: String,
    /// Optional specific model filename within the source.
    #[serde(default)]
    pub filename: Option<String>,
    /// Prompt chat template (default "chatml").
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Compute device (default "cpu").
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional separate repo for the embedding model.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Sampling/generation parameters.
    #[serde(default)]
    pub generation: GenerationParams,
}
658
659impl Default for CandleInlineConfig {
660 fn default() -> Self {
661 Self {
662 source: default_candle_source(),
663 local_path: String::new(),
664 filename: None,
665 chat_template: default_chat_template(),
666 device: default_candle_device(),
667 embedding_repo: None,
668 generation: GenerationParams::default(),
669 }
670 }
671}
672
/// One `[[llm.providers]]` pool entry.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct ProviderEntry {
    /// Backend kind; written as `type = "..."` in TOML.
    #[serde(rename = "type")]
    pub provider_type: ProviderKind,

    /// Unique routing name; falls back to the kind's canonical string
    /// (see `effective_name`). Mandatory for `compatible` entries.
    #[serde(default)]
    pub name: Option<String>,

    /// Chat model; per-kind default applied by `effective_model` when unset.
    #[serde(default)]
    pub model: Option<String>,

    /// API endpoint override.
    #[serde(default)]
    pub base_url: Option<String>,

    /// Optional max-output-tokens setting for this provider.
    #[serde(default)]
    pub max_tokens: Option<u32>,

    /// Optional per-provider embedding model.
    #[serde(default)]
    pub embedding_model: Option<String>,

    /// Speech-to-text model; presence marks this entry as an STT candidate
    /// (see `LlmConfig::stt_provider_entry`).
    #[serde(default)]
    pub stt_model: Option<String>,

    /// Flag — NOTE(review): consumer not visible here; presumably marks this
    /// entry as the embedding provider. Confirm.
    #[serde(default)]
    pub embed: bool,

    /// Marks the pool's default entry; at most one allowed (`validate_pool`).
    #[serde(default)]
    pub default: bool,

    /// Claude-only extended-thinking settings; warned on other kinds.
    #[serde(default)]
    pub thinking: Option<ThinkingConfig>,
    /// Flag — NOTE(review): consumer not visible here; confirm semantics.
    #[serde(default)]
    pub server_compaction: bool,
    /// Flag — NOTE(review): consumer not visible here; confirm semantics.
    #[serde(default)]
    pub enable_extended_context: bool,

    /// OpenAI-only reasoning-effort setting; warned on other kinds.
    #[serde(default)]
    pub reasoning_effort: Option<String>,

    /// Gemini-only thinking level; warned on other kinds.
    #[serde(default)]
    pub thinking_level: Option<GeminiThinkingLevel>,
    /// Gemini-only thinking token budget; warned on other kinds.
    #[serde(default)]
    pub thinking_budget: Option<i32>,
    /// Gemini-only: include model thoughts in output — confirm against client.
    #[serde(default)]
    pub include_thoughts: Option<bool>,

    /// Ollama-only tool-use flag; warned on other kinds.
    #[serde(default)]
    pub tool_use: bool,

    /// API key; optional (may come from the environment — confirm).
    #[serde(default)]
    pub api_key: Option<String>,

    /// Inline candle settings for `type = "candle"` entries.
    #[serde(default)]
    pub candle: Option<CandleInlineConfig>,

    /// Optional vision model for this provider.
    #[serde(default)]
    pub vision_model: Option<String>,

    /// Optional per-provider system-instruction file.
    #[serde(default)]
    pub instruction_file: Option<std::path::PathBuf>,
}
758
759impl Default for ProviderEntry {
760 fn default() -> Self {
761 Self {
762 provider_type: ProviderKind::Ollama,
763 name: None,
764 model: None,
765 base_url: None,
766 max_tokens: None,
767 embedding_model: None,
768 stt_model: None,
769 embed: false,
770 default: false,
771 thinking: None,
772 server_compaction: false,
773 enable_extended_context: false,
774 reasoning_effort: None,
775 thinking_level: None,
776 thinking_budget: None,
777 include_thoughts: None,
778 tool_use: false,
779 api_key: None,
780 candle: None,
781 vision_model: None,
782 instruction_file: None,
783 }
784 }
785}
786
787impl ProviderEntry {
788 #[must_use]
790 pub fn effective_name(&self) -> String {
791 self.name
792 .clone()
793 .unwrap_or_else(|| self.provider_type.as_str().to_owned())
794 }
795
796 #[must_use]
801 pub fn effective_model(&self) -> String {
802 if let Some(ref m) = self.model {
803 return m.clone();
804 }
805 match self.provider_type {
806 ProviderKind::Ollama => "qwen3:8b".to_owned(),
807 ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
808 ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
809 ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
810 ProviderKind::Compatible | ProviderKind::Candle => String::new(),
811 }
812 }
813
814 pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
821 use crate::error::ConfigError;
822
823 if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
825 return Err(ConfigError::Validation(
826 "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
827 ));
828 }
829
830 match self.provider_type {
832 ProviderKind::Ollama => {
833 if self.thinking.is_some() {
834 tracing::warn!(
835 provider = self.effective_name(),
836 "field `thinking` is only used by Claude providers"
837 );
838 }
839 if self.reasoning_effort.is_some() {
840 tracing::warn!(
841 provider = self.effective_name(),
842 "field `reasoning_effort` is only used by OpenAI providers"
843 );
844 }
845 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
846 tracing::warn!(
847 provider = self.effective_name(),
848 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
849 );
850 }
851 }
852 ProviderKind::Claude => {
853 if self.reasoning_effort.is_some() {
854 tracing::warn!(
855 provider = self.effective_name(),
856 "field `reasoning_effort` is only used by OpenAI providers"
857 );
858 }
859 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
860 tracing::warn!(
861 provider = self.effective_name(),
862 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
863 );
864 }
865 if self.tool_use {
866 tracing::warn!(
867 provider = self.effective_name(),
868 "field `tool_use` is only used by Ollama providers"
869 );
870 }
871 }
872 ProviderKind::OpenAi => {
873 if self.thinking.is_some() {
874 tracing::warn!(
875 provider = self.effective_name(),
876 "field `thinking` is only used by Claude providers"
877 );
878 }
879 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
880 tracing::warn!(
881 provider = self.effective_name(),
882 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
883 );
884 }
885 if self.tool_use {
886 tracing::warn!(
887 provider = self.effective_name(),
888 "field `tool_use` is only used by Ollama providers"
889 );
890 }
891 }
892 ProviderKind::Gemini => {
893 if self.thinking.is_some() {
894 tracing::warn!(
895 provider = self.effective_name(),
896 "field `thinking` is only used by Claude providers"
897 );
898 }
899 if self.reasoning_effort.is_some() {
900 tracing::warn!(
901 provider = self.effective_name(),
902 "field `reasoning_effort` is only used by OpenAI providers"
903 );
904 }
905 if self.tool_use {
906 tracing::warn!(
907 provider = self.effective_name(),
908 "field `tool_use` is only used by Ollama providers"
909 );
910 }
911 }
912 _ => {}
913 }
914
915 if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
918 tracing::warn!(
919 provider = self.effective_name(),
920 "field `stt_model` is set on an Ollama provider; Ollama does not support the \
921 Whisper STT API — use OpenAI, compatible, or candle instead"
922 );
923 }
924
925 Ok(())
926 }
927}
928
929pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
939 use crate::error::ConfigError;
940 use std::collections::HashSet;
941
942 if entries.is_empty() {
943 return Err(ConfigError::Validation(
944 "at least one LLM provider must be configured in [[llm.providers]]".into(),
945 ));
946 }
947
948 let default_count = entries.iter().filter(|e| e.default).count();
949 if default_count > 1 {
950 return Err(ConfigError::Validation(
951 "only one [[llm.providers]] entry can be marked `default = true`".into(),
952 ));
953 }
954
955 let mut seen_names: HashSet<String> = HashSet::new();
956 for entry in entries {
957 let name = entry.effective_name();
958 if !seen_names.insert(name.clone()) {
959 return Err(ConfigError::Validation(format!(
960 "duplicate provider name \"{name}\" in [[llm.providers]]"
961 )));
962 }
963 entry.validate()?;
964 }
965
966 Ok(())
967}
968
#[cfg(test)]
mod tests {
    use super::*;

    // --- Shared fixtures -------------------------------------------------

    /// Minimal valid Ollama entry used across tests.
    fn ollama_entry() -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Ollama,
            name: Some("ollama".into()),
            model: Some("qwen3:8b".into()),
            ..Default::default()
        }
    }

    /// Minimal valid Claude entry used across tests.
    fn claude_entry() -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Claude,
            name: Some("claude".into()),
            model: Some("claude-sonnet-4-6".into()),
            max_tokens: Some(8192),
            ..Default::default()
        }
    }

    // --- ProviderEntry::validate -----------------------------------------

    #[test]
    fn validate_ollama_valid() {
        assert!(ollama_entry().validate().is_ok());
    }

    #[test]
    fn validate_claude_valid() {
        assert!(claude_entry().validate().is_ok());
    }

    #[test]
    fn validate_compatible_without_name_errors() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: None,
            ..Default::default()
        };
        let err = entry.validate().unwrap_err();
        assert!(
            err.to_string().contains("compatible"),
            "error should mention compatible: {err}"
        );
    }

    #[test]
    fn validate_compatible_with_name_ok() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: Some("my-proxy".into()),
            base_url: Some("http://localhost:8080".into()),
            model: Some("gpt-4o".into()),
            max_tokens: Some(4096),
            ..Default::default()
        };
        assert!(entry.validate().is_ok());
    }

    #[test]
    fn validate_openai_valid() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::OpenAi,
            name: Some("openai".into()),
            model: Some("gpt-4o".into()),
            max_tokens: Some(4096),
            ..Default::default()
        };
        assert!(entry.validate().is_ok());
    }

    #[test]
    fn validate_gemini_valid() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Gemini,
            name: Some("gemini".into()),
            model: Some("gemini-2.0-flash".into()),
            ..Default::default()
        };
        assert!(entry.validate().is_ok());
    }

    // --- validate_pool ----------------------------------------------------

    #[test]
    fn validate_pool_empty_errors() {
        let err = validate_pool(&[]).unwrap_err();
        assert!(err.to_string().contains("at least one"), "{err}");
    }

    #[test]
    fn validate_pool_single_entry_ok() {
        assert!(validate_pool(&[ollama_entry()]).is_ok());
    }

    #[test]
    fn validate_pool_duplicate_names_errors() {
        let a = ollama_entry();
        let b = ollama_entry();
        let err = validate_pool(&[a, b]).unwrap_err();
        assert!(err.to_string().contains("duplicate"), "{err}");
    }

    #[test]
    fn validate_pool_multiple_defaults_errors() {
        let mut a = ollama_entry();
        let mut b = claude_entry();
        a.default = true;
        b.default = true;
        let err = validate_pool(&[a, b]).unwrap_err();
        assert!(err.to_string().contains("default"), "{err}");
    }

    #[test]
    fn validate_pool_two_different_providers_ok() {
        assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
    }

    #[test]
    fn validate_pool_propagates_entry_error() {
        let bad = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: None,
            ..Default::default()
        };
        assert!(validate_pool(&[bad]).is_err());
    }

    // --- ProviderEntry::effective_model per-kind defaults -----------------

    #[test]
    fn effective_model_returns_explicit_when_set() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Claude,
            model: Some("claude-sonnet-4-6".into()),
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
    }

    #[test]
    fn effective_model_ollama_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Ollama,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "qwen3:8b");
    }

    #[test]
    fn effective_model_claude_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Claude,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
    }

    #[test]
    fn effective_model_openai_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::OpenAi,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "gpt-4o-mini");
    }

    #[test]
    fn effective_model_gemini_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Gemini,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "gemini-2.0-flash");
    }

    /// Parse a TOML snippet containing an `[llm]` table into an `LlmConfig`.
    fn parse_llm(toml: &str) -> LlmConfig {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            llm: LlmConfig,
        }
        toml::from_str::<Wrapper>(toml).unwrap().llm
    }

    // --- LlmConfig accessors and format checks -----------------------------

    #[test]
    fn check_legacy_format_new_format_ok() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#,
        );
        assert!(cfg.check_legacy_format().is_ok());
    }

    #[test]
    fn check_legacy_format_empty_providers_no_legacy_ok() {
        let cfg = parse_llm("[llm]\n");
        assert!(cfg.check_legacy_format().is_ok());
    }

    #[test]
    fn effective_provider_falls_back_to_ollama_when_no_providers() {
        let cfg = parse_llm("[llm]\n");
        assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
    }

    #[test]
    fn effective_provider_reads_from_providers_first() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "claude"
model = "claude-sonnet-4-6"
"#,
        );
        assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
    }

    #[test]
    fn effective_model_reads_from_providers_first() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#,
        );
        assert_eq!(cfg.effective_model(), "qwen3:8b");
    }

    #[test]
    fn effective_base_url_default_when_absent() {
        let cfg = parse_llm("[llm]\n");
        assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
    }

    #[test]
    fn effective_base_url_from_providers_entry() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "ollama"
base_url = "http://myhost:11434"
"#,
        );
        assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
    }

    // --- Complexity routing -------------------------------------------------

    #[test]
    fn complexity_routing_defaults() {
        let cr = ComplexityRoutingConfig::default();
        assert!(
            cr.bypass_single_provider,
            "bypass_single_provider must default to true"
        );
        assert_eq!(cr.triage_timeout_secs, 5);
        assert_eq!(cr.max_triage_tokens, 50);
        assert!(cr.triage_provider.is_none());
        assert!(cr.tiers.simple.is_none());
    }

    #[test]
    fn complexity_routing_toml_round_trip() {
        let cfg = parse_llm(
            r#"
[llm]
routing = "triage"

[llm.complexity_routing]
triage_provider = "fast"
bypass_single_provider = false
triage_timeout_secs = 10
max_triage_tokens = 100

[llm.complexity_routing.tiers]
simple = "fast"
medium = "medium"
complex = "large"
expert = "opus"
"#,
        );
        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
        let cr = cfg
            .complexity_routing
            .expect("complexity_routing must be present");
        assert_eq!(cr.triage_provider.as_deref(), Some("fast"));
        assert!(!cr.bypass_single_provider);
        assert_eq!(cr.triage_timeout_secs, 10);
        assert_eq!(cr.max_triage_tokens, 100);
        assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
        assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
        assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
        assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
    }

    #[test]
    fn complexity_routing_partial_tiers_toml() {
        let cfg = parse_llm(
            r#"
[llm]
routing = "triage"

[llm.complexity_routing.tiers]
simple = "haiku"
complex = "sonnet"
"#,
        );
        let cr = cfg
            .complexity_routing
            .expect("complexity_routing must be present");
        assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
        assert!(cr.tiers.medium.is_none());
        assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
        assert!(cr.tiers.expert.is_none());
        assert!(cr.bypass_single_provider);
        assert_eq!(cr.triage_timeout_secs, 5);
    }

    #[test]
    fn routing_strategy_triage_deserialized() {
        let cfg = parse_llm(
            r#"
[llm]
routing = "triage"
"#,
        );
        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
    }

    // --- STT provider resolution and validation ------------------------------

    #[test]
    fn stt_provider_entry_by_name_match() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
stt_model = "gpt-4o-mini-transcribe"

[llm.stt]
provider = "quality"
"#,
        );
        let entry = cfg.stt_provider_entry().expect("should find stt provider");
        assert_eq!(entry.effective_name(), "quality");
        assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
    }

    #[test]
    fn stt_provider_entry_auto_detect_when_provider_empty() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = ""
"#,
        );
        let entry = cfg.stt_provider_entry().expect("should auto-detect");
        assert_eq!(entry.effective_name(), "openai-stt");
    }

    #[test]
    fn stt_provider_entry_auto_detect_no_stt_section() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"
"#,
        );
        let entry = cfg.stt_provider_entry().expect("should auto-detect");
        assert_eq!(entry.effective_name(), "openai-stt");
    }

    #[test]
    fn stt_provider_entry_none_when_no_stt_model() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
"#,
        );
        assert!(cfg.stt_provider_entry().is_none());
    }

    #[test]
    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#,
        );
        assert!(cfg.stt_provider_entry().is_none());
    }

    #[test]
    fn stt_config_deserializes_new_slim_format() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
language = "en"
"#,
        );
        let stt = cfg.stt.as_ref().expect("stt section present");
        assert_eq!(stt.provider, "quality");
        assert_eq!(stt.language, "en");
    }

    #[test]
    fn stt_config_default_provider_is_empty() {
        assert_eq!(default_stt_provider(), "");
    }

    #[test]
    fn validate_stt_missing_provider_ok() {
        let cfg = parse_llm("[llm]\n");
        assert!(cfg.validate_stt().is_ok());
    }

    #[test]
    fn validate_stt_valid_reference() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#,
        );
        assert!(cfg.validate_stt().is_ok());
    }

    #[test]
    fn validate_stt_nonexistent_provider_errors() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "nonexistent"
"#,
        );
        assert!(cfg.validate_stt().is_err());
    }

    #[test]
    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "quality"
"#,
        );
        assert!(cfg.validate_stt().is_ok());
        assert!(
            cfg.stt_provider_entry().is_none(),
            "stt_provider_entry must be None when provider has no stt_model"
        );
    }
}