1use serde::{Deserialize, Serialize};
5
/// Thinking configuration, (de)serialized as an internally tagged enum.
///
/// The variant is selected by a `mode` key whose value is the snake_case
/// variant name (`"extended"` or `"adaptive"`).
#[non_exhaustive]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum ThinkingConfig {
    /// `mode = "extended"`: carries a required token budget.
    Extended {
        /// Required when this variant is used (no serde default).
        /// Presumably the cap on thinking tokens — confirm against the provider code.
        budget_tokens: u32,
    },
    /// `mode = "adaptive"`: carries an optional effort level.
    Adaptive {
        /// Optional effort; `None` when omitted and skipped on serialization when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        effort: Option<ThinkingEffort>,
    },
}
28
/// Effort level carried by [`ThinkingConfig::Adaptive`].
///
/// Serialized as the lowercase variant name (`"low"`, `"medium"`, `"high"`).
/// [`Default`] yields [`ThinkingEffort::Medium`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ThinkingEffort {
    /// Serialized as `"low"`.
    Low,
    /// Serialized as `"medium"`; the default variant.
    #[default]
    Medium,
    /// Serialized as `"high"`.
    High,
}
42
/// Cache time-to-live selector (used by `ProviderEntry::prompt_cache_ttl`).
///
/// Variants serialize in snake_case unless explicitly renamed.
/// [`Default`] yields [`CacheTtl::Ephemeral`].
#[non_exhaustive]
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum CacheTtl {
    /// Serialized as `"ephemeral"`; the default variant.
    #[default]
    Ephemeral,
    /// Serialized as `"1h"` (explicit rename).
    #[serde(rename = "1h")]
    OneHour,
}
60
61impl CacheTtl {
62 #[must_use]
65 pub fn requires_beta(self) -> bool {
66 match self {
67 Self::OneHour => true,
68 Self::Ephemeral => false,
69 }
70 }
71}
72
/// Thinking level used by `ProviderEntry::thinking_level`.
///
/// Serialized as the lowercase variant name. There is no default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum GeminiThinkingLevel {
    /// Serialized as `"minimal"`.
    Minimal,
    /// Serialized as `"low"`.
    Low,
    /// Serialized as `"medium"`.
    Medium,
    /// Serialized as `"high"`.
    High,
}
90
91pub use zeph_common::ProviderName;
92
/// Serde default for `LlmConfig::response_cache_ttl_secs`: 3600.
fn default_response_cache_ttl_secs() -> u64 {
    3_600_u64
}

/// Serde default for `LlmConfig::semantic_cache_threshold`: 0.95.
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}

/// Serde default for `LlmConfig::semantic_cache_max_candidates`: 10.
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}

/// Serde default for `LlmConfig::router_ema_alpha`: 0.1.
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}

/// Serde default for `LlmConfig::router_reorder_interval`: 10.
fn default_router_reorder_interval() -> u64 {
    10_u64
}
112
/// Default embedding model name: `"qwen3-embedding"`.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}

/// Serde default for the Candle `source` field: `"huggingface"`.
fn default_candle_source() -> String {
    String::from("huggingface")
}

/// Serde default for the Candle `chat_template` field: `"chatml"`.
fn default_chat_template() -> String {
    String::from("chatml")
}

/// Serde default for the Candle `device` field: `"cpu"`.
fn default_candle_device() -> String {
    String::from("cpu")
}
128
/// Serde default for `GenerationParams::temperature`: 0.7.
fn default_temperature() -> f64 {
    0.7_f64
}

/// Serde default for `GenerationParams::max_tokens`: 2048.
fn default_max_tokens() -> usize {
    2_048_usize
}

/// Serde default for `GenerationParams::seed`: 42.
fn default_seed() -> u64 {
    42_u64
}

/// Serde default for `GenerationParams::repeat_penalty`: 1.1.
fn default_repeat_penalty() -> f32 {
    1.1_f32
}

/// Serde default for `GenerationParams::repeat_last_n`: 64.
fn default_repeat_last_n() -> usize {
    64_usize
}
148
/// Serde default for `CascadeConfig::quality_threshold`: 0.5.
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}

/// Serde default for `CascadeConfig::max_escalations`: 2.
fn default_cascade_max_escalations() -> u8 {
    2_u8
}

/// Serde default for `CascadeConfig::window_size`: 50.
fn default_cascade_window_size() -> usize {
    50_usize
}

/// Serde default for `CascadeConfig::judge_timeout_ms`: 5000.
fn default_cascade_judge_timeout_ms() -> u64 {
    5_000_u64
}
164
/// Serde default for `ReputationConfig::decay_factor`: 0.95.
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}

/// Serde default for `ReputationConfig::weight`: 0.3.
fn default_reputation_weight() -> f64 {
    0.3_f64
}

/// Serde default for `ReputationConfig::min_observations`: 5.
fn default_reputation_min_observations() -> u64 {
    5_u64
}
176
/// Serde default for `SttConfig::provider`: the empty string.
///
/// `LlmConfig::stt_provider_entry` and `LlmConfig::validate_stt` both treat an
/// empty provider as "no explicit provider selected".
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}

/// Serde default for `SttConfig::language`: `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
188
/// Crate-visible accessor for the default embedding model name.
///
/// Forwards to the private `default_embedding_model` helper.
#[must_use]
pub(crate) fn get_default_embedding_model() -> String {
    default_embedding_model()
}

/// Crate-visible accessor for the default response-cache TTL in seconds.
///
/// Forwards to the private `default_response_cache_ttl_secs` helper.
#[must_use]
pub(crate) fn get_default_response_cache_ttl_secs() -> u64 {
    default_response_cache_ttl_secs()
}

/// Crate-visible accessor for the default router EMA alpha.
///
/// Forwards to the private `default_router_ema_alpha` helper.
#[must_use]
pub(crate) fn get_default_router_ema_alpha() -> f64 {
    default_router_ema_alpha()
}

/// Crate-visible accessor for the default router reorder interval.
///
/// Forwards to the private `default_router_reorder_interval` helper.
#[must_use]
pub(crate) fn get_default_router_reorder_interval() -> u64 {
    default_router_reorder_interval()
}
212
/// Kind of LLM provider backing a `ProviderEntry` (its `type` key).
///
/// Serialized as the lowercase variant name, e.g. `OpenAi` becomes `"openai"`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
    /// Serialized as `"ollama"`.
    Ollama,
    /// Serialized as `"claude"`.
    Claude,
    /// Serialized as `"openai"`.
    OpenAi,
    /// Serialized as `"gemini"`.
    Gemini,
    /// Serialized as `"candle"`.
    Candle,
    /// Serialized as `"compatible"`.
    Compatible,
    /// Serialized as `"gonka"`.
    Gonka,
    /// Serialized as `"cocoon"`.
    Cocoon,
}
246
247impl ProviderKind {
248 #[must_use]
259 pub fn as_str(self) -> &'static str {
260 match self {
261 Self::Ollama => "ollama",
262 Self::Claude => "claude",
263 Self::OpenAi => "openai",
264 Self::Gemini => "gemini",
265 Self::Candle => "candle",
266 Self::Compatible => "compatible",
267 Self::Gonka => "gonka",
268 Self::Cocoon => "cocoon",
269 }
270 }
271}
272
273impl std::fmt::Display for ProviderKind {
274 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275 f.write_str(self.as_str())
276 }
277}
278
/// Serde default for `StreamLimits::max_tool_json_bytes`: 4 MiB.
fn default_max_tool_json_bytes() -> usize {
    4 << 20
}

/// Serde default for `StreamLimits::max_thinking_bytes`: 1 MiB.
fn default_max_thinking_bytes() -> usize {
    1 << 20
}

/// Serde default for `StreamLimits::max_compaction_bytes`: 32 KiB.
fn default_max_compaction_bytes() -> usize {
    32 << 10
}
290
291fn stream_limits_is_default(v: &StreamLimits) -> bool {
292 v.max_tool_json_bytes == default_max_tool_json_bytes()
293 && v.max_thinking_bytes == default_max_thinking_bytes()
294 && v.max_compaction_bytes == default_max_compaction_bytes()
295}
296
/// Byte limits grouped under `LlmConfig::stream_limits`.
///
/// Each field falls back to its own default function when omitted.
/// What exactly each limit bounds is enforced outside this chunk — the
/// per-field notes below describe only the defaults.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct StreamLimits {
    /// Defaults to 4 MiB (`4 * 1024 * 1024`).
    #[serde(default = "default_max_tool_json_bytes")]
    pub max_tool_json_bytes: usize,

    /// Defaults to 1 MiB (`1024 * 1024`).
    #[serde(default = "default_max_thinking_bytes")]
    pub max_thinking_bytes: usize,

    /// Defaults to 32 KiB (`32 * 1024`).
    #[serde(default = "default_max_compaction_bytes")]
    pub max_compaction_bytes: usize,
}
326
327impl Default for StreamLimits {
328 fn default() -> Self {
329 Self {
330 max_tool_json_bytes: default_max_tool_json_bytes(),
331 max_thinking_bytes: default_max_thinking_bytes(),
332 max_compaction_bytes: default_max_compaction_bytes(),
333 }
334 }
335}
336
/// Top-level LLM configuration.
///
/// Every field carries a serde default, so an empty document deserializes
/// successfully; `Default for LlmConfig` relies on exactly that.
/// Validation messages in this file refer to this section as `[llm]`, with
/// provider entries under `[[llm.providers]]` and STT under `[llm.stt]`.
#[derive(Debug, Deserialize, Serialize)]
pub struct LlmConfig {
    /// Configured provider entries. Empty by default and omitted from
    /// serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub providers: Vec<ProviderEntry>,

    /// Routing strategy. Defaults to `LlmRoutingStrategy::None`, which is also
    /// the value skipped on serialization.
    #[serde(default, skip_serializing_if = "is_routing_none")]
    pub routing: LlmRoutingStrategy,

    /// Top-level embedding model name; the last fallback in
    /// `effective_embedding_model`. Defaults to `"qwen3-embedding"`.
    #[serde(default = "default_embedding_model_opt")]
    pub embedding_model: String,
    /// Optional Candle configuration; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candle: Option<CandleConfig>,
    /// Optional speech-to-text settings, consumed by `stt_provider_entry` and
    /// `validate_stt`.
    #[serde(default)]
    pub stt: Option<SttConfig>,
    /// Defaults to `false`.
    #[serde(default)]
    pub response_cache_enabled: bool,
    /// Defaults to 3600.
    #[serde(default = "default_response_cache_ttl_secs")]
    pub response_cache_ttl_secs: u64,
    /// Defaults to `false`.
    #[serde(default)]
    pub semantic_cache_enabled: bool,
    /// Defaults to 0.95.
    #[serde(default = "default_semantic_cache_threshold")]
    pub semantic_cache_threshold: f32,
    /// Defaults to 10.
    #[serde(default = "default_semantic_cache_max_candidates")]
    pub semantic_cache_max_candidates: u32,
    /// Defaults to `false`.
    #[serde(default)]
    pub router_ema_enabled: bool,
    /// Defaults to 0.1.
    #[serde(default = "default_router_ema_alpha")]
    pub router_ema_alpha: f64,
    /// Defaults to 10.
    #[serde(default = "default_router_reorder_interval")]
    pub router_reorder_interval: u64,
    /// Optional router settings; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub router: Option<RouterConfig>,
    /// Optional path to an instruction file; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instruction_file: Option<std::path::PathBuf>,
    /// Optional summary model name; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_model: Option<String>,
    /// Optional dedicated provider entry for summaries; omitted from output
    /// when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_provider: Option<ProviderEntry>,

    /// Optional complexity-routing settings; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub complexity_routing: Option<ComplexityRoutingConfig>,

    /// Optional `CoeConfig` settings; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coe: Option<CoeConfig>,

    /// Stream byte limits. Defaults to `StreamLimits::default()` and is
    /// omitted from output while every limit still equals its default.
    #[serde(default, skip_serializing_if = "stream_limits_is_default")]
    pub stream_limits: StreamLimits,
}
442
/// Serde default for `LlmConfig::embedding_model`.
///
/// Thin forwarder to `default_embedding_model`.
fn default_embedding_model_opt() -> String {
    default_embedding_model()
}
446
447impl Default for LlmConfig {
448 fn default() -> Self {
449 toml::from_str("").expect("empty TOML produces valid LlmConfig defaults")
450 }
451}
452
453#[allow(clippy::trivially_copy_pass_by_ref)]
454fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
455 *s == LlmRoutingStrategy::None
456}
457
458impl LlmConfig {
459 #[must_use]
461 pub fn effective_provider(&self) -> ProviderKind {
462 self.providers
463 .first()
464 .map_or(ProviderKind::Ollama, |e| e.provider_type)
465 }
466
467 #[must_use]
469 pub fn effective_base_url(&self) -> &str {
470 self.providers
471 .first()
472 .and_then(|e| e.base_url.as_deref())
473 .unwrap_or("http://localhost:11434")
474 }
475
476 #[must_use]
482 pub fn effective_model(&self) -> &str {
483 self.providers
484 .iter()
485 .find(|e| !e.embed)
486 .and_then(|e| e.model.as_deref())
487 .unwrap_or("qwen3:8b")
488 }
489
490 #[must_use]
498 pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
499 let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
500 if name_hint.is_empty() {
501 self.providers.iter().find(|p| p.stt_model.is_some())
502 } else {
503 self.providers
504 .iter()
505 .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
506 }
507 }
508
509 #[must_use]
525 pub fn effective_embedding_model(&self) -> String {
526 if let Some(m) = self
527 .providers
528 .iter()
529 .find(|e| e.embed)
530 .and_then(|e| e.embedding_model.as_ref())
531 {
532 return m.clone();
533 }
534 if let Some(m) = self
535 .providers
536 .first()
537 .and_then(|e| e.embedding_model.as_ref())
538 {
539 return m.clone();
540 }
541 self.embedding_model.clone()
542 }
543
544 #[must_use]
561 pub fn stable_skill_embedding_model(&self) -> String {
562 let embed_entry = self
563 .providers
564 .iter()
565 .find(|e| e.embed)
566 .or_else(|| self.providers.iter().find(|e| e.embedding_model.is_some()));
567
568 if let Some(entry) = embed_entry {
569 if let Some(em) = entry.embedding_model.as_ref().filter(|s| !s.is_empty()) {
570 return em.clone();
571 }
572 if let Some(m) = entry.model.as_ref().filter(|s| !s.is_empty()) {
573 return m.clone();
574 }
575 }
576
577 self.effective_embedding_model()
578 }
579
580 pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
586 Ok(())
587 }
588
589 pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
595 use crate::error::ConfigError;
596
597 let Some(stt) = &self.stt else {
598 return Ok(());
599 };
600 if stt.provider.is_empty() {
601 return Ok(());
602 }
603 let found = self
604 .providers
605 .iter()
606 .find(|p| p.effective_name() == stt.provider);
607 match found {
608 None => {
609 return Err(ConfigError::Validation(format!(
610 "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
611 stt.provider
612 )));
613 }
614 Some(entry) if entry.stt_model.is_none() => {
615 tracing::warn!(
616 provider = stt.provider,
617 "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
618 );
619 }
620 _ => {}
621 }
622 Ok(())
623 }
624
625 pub fn warn_non_fast_tier_provider(
649 &self,
650 provider_name: &ProviderName,
651 feature_label: &str,
652 extra_allowlist: &[String],
653 ) {
654 if provider_name.is_empty() {
655 return;
656 }
657 let name = provider_name.as_str();
658 let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
659 tracing::warn!(
660 provider = name,
661 "{feature_label} provider '{name}' not found in [[llm.providers]]"
662 );
663 return;
664 };
665 let model = entry.model.as_deref().unwrap_or("");
666 if model.is_empty() {
667 return;
668 }
669 let lower = model.to_lowercase();
670 let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
671 let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
672 if !in_hints && !in_extra {
673 tracing::warn!(
674 provider = name,
675 actual = model,
676 "{feature_label} provider '{name}' uses model '{model}' \
677 which may not be fast-tier; prefer a fast model to bound distillation cost"
678 );
679 }
680 }
681}
682
/// Lowercase substrings identifying models treated as fast-tier.
///
/// `LlmConfig::warn_non_fast_tier_provider` lowercases the configured model
/// name and stays silent if it contains any of these entries.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
705
/// Speech-to-text settings (the `[llm.stt]` section named in validation errors).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SttConfig {
    /// Name of the provider entry to use. Defaults to the empty string, which
    /// `LlmConfig::stt_provider_entry` treats as "first entry with an `stt_model`".
    #[serde(default = "default_stt_provider")]
    pub provider: String,
    /// Defaults to `"auto"`.
    #[serde(default = "default_stt_language")]
    pub language: String,
}
728
/// Strategy selector for `RouterConfig::strategy`.
///
/// Serialized as the lowercase variant name. [`Default`] yields
/// [`RouterStrategyConfig::Ema`].
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RouterStrategyConfig {
    /// Serialized as `"ema"`; the default variant.
    #[default]
    Ema,
    /// Serialized as `"thompson"`.
    Thompson,
    /// Serialized as `"cascade"`.
    Cascade,
    /// Serialized as `"bandit"`.
    Bandit,
}
744
/// Settings stored under `RouterConfig::asi`.
///
/// The field notes below describe only the defaults; how the values are used
/// is not visible in this chunk.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AsiConfig {
    /// Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Defaults to 5.
    #[serde(default = "default_asi_window")]
    pub window: usize,

    /// Defaults to 0.7.
    #[serde(default = "default_asi_coherence_threshold")]
    pub coherence_threshold: f32,

    /// Defaults to 0.3.
    #[serde(default = "default_asi_penalty_weight")]
    pub penalty_weight: f32,
}
778
/// Serde default for `AsiConfig::window`: 5.
fn default_asi_window() -> usize {
    5_usize
}

/// Serde default for `AsiConfig::coherence_threshold`: 0.7.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}

/// Serde default for `AsiConfig::penalty_weight`: 0.3.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
790
791impl Default for AsiConfig {
792 fn default() -> Self {
793 Self {
794 enabled: false,
795 window: default_asi_window(),
796 coherence_threshold: default_asi_coherence_threshold(),
797 penalty_weight: default_asi_penalty_weight(),
798 }
799 }
800}
801
/// Router settings stored under `LlmConfig::router`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RouterConfig {
    /// Routing strategy; defaults to `RouterStrategyConfig::Ema`.
    #[serde(default)]
    pub strategy: RouterStrategyConfig,
    /// Optional path string; defaults to `None`.
    #[serde(default)]
    pub thompson_state_path: Option<String>,
    /// Optional `CascadeConfig`; defaults to `None`.
    #[serde(default)]
    pub cascade: Option<CascadeConfig>,
    /// Optional `ReputationConfig`; defaults to `None`.
    #[serde(default)]
    pub reputation: Option<ReputationConfig>,
    /// Optional `BanditConfig`; defaults to `None`.
    #[serde(default)]
    pub bandit: Option<BanditConfig>,
    /// Optional value; defaults to `None`.
    #[serde(default)]
    pub quality_gate: Option<f32>,
    /// Optional `AsiConfig`; defaults to `None`.
    #[serde(default)]
    pub asi: Option<AsiConfig>,
    /// Defaults to 4.
    #[serde(default = "default_embed_concurrency")]
    pub embed_concurrency: usize,
}
847
/// Serde default for `RouterConfig::embed_concurrency`: 4.
fn default_embed_concurrency() -> usize {
    4_usize
}
851
/// Settings stored under `RouterConfig::reputation`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ReputationConfig {
    /// Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Defaults to 0.95.
    #[serde(default = "default_reputation_decay_factor")]
    pub decay_factor: f64,
    /// Defaults to 0.3.
    #[serde(default = "default_reputation_weight")]
    pub weight: f64,
    /// Defaults to 5.
    #[serde(default = "default_reputation_min_observations")]
    pub min_observations: u64,
    /// Optional path string; defaults to `None`.
    #[serde(default)]
    pub state_path: Option<String>,
}
882
/// Settings stored under `RouterConfig::cascade`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Defaults to 0.5.
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Defaults to 2.
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// Defaults to `CascadeClassifierMode::Heuristic`.
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Defaults to 50.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Optional value; defaults to `None`.
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Optional list of strings; defaults to `None` and is omitted from output
    /// when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,

    /// Defaults to 5000 (milliseconds, per the field name).
    #[serde(default = "default_cascade_judge_timeout_ms")]
    pub judge_timeout_ms: u64,
}
935
936impl Default for CascadeConfig {
937 fn default() -> Self {
938 Self {
939 quality_threshold: default_cascade_quality_threshold(),
940 max_escalations: default_cascade_max_escalations(),
941 classifier_mode: CascadeClassifierMode::default(),
942 window_size: default_cascade_window_size(),
943 max_cascade_tokens: None,
944 cost_tiers: None,
945 judge_timeout_ms: default_cascade_judge_timeout_ms(),
946 }
947 }
948}
949
/// Classifier selector for `CascadeConfig::classifier_mode`.
///
/// Serialized as the lowercase variant name. [`Default`] yields
/// [`CascadeClassifierMode::Heuristic`].
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// Serialized as `"heuristic"`; the default variant.
    #[default]
    Heuristic,
    /// Serialized as `"judge"`.
    Judge,
}
963
/// Serde default for `BanditConfig::alpha`: 1.0.
fn default_bandit_alpha() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::dim`: 32.
fn default_bandit_dim() -> usize {
    32_usize
}

/// Serde default for `BanditConfig::cost_weight`: 0.1.
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}

/// Serde default for `BanditConfig::decay_factor`: 1.0.
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::embedding_timeout_ms`: 50.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}

/// Serde default for `BanditConfig::cache_size`: 512.
fn default_bandit_cache_size() -> usize {
    512_usize
}
987
/// Settings stored under `RouterConfig::bandit`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct BanditConfig {
    /// Defaults to 1.0.
    #[serde(default = "default_bandit_alpha")]
    pub alpha: f32,

    /// Defaults to 32.
    #[serde(default = "default_bandit_dim")]
    pub dim: usize,

    /// Defaults to 0.1.
    #[serde(default = "default_bandit_cost_weight")]
    pub cost_weight: f32,

    /// Defaults to 1.0.
    #[serde(default = "default_bandit_decay_factor")]
    pub decay_factor: f32,

    /// Provider name; defaults to `ProviderName::default()`.
    #[serde(default)]
    pub embedding_provider: ProviderName,

    /// Defaults to 50 (milliseconds, per the field name).
    #[serde(default = "default_bandit_embedding_timeout_ms")]
    pub embedding_timeout_ms: u64,

    /// Defaults to 512.
    #[serde(default = "default_bandit_cache_size")]
    pub cache_size: usize,

    /// Optional path string; defaults to `None`.
    #[serde(default)]
    pub state_path: Option<String>,

    /// Defaults to 0.9.
    #[serde(default = "default_bandit_memory_confidence_threshold")]
    pub memory_confidence_threshold: f32,

    /// Optional count; defaults to `None`.
    #[serde(default)]
    pub warmup_queries: Option<u64>,
}
1068
/// Serde default for `BanditConfig::memory_confidence_threshold`: 0.9.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
1072
1073impl Default for BanditConfig {
1074 fn default() -> Self {
1075 Self {
1076 alpha: default_bandit_alpha(),
1077 dim: default_bandit_dim(),
1078 cost_weight: default_bandit_cost_weight(),
1079 decay_factor: default_bandit_decay_factor(),
1080 embedding_provider: ProviderName::default(),
1081 embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
1082 cache_size: default_bandit_cache_size(),
1083 state_path: None,
1084 memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
1085 warmup_queries: None,
1086 }
1087 }
1088}
1089
/// Candle settings stored under `LlmConfig::candle`.
///
/// Has the same fields and defaults as `CandleInlineConfig`.
#[derive(Debug, Deserialize, Serialize)]
pub struct CandleConfig {
    /// Defaults to `"huggingface"`.
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Defaults to the empty string.
    #[serde(default)]
    pub local_path: String,
    /// Optional file name; defaults to `None`.
    #[serde(default)]
    pub filename: Option<String>,
    /// Defaults to `"chatml"`.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Defaults to `"cpu"`.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional repository name; defaults to `None`.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Optional token; defaults to `None`.
    /// NOTE(review): the derived `Debug` prints this value verbatim — confirm
    /// that is acceptable if the token is a secret.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Generation parameters; defaults to `GenerationParams::default()`.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Defaults to 120 (seconds, per the field name).
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1122
/// Serde default for the Candle `inference_timeout_secs` field: 120.
fn default_inference_timeout_secs() -> u64 {
    120_u64
}
1126
/// Generation parameters embedded in `CandleConfig` and `CandleInlineConfig`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GenerationParams {
    /// Defaults to 0.7.
    #[serde(default = "default_temperature")]
    pub temperature: f64,
    /// Optional value; defaults to `None`.
    #[serde(default)]
    pub top_p: Option<f64>,
    /// Optional value; defaults to `None`.
    #[serde(default)]
    pub top_k: Option<usize>,
    /// Defaults to 2048. `capped_max_tokens` clamps this to `MAX_TOKENS_CAP`.
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// Defaults to 42.
    #[serde(default = "default_seed")]
    pub seed: u64,
    /// Defaults to 1.1.
    #[serde(default = "default_repeat_penalty")]
    pub repeat_penalty: f32,
    /// Defaults to 64.
    #[serde(default = "default_repeat_last_n")]
    pub repeat_last_n: usize,
}
1157
/// Upper bound applied to `GenerationParams::max_tokens` by
/// `GenerationParams::capped_max_tokens`.
pub const MAX_TOKENS_CAP: usize = 32768;
1160
1161impl GenerationParams {
1162 #[must_use]
1173 pub fn capped_max_tokens(&self) -> usize {
1174 self.max_tokens.min(MAX_TOKENS_CAP)
1175 }
1176}
1177
1178impl Default for GenerationParams {
1179 fn default() -> Self {
1180 Self {
1181 temperature: default_temperature(),
1182 top_p: None,
1183 top_k: None,
1184 max_tokens: default_max_tokens(),
1185 seed: default_seed(),
1186 repeat_penalty: default_repeat_penalty(),
1187 repeat_last_n: default_repeat_last_n(),
1188 }
1189 }
1190}
1191
/// Strategy selector for `LlmConfig::routing`.
///
/// Serialized as the lowercase variant name. [`Default`] yields
/// [`LlmRoutingStrategy::None`], which `LlmConfig` skips when serializing.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LlmRoutingStrategy {
    /// Serialized as `"none"`; the default variant.
    #[default]
    None,
    /// Serialized as `"ema"`.
    Ema,
    /// Serialized as `"thompson"`.
    Thompson,
    /// Serialized as `"cascade"`.
    Cascade,
    /// Serialized as `"triage"`.
    Triage,
    /// Serialized as `"bandit"`.
    Bandit,
}
1213
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`: 5.
fn default_triage_timeout_secs() -> u64 {
    5_u64
}

/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`: 50.
fn default_max_triage_tokens() -> u32 {
    50_u32
}

/// Serde default for boolean fields that are on unless explicitly disabled.
fn default_true() -> bool {
    !false
}

/// Serde `skip_serializing_if` predicate that skips a boolean field while it
/// is `true`.
///
/// Takes a reference because serde always passes the field by reference,
/// hence the clippy allowance.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_true(v: &bool) -> bool {
    matches!(v, true)
}
1230
/// Per-tier string mapping stored under `ComplexityRoutingConfig::tiers`.
///
/// All four tiers are optional and `None` in the derived [`Default`].
/// Presumably each value names a provider entry — confirm against the router.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct TierMapping {
    /// Value for the `simple` tier, if any.
    pub simple: Option<String>,
    /// Value for the `medium` tier, if any.
    pub medium: Option<String>,
    /// Value for the `complex` tier, if any.
    pub complex: Option<String>,
    /// Value for the `expert` tier, if any.
    pub expert: Option<String>,
}
1239
/// Settings stored under `LlmConfig::complexity_routing`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ComplexityRoutingConfig {
    /// Optional provider name; defaults to `None`.
    #[serde(default)]
    pub triage_provider: Option<ProviderName>,

    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub bypass_single_provider: bool,

    /// Tier mapping; defaults to `TierMapping::default()` (all tiers `None`).
    #[serde(default)]
    pub tiers: TierMapping,

    /// Defaults to 50.
    #[serde(default = "default_max_triage_tokens")]
    pub max_triage_tokens: u32,

    /// Defaults to 5 (seconds, per the field name).
    #[serde(default = "default_triage_timeout_secs")]
    pub triage_timeout_secs: u64,

    /// Optional strategy name; defaults to `None`.
    #[serde(default)]
    pub fallback_strategy: Option<String>,
}
1288
1289impl Default for ComplexityRoutingConfig {
1290 fn default() -> Self {
1291 Self {
1292 triage_provider: None,
1293 bypass_single_provider: true,
1294 tiers: TierMapping::default(),
1295 max_triage_tokens: default_max_triage_tokens(),
1296 triage_timeout_secs: default_triage_timeout_secs(),
1297 fallback_strategy: None,
1298 }
1299 }
1300}
1301
/// Settings stored under `LlmConfig::coe`.
///
/// The container-level `#[serde(default)]` fills any omitted field from
/// `CoeConfig::default()`.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CoeConfig {
    /// Defaults to `false`.
    pub enabled: bool,
    /// Defaults to 0.8.
    pub intra_threshold: f64,
    /// Defaults to 0.20.
    pub inter_threshold: f64,
    /// Defaults to 0.1.
    pub shadow_sample_rate: f64,
    /// Provider name; defaults to `ProviderName::default()`.
    pub secondary_provider: ProviderName,
    /// Provider name; defaults to `ProviderName::default()`.
    pub embedding_provider: ProviderName,
}
1335
1336impl Default for CoeConfig {
1337 fn default() -> Self {
1338 Self {
1339 enabled: false,
1340 intra_threshold: 0.8,
1341 inter_threshold: 0.20,
1342 shadow_sample_rate: 0.1,
1343 secondary_provider: ProviderName::default(),
1344 embedding_provider: ProviderName::default(),
1345 }
1346 }
1347}
1348
/// One node listed in `ProviderEntry::gonka_nodes`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct GonkaNode {
    /// Node URL (required). Provider validation rejects an empty value or one
    /// that does not start with `http://` or `https://`.
    pub url: String,
    /// Node address (required). Its format is not validated in this file.
    pub address: String,
    /// Optional name; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
}
1366
/// Candle settings embedded in a provider entry (`ProviderEntry::candle`).
///
/// Has the same fields and defaults as `CandleConfig`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandleInlineConfig {
    /// Defaults to `"huggingface"`.
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Defaults to the empty string.
    #[serde(default)]
    pub local_path: String,
    /// Optional file name; defaults to `None`.
    #[serde(default)]
    pub filename: Option<String>,
    /// Defaults to `"chatml"`.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Defaults to `"cpu"`.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional repository name; defaults to `None`.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Optional token; defaults to `None`.
    /// NOTE(review): the derived `Debug` prints this value verbatim — confirm
    /// that is acceptable if the token is a secret.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Generation parameters; defaults to `GenerationParams::default()`.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Defaults to 120 (seconds, per the field name).
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1395
1396impl Default for CandleInlineConfig {
1397 fn default() -> Self {
1398 Self {
1399 source: default_candle_source(),
1400 local_path: String::new(),
1401 filename: None,
1402 chat_template: default_chat_template(),
1403 device: default_candle_device(),
1404 embedding_repo: None,
1405 hf_token: None,
1406 generation: GenerationParams::default(),
1407 inference_timeout_secs: default_inference_timeout_secs(),
1408 }
1409 }
1410}
1411
/// Pricing stored under `ProviderEntry::cocoon_pricing`.
///
/// For cocoon providers, `ProviderEntry::validate` requires both values to be
/// finite and non-negative.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct CocoonPricing {
    /// Defaults to 0.0.
    #[serde(default)]
    pub prompt_cents_per_1k: f64,
    /// Defaults to 0.0.
    #[serde(default)]
    pub completion_cents_per_1k: f64,
}
1430
/// One provider entry (an item of `[[llm.providers]]`, as named in the
/// validation messages).
///
/// Only `type` is required; every other key has a serde default.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct ProviderEntry {
    /// Provider kind; (de)serialized under the key `type`.
    #[serde(rename = "type")]
    pub provider_type: ProviderKind,

    /// Optional explicit name. `effective_name` falls back to the provider
    /// type's identifier when unset. Validation requires it for compatible,
    /// gonka and cocoon entries.
    #[serde(default)]
    pub name: Option<String>,

    /// Optional model name. `effective_model` supplies a per-type fallback.
    #[serde(default)]
    pub model: Option<String>,

    /// Optional base URL. Cocoon entries get a warning that it is ignored.
    #[serde(default)]
    pub base_url: Option<String>,

    /// Optional token limit; defaults to `None`.
    #[serde(default)]
    pub max_tokens: Option<u32>,

    /// Optional embedding model, consulted by
    /// `LlmConfig::effective_embedding_model`.
    #[serde(default)]
    pub embedding_model: Option<String>,

    /// Optional STT model. Entries without it are skipped by
    /// `LlmConfig::stt_provider_entry`.
    #[serde(default)]
    pub stt_model: Option<String>,

    /// Marks the entry as an embedding provider: `LlmConfig::effective_model`
    /// skips it and the embedding-model lookups prefer it. Defaults to `false`.
    #[serde(default)]
    pub embed: bool,

    /// Defaults to `false`. Its consumer is not visible in this chunk.
    #[serde(default)]
    pub default: bool,

    /// Optional thinking settings; validation warns they are only used by
    /// Claude providers.
    #[serde(default)]
    pub thinking: Option<ThinkingConfig>,
    /// Defaults to `false`.
    #[serde(default)]
    pub server_compaction: bool,
    /// Defaults to `false`.
    #[serde(default)]
    pub enable_extended_context: bool,
    /// Optional cache TTL; defaults to `None`.
    #[serde(default)]
    pub prompt_cache_ttl: Option<CacheTtl>,

    /// Optional reasoning effort; validation warns it is only used by OpenAI
    /// providers.
    #[serde(default)]
    pub reasoning_effort: Option<String>,

    /// Optional thinking level; validation warns it is only used by Gemini
    /// providers.
    #[serde(default)]
    pub thinking_level: Option<GeminiThinkingLevel>,
    /// Optional thinking budget; validation warns it is only used by Gemini
    /// providers.
    #[serde(default)]
    pub thinking_budget: Option<i32>,
    /// Optional flag; defaults to `None`.
    #[serde(default)]
    pub include_thoughts: Option<bool>,

    /// Optional API key; defaults to `None`.
    /// NOTE(review): the derived `Debug` and `Serialize` emit this value
    /// verbatim — confirm that is acceptable for a secret.
    #[serde(default)]
    pub api_key: Option<String>,

    /// Optional inline Candle settings; defaults to `None`.
    #[serde(default)]
    pub candle: Option<CandleInlineConfig>,

    /// Optional vision model name; defaults to `None`.
    #[serde(default)]
    pub vision_model: Option<String>,

    /// Gonka nodes. Must be non-empty for gonka entries; omitted from output
    /// when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub gonka_nodes: Vec<GonkaNode>,
    /// Optional chain prefix; `effective_gonka_chain_prefix` falls back to
    /// `"gonka"`. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gonka_chain_prefix: Option<String>,

    /// Optional cocoon client URL. For cocoon entries it must parse as a URL
    /// with a loopback host and an http/https scheme. Omitted from output when
    /// `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_client_url: Option<String>,
    /// Optional access hash; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_access_hash: Option<String>,
    /// Defaults to `true`; omitted from output while `true`.
    #[serde(default = "default_true", skip_serializing_if = "is_true")]
    pub cocoon_health_check: bool,
    /// Optional pricing; for cocoon entries both values must be finite and
    /// non-negative. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_pricing: Option<CocoonPricing>,

    /// Optional path to an instruction file; defaults to `None`.
    #[serde(default)]
    pub instruction_file: Option<std::path::PathBuf>,

    /// Optional concurrency limit; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_concurrent: Option<u32>,
}
1570
1571impl Default for ProviderEntry {
1572 fn default() -> Self {
1573 Self {
1574 provider_type: ProviderKind::Ollama,
1575 name: None,
1576 model: None,
1577 base_url: None,
1578 max_tokens: None,
1579 embedding_model: None,
1580 stt_model: None,
1581 embed: false,
1582 default: false,
1583 thinking: None,
1584 server_compaction: false,
1585 enable_extended_context: false,
1586 prompt_cache_ttl: None,
1587 reasoning_effort: None,
1588 thinking_level: None,
1589 thinking_budget: None,
1590 include_thoughts: None,
1591 api_key: None,
1592 candle: None,
1593 vision_model: None,
1594 gonka_nodes: Vec::new(),
1595 gonka_chain_prefix: None,
1596 cocoon_client_url: None,
1597 cocoon_access_hash: None,
1598 cocoon_health_check: true,
1599 cocoon_pricing: None,
1600 instruction_file: None,
1601 max_concurrent: None,
1602 }
1603 }
1604}
1605
1606impl ProviderEntry {
1607 #[must_use]
1609 pub fn effective_name(&self) -> String {
1610 self.name
1611 .clone()
1612 .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1613 }
1614
1615 #[must_use]
1620 pub fn effective_model(&self) -> String {
1621 if let Some(ref m) = self.model {
1622 return m.clone();
1623 }
1624 match self.provider_type {
1625 ProviderKind::Ollama => "qwen3:8b".to_owned(),
1626 ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1627 ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1628 ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1629 ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1632 ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1633 }
1634 }
1635
1636 pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1643 use crate::error::ConfigError;
1644
1645 if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1647 return Err(ConfigError::Validation(
1648 "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1649 ));
1650 }
1651
1652 if self.provider_type == ProviderKind::Gonka {
1654 if self.name.is_none() {
1655 return Err(ConfigError::Validation(
1656 "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1657 ));
1658 }
1659 self.validate_gonka_nodes()?;
1660 }
1661
1662 if self.provider_type == ProviderKind::Cocoon
1664 && self.name.as_ref().is_none_or(String::is_empty)
1665 {
1666 return Err(ConfigError::Validation(
1667 "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1668 ));
1669 }
1670
1671 if self.provider_type == ProviderKind::Cocoon {
1673 let name = self.effective_name();
1674 if let Some(ref url_str) = self.cocoon_client_url {
1675 match url::Url::parse(url_str) {
1676 Err(_) => {
1677 return Err(ConfigError::Validation(format!(
1678 "[[llm.providers]] entry '{name}': cocoon_client_url \
1679 '{url_str}' is not a valid URL; expected format: \
1680 http://localhost:10000"
1681 )));
1682 }
1683 Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1684 return Err(ConfigError::Validation(format!(
1685 "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1686 localhost or 127.0.0.1, got '{}'",
1687 u.host_str().unwrap_or("<none>")
1688 )));
1689 }
1690 Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1691 return Err(ConfigError::Validation(format!(
1692 "[[llm.providers]] entry '{name}': cocoon_client_url \
1693 scheme must be http or https, got '{}'",
1694 u.scheme()
1695 )));
1696 }
1697 _ => {}
1698 }
1699 }
1700 if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1701 return Err(ConfigError::Validation(format!(
1702 "[[llm.providers]] entry '{name}': model must not be empty \
1703 for cocoon provider"
1704 )));
1705 }
1706 if let Some(ref p) = self.cocoon_pricing {
1707 if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1708 return Err(ConfigError::Validation(format!(
1709 "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1710 must be a finite non-negative number"
1711 )));
1712 }
1713 if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1714 return Err(ConfigError::Validation(format!(
1715 "[[llm.providers]] entry '{name}': \
1716 cocoon_pricing.completion_cents_per_1k \
1717 must be a finite non-negative number"
1718 )));
1719 }
1720 }
1721 }
1722
1723 self.warn_irrelevant_fields();
1725
1726 if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1729 tracing::warn!(
1730 provider = self.effective_name(),
1731 "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1732 Whisper STT API — use OpenAI, compatible, or candle instead"
1733 );
1734 }
1735
1736 Ok(())
1737 }
1738
1739 #[must_use]
1741 pub fn effective_gonka_chain_prefix(&self) -> &str {
1742 self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1743 }
1744
1745 fn warn_irrelevant_fields(&self) {
1746 let name = self.effective_name();
1747 match self.provider_type {
1748 ProviderKind::Ollama => {
1749 if self.thinking.is_some() {
1750 tracing::warn!(
1751 provider = name,
1752 "field `thinking` is only used by Claude providers"
1753 );
1754 }
1755 if self.reasoning_effort.is_some() {
1756 tracing::warn!(
1757 provider = name,
1758 "field `reasoning_effort` is only used by OpenAI providers"
1759 );
1760 }
1761 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1762 tracing::warn!(
1763 provider = name,
1764 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1765 );
1766 }
1767 }
1768 ProviderKind::Claude => {
1769 if self.reasoning_effort.is_some() {
1770 tracing::warn!(
1771 provider = name,
1772 "field `reasoning_effort` is only used by OpenAI providers"
1773 );
1774 }
1775 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1776 tracing::warn!(
1777 provider = name,
1778 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1779 );
1780 }
1781 }
1782 ProviderKind::OpenAi => {
1783 if self.thinking.is_some() {
1784 tracing::warn!(
1785 provider = name,
1786 "field `thinking` is only used by Claude providers"
1787 );
1788 }
1789 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1790 tracing::warn!(
1791 provider = name,
1792 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1793 );
1794 }
1795 }
1796 ProviderKind::Gemini => {
1797 if self.thinking.is_some() {
1798 tracing::warn!(
1799 provider = name,
1800 "field `thinking` is only used by Claude providers"
1801 );
1802 }
1803 if self.reasoning_effort.is_some() {
1804 tracing::warn!(
1805 provider = name,
1806 "field `reasoning_effort` is only used by OpenAI providers"
1807 );
1808 }
1809 }
1810 ProviderKind::Gonka => {
1811 if self.thinking.is_some() {
1812 tracing::warn!(
1813 provider = name,
1814 "field `thinking` is only used by Claude providers"
1815 );
1816 }
1817 if self.reasoning_effort.is_some() {
1818 tracing::warn!(
1819 provider = name,
1820 "field `reasoning_effort` is only used by OpenAI providers"
1821 );
1822 }
1823 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1824 tracing::warn!(
1825 provider = name,
1826 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1827 );
1828 }
1829 }
1830 ProviderKind::Compatible | ProviderKind::Candle => {}
1831 ProviderKind::Cocoon => {
1832 if self.base_url.is_some() {
1833 tracing::warn!(
1834 provider = name,
1835 "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1836 );
1837 }
1838 }
1839 }
1840 }
1841
1842 fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1843 use crate::error::ConfigError;
1844 if self.gonka_nodes.is_empty() {
1845 return Err(ConfigError::Validation(format!(
1846 "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1847 self.effective_name()
1848 )));
1849 }
1850 for (i, node) in self.gonka_nodes.iter().enumerate() {
1851 if node.url.is_empty() {
1852 return Err(ConfigError::Validation(format!(
1853 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1854 self.effective_name()
1855 )));
1856 }
1857 if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1858 return Err(ConfigError::Validation(format!(
1859 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1860 self.effective_name()
1861 )));
1862 }
1863 }
1864 Ok(())
1865 }
1866}
1867
/// Optional per-provider overrides.
///
/// Every field is optional. Fields missing from the input deserialize to their
/// `Default` value (`#[serde(default)]`), and unset fields are omitted when the
/// struct is serialized.
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct ProviderOverrides {
    /// Reasoning-effort override; skipped during serialization when `None`.
    // NOTE(review): the set of accepted values is not visible here — confirm against the consumer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<String>,
}
1898
1899impl ProviderOverrides {
1900 #[must_use]
1913 pub fn is_empty(&self) -> bool {
1914 self.reasoning_effort.is_none()
1915 }
1916}
1917
1918pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1928 use crate::error::ConfigError;
1929 use std::collections::HashSet;
1930
1931 if entries.is_empty() {
1932 return Err(ConfigError::Validation(
1933 "at least one LLM provider must be configured in [[llm.providers]]".into(),
1934 ));
1935 }
1936
1937 let default_count = entries.iter().filter(|e| e.default).count();
1938 if default_count > 1 {
1939 return Err(ConfigError::Validation(
1940 "only one [[llm.providers]] entry can be marked `default = true`".into(),
1941 ));
1942 }
1943
1944 let mut seen_names: HashSet<String> = HashSet::new();
1945 for entry in entries {
1946 let name = entry.effective_name();
1947 if !seen_names.insert(name.clone()) {
1948 return Err(ConfigError::Validation(format!(
1949 "duplicate provider name \"{name}\" in [[llm.providers]]"
1950 )));
1951 }
1952 entry.validate()?;
1953 }
1954
1955 Ok(())
1956}
1957
1958#[cfg(test)]
1959mod tests {
1960 use super::*;
1961
1962 fn ollama_entry() -> ProviderEntry {
1963 ProviderEntry {
1964 provider_type: ProviderKind::Ollama,
1965 name: Some("ollama".into()),
1966 model: Some("qwen3:8b".into()),
1967 ..Default::default()
1968 }
1969 }
1970
1971 fn claude_entry() -> ProviderEntry {
1972 ProviderEntry {
1973 provider_type: ProviderKind::Claude,
1974 name: Some("claude".into()),
1975 model: Some("claude-sonnet-4-6".into()),
1976 max_tokens: Some(8192),
1977 ..Default::default()
1978 }
1979 }
1980
1981 #[test]
1984 fn validate_ollama_valid() {
1985 assert!(ollama_entry().validate().is_ok());
1986 }
1987
1988 #[test]
1989 fn validate_claude_valid() {
1990 assert!(claude_entry().validate().is_ok());
1991 }
1992
1993 #[test]
1994 fn validate_compatible_without_name_errors() {
1995 let entry = ProviderEntry {
1996 provider_type: ProviderKind::Compatible,
1997 name: None,
1998 ..Default::default()
1999 };
2000 let err = entry.validate().unwrap_err();
2001 assert!(
2002 err.to_string().contains("compatible"),
2003 "error should mention compatible: {err}"
2004 );
2005 }
2006
2007 #[test]
2008 fn validate_compatible_with_name_ok() {
2009 let entry = ProviderEntry {
2010 provider_type: ProviderKind::Compatible,
2011 name: Some("my-proxy".into()),
2012 base_url: Some("http://localhost:8080".into()),
2013 model: Some("gpt-4o".into()),
2014 max_tokens: Some(4096),
2015 ..Default::default()
2016 };
2017 assert!(entry.validate().is_ok());
2018 }
2019
2020 #[test]
2021 fn validate_openai_valid() {
2022 let entry = ProviderEntry {
2023 provider_type: ProviderKind::OpenAi,
2024 name: Some("openai".into()),
2025 model: Some("gpt-4o".into()),
2026 max_tokens: Some(4096),
2027 ..Default::default()
2028 };
2029 assert!(entry.validate().is_ok());
2030 }
2031
2032 #[test]
2033 fn validate_gemini_valid() {
2034 let entry = ProviderEntry {
2035 provider_type: ProviderKind::Gemini,
2036 name: Some("gemini".into()),
2037 model: Some("gemini-2.0-flash".into()),
2038 ..Default::default()
2039 };
2040 assert!(entry.validate().is_ok());
2041 }
2042
2043 #[test]
2046 fn validate_pool_empty_errors() {
2047 let err = validate_pool(&[]).unwrap_err();
2048 assert!(err.to_string().contains("at least one"), "{err}");
2049 }
2050
2051 #[test]
2052 fn validate_pool_single_entry_ok() {
2053 assert!(validate_pool(&[ollama_entry()]).is_ok());
2054 }
2055
2056 #[test]
2057 fn validate_pool_duplicate_names_errors() {
2058 let a = ollama_entry();
2059 let b = ollama_entry(); let err = validate_pool(&[a, b]).unwrap_err();
2061 assert!(err.to_string().contains("duplicate"), "{err}");
2062 }
2063
2064 #[test]
2065 fn validate_pool_multiple_defaults_errors() {
2066 let mut a = ollama_entry();
2067 let mut b = claude_entry();
2068 a.default = true;
2069 b.default = true;
2070 let err = validate_pool(&[a, b]).unwrap_err();
2071 assert!(err.to_string().contains("default"), "{err}");
2072 }
2073
2074 #[test]
2075 fn validate_pool_two_different_providers_ok() {
2076 assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
2077 }
2078
2079 #[test]
2080 fn validate_pool_propagates_entry_error() {
2081 let bad = ProviderEntry {
2082 provider_type: ProviderKind::Compatible,
2083 name: None, ..Default::default()
2085 };
2086 assert!(validate_pool(&[bad]).is_err());
2087 }
2088
2089 #[test]
2092 fn effective_model_returns_explicit_when_set() {
2093 let entry = ProviderEntry {
2094 provider_type: ProviderKind::Claude,
2095 model: Some("claude-sonnet-4-6".into()),
2096 ..Default::default()
2097 };
2098 assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
2099 }
2100
2101 #[test]
2102 fn effective_model_ollama_default_when_none() {
2103 let entry = ProviderEntry {
2104 provider_type: ProviderKind::Ollama,
2105 model: None,
2106 ..Default::default()
2107 };
2108 assert_eq!(entry.effective_model(), "qwen3:8b");
2109 }
2110
2111 #[test]
2112 fn effective_model_claude_default_when_none() {
2113 let entry = ProviderEntry {
2114 provider_type: ProviderKind::Claude,
2115 model: None,
2116 ..Default::default()
2117 };
2118 assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
2119 }
2120
2121 #[test]
2122 fn effective_model_openai_default_when_none() {
2123 let entry = ProviderEntry {
2124 provider_type: ProviderKind::OpenAi,
2125 model: None,
2126 ..Default::default()
2127 };
2128 assert_eq!(entry.effective_model(), "gpt-4o-mini");
2129 }
2130
2131 #[test]
2132 fn effective_model_gemini_default_when_none() {
2133 let entry = ProviderEntry {
2134 provider_type: ProviderKind::Gemini,
2135 model: None,
2136 ..Default::default()
2137 };
2138 assert_eq!(entry.effective_model(), "gemini-2.0-flash");
2139 }
2140
2141 fn parse_llm(toml: &str) -> LlmConfig {
2145 #[derive(serde::Deserialize)]
2146 struct Wrapper {
2147 llm: LlmConfig,
2148 }
2149 toml::from_str::<Wrapper>(toml).unwrap().llm
2150 }
2151
2152 #[test]
2153 fn check_legacy_format_new_format_ok() {
2154 let cfg = parse_llm(
2155 r#"
2156[llm]
2157
2158[[llm.providers]]
2159type = "ollama"
2160model = "qwen3:8b"
2161"#,
2162 );
2163 assert!(cfg.check_legacy_format().is_ok());
2164 }
2165
2166 #[test]
2167 fn check_legacy_format_empty_providers_no_legacy_ok() {
2168 let cfg = parse_llm("[llm]\n");
2170 assert!(cfg.check_legacy_format().is_ok());
2171 }
2172
2173 #[test]
2176 fn effective_provider_falls_back_to_ollama_when_no_providers() {
2177 let cfg = parse_llm("[llm]\n");
2178 assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
2179 }
2180
2181 #[test]
2182 fn effective_provider_reads_from_providers_first() {
2183 let cfg = parse_llm(
2184 r#"
2185[llm]
2186
2187[[llm.providers]]
2188type = "claude"
2189model = "claude-sonnet-4-6"
2190"#,
2191 );
2192 assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
2193 }
2194
2195 #[test]
2196 fn effective_model_reads_from_providers_first() {
2197 let cfg = parse_llm(
2198 r#"
2199[llm]
2200
2201[[llm.providers]]
2202type = "ollama"
2203model = "qwen3:8b"
2204"#,
2205 );
2206 assert_eq!(cfg.effective_model(), "qwen3:8b");
2207 }
2208
2209 #[test]
2210 fn effective_model_skips_embed_only_provider() {
2211 let cfg = parse_llm(
2212 r#"
2213[llm]
2214
2215[[llm.providers]]
2216type = "ollama"
2217model = "gemma4:26b"
2218embed = true
2219
2220[[llm.providers]]
2221type = "openai"
2222model = "gpt-4o-mini"
2223"#,
2224 );
2225 assert_eq!(cfg.effective_model(), "gpt-4o-mini");
2226 }
2227
2228 #[test]
2229 fn effective_base_url_default_when_absent() {
2230 let cfg = parse_llm("[llm]\n");
2231 assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
2232 }
2233
2234 #[test]
2235 fn effective_base_url_from_providers_entry() {
2236 let cfg = parse_llm(
2237 r#"
2238[llm]
2239
2240[[llm.providers]]
2241type = "ollama"
2242base_url = "http://myhost:11434"
2243"#,
2244 );
2245 assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
2246 }
2247
2248 #[test]
2251 fn complexity_routing_defaults() {
2252 let cr = ComplexityRoutingConfig::default();
2253 assert!(
2254 cr.bypass_single_provider,
2255 "bypass_single_provider must default to true"
2256 );
2257 assert_eq!(cr.triage_timeout_secs, 5);
2258 assert_eq!(cr.max_triage_tokens, 50);
2259 assert!(cr.triage_provider.is_none());
2260 assert!(cr.tiers.simple.is_none());
2261 }
2262
2263 #[test]
2264 fn complexity_routing_toml_round_trip() {
2265 let cfg = parse_llm(
2266 r#"
2267[llm]
2268routing = "triage"
2269
2270[llm.complexity_routing]
2271triage_provider = "fast"
2272bypass_single_provider = false
2273triage_timeout_secs = 10
2274max_triage_tokens = 100
2275
2276[llm.complexity_routing.tiers]
2277simple = "fast"
2278medium = "medium"
2279complex = "large"
2280expert = "opus"
2281"#,
2282 );
2283 assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2284 let cr = cfg
2285 .complexity_routing
2286 .expect("complexity_routing must be present");
2287 assert_eq!(
2288 cr.triage_provider.as_ref().map(ProviderName::as_str),
2289 Some("fast")
2290 );
2291 assert!(!cr.bypass_single_provider);
2292 assert_eq!(cr.triage_timeout_secs, 10);
2293 assert_eq!(cr.max_triage_tokens, 100);
2294 assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
2295 assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
2296 assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
2297 assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
2298 }
2299
2300 #[test]
2301 fn complexity_routing_partial_tiers_toml() {
2302 let cfg = parse_llm(
2304 r#"
2305[llm]
2306routing = "triage"
2307
2308[llm.complexity_routing.tiers]
2309simple = "haiku"
2310complex = "sonnet"
2311"#,
2312 );
2313 let cr = cfg
2314 .complexity_routing
2315 .expect("complexity_routing must be present");
2316 assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
2317 assert!(cr.tiers.medium.is_none());
2318 assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
2319 assert!(cr.tiers.expert.is_none());
2320 assert!(cr.bypass_single_provider);
2322 assert_eq!(cr.triage_timeout_secs, 5);
2323 }
2324
2325 #[test]
2326 fn routing_strategy_triage_deserialized() {
2327 let cfg = parse_llm(
2328 r#"
2329[llm]
2330routing = "triage"
2331"#,
2332 );
2333 assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2334 }
2335
2336 #[test]
2339 fn stt_provider_entry_by_name_match() {
2340 let cfg = parse_llm(
2341 r#"
2342[llm]
2343
2344[[llm.providers]]
2345type = "openai"
2346name = "quality"
2347model = "gpt-5.4"
2348stt_model = "gpt-4o-mini-transcribe"
2349
2350[llm.stt]
2351provider = "quality"
2352"#,
2353 );
2354 let entry = cfg.stt_provider_entry().expect("should find stt provider");
2355 assert_eq!(entry.effective_name(), "quality");
2356 assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
2357 }
2358
2359 #[test]
2360 fn stt_provider_entry_auto_detect_when_provider_empty() {
2361 let cfg = parse_llm(
2362 r#"
2363[llm]
2364
2365[[llm.providers]]
2366type = "openai"
2367name = "openai-stt"
2368stt_model = "whisper-1"
2369
2370[llm.stt]
2371provider = ""
2372"#,
2373 );
2374 let entry = cfg.stt_provider_entry().expect("should auto-detect");
2375 assert_eq!(entry.effective_name(), "openai-stt");
2376 }
2377
2378 #[test]
2379 fn stt_provider_entry_auto_detect_no_stt_section() {
2380 let cfg = parse_llm(
2381 r#"
2382[llm]
2383
2384[[llm.providers]]
2385type = "openai"
2386name = "openai-stt"
2387stt_model = "whisper-1"
2388"#,
2389 );
2390 let entry = cfg.stt_provider_entry().expect("should auto-detect");
2392 assert_eq!(entry.effective_name(), "openai-stt");
2393 }
2394
2395 #[test]
2396 fn stt_provider_entry_none_when_no_stt_model() {
2397 let cfg = parse_llm(
2398 r#"
2399[llm]
2400
2401[[llm.providers]]
2402type = "openai"
2403name = "quality"
2404model = "gpt-5.4"
2405"#,
2406 );
2407 assert!(cfg.stt_provider_entry().is_none());
2408 }
2409
2410 #[test]
2411 fn stt_provider_entry_name_mismatch_falls_back_to_none() {
2412 let cfg = parse_llm(
2414 r#"
2415[llm]
2416
2417[[llm.providers]]
2418type = "openai"
2419name = "quality"
2420model = "gpt-5.4"
2421
2422[[llm.providers]]
2423type = "openai"
2424name = "openai-stt"
2425stt_model = "whisper-1"
2426
2427[llm.stt]
2428provider = "quality"
2429"#,
2430 );
2431 assert!(cfg.stt_provider_entry().is_none());
2433 }
2434
2435 #[test]
2436 fn stt_config_deserializes_new_slim_format() {
2437 let cfg = parse_llm(
2438 r#"
2439[llm]
2440
2441[[llm.providers]]
2442type = "openai"
2443name = "quality"
2444stt_model = "whisper-1"
2445
2446[llm.stt]
2447provider = "quality"
2448language = "en"
2449"#,
2450 );
2451 let stt = cfg.stt.as_ref().expect("stt section present");
2452 assert_eq!(stt.provider, "quality");
2453 assert_eq!(stt.language, "en");
2454 }
2455
2456 #[test]
2457 fn stt_config_default_provider_is_empty() {
2458 assert_eq!(default_stt_provider(), "");
2460 }
2461
2462 #[test]
2463 fn validate_stt_missing_provider_ok() {
2464 let cfg = parse_llm("[llm]\n");
2465 assert!(cfg.validate_stt().is_ok());
2466 }
2467
2468 #[test]
2469 fn validate_stt_valid_reference() {
2470 let cfg = parse_llm(
2471 r#"
2472[llm]
2473
2474[[llm.providers]]
2475type = "openai"
2476name = "quality"
2477stt_model = "whisper-1"
2478
2479[llm.stt]
2480provider = "quality"
2481"#,
2482 );
2483 assert!(cfg.validate_stt().is_ok());
2484 }
2485
2486 #[test]
2487 fn validate_stt_nonexistent_provider_errors() {
2488 let cfg = parse_llm(
2489 r#"
2490[llm]
2491
2492[[llm.providers]]
2493type = "openai"
2494name = "quality"
2495model = "gpt-5.4"
2496
2497[llm.stt]
2498provider = "nonexistent"
2499"#,
2500 );
2501 assert!(cfg.validate_stt().is_err());
2502 }
2503
2504 #[test]
2505 fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
2506 let cfg = parse_llm(
2508 r#"
2509[llm]
2510
2511[[llm.providers]]
2512type = "openai"
2513name = "quality"
2514model = "gpt-5.4"
2515
2516[llm.stt]
2517provider = "quality"
2518"#,
2519 );
2520 assert!(cfg.validate_stt().is_ok());
2522 assert!(
2524 cfg.stt_provider_entry().is_none(),
2525 "stt_provider_entry must be None when provider has no stt_model"
2526 );
2527 }
2528
2529 #[test]
2532 fn bandit_warmup_queries_explicit_value_is_deserialized() {
2533 let cfg = parse_llm(
2534 r#"
2535[llm]
2536
2537[llm.router]
2538strategy = "bandit"
2539
2540[llm.router.bandit]
2541warmup_queries = 50
2542"#,
2543 );
2544 let bandit = cfg
2545 .router
2546 .expect("router section must be present")
2547 .bandit
2548 .expect("bandit section must be present");
2549 assert_eq!(
2550 bandit.warmup_queries,
2551 Some(50),
2552 "warmup_queries = 50 must deserialize to Some(50)"
2553 );
2554 }
2555
2556 #[test]
2557 fn bandit_warmup_queries_explicit_null_is_none() {
2558 let cfg = parse_llm(
2561 r#"
2562[llm]
2563
2564[llm.router]
2565strategy = "bandit"
2566
2567[llm.router.bandit]
2568warmup_queries = 0
2569"#,
2570 );
2571 let bandit = cfg
2572 .router
2573 .expect("router section must be present")
2574 .bandit
2575 .expect("bandit section must be present");
2576 assert_eq!(
2578 bandit.warmup_queries,
2579 Some(0),
2580 "warmup_queries = 0 must deserialize to Some(0)"
2581 );
2582 }
2583
2584 #[test]
2585 fn bandit_warmup_queries_missing_field_defaults_to_none() {
2586 let cfg = parse_llm(
2588 r#"
2589[llm]
2590
2591[llm.router]
2592strategy = "bandit"
2593
2594[llm.router.bandit]
2595alpha = 1.5
2596"#,
2597 );
2598 let bandit = cfg
2599 .router
2600 .expect("router section must be present")
2601 .bandit
2602 .expect("bandit section must be present");
2603 assert_eq!(
2604 bandit.warmup_queries, None,
2605 "omitted warmup_queries must default to None"
2606 );
2607 }
2608
2609 #[test]
2610 fn provider_name_new_and_as_str() {
2611 let n = ProviderName::new("fast");
2612 assert_eq!(n.as_str(), "fast");
2613 assert!(!n.is_empty());
2614 }
2615
2616 #[test]
2617 fn provider_name_default_is_empty() {
2618 let n = ProviderName::default();
2619 assert!(n.is_empty());
2620 assert_eq!(n.as_str(), "");
2621 }
2622
2623 #[test]
2624 fn provider_name_partial_eq_str() {
2625 let n = ProviderName::new("fast");
2626 assert_eq!(n, "fast");
2627 assert_ne!(n, "slow");
2628 }
2629
2630 #[test]
2631 fn provider_name_serde_roundtrip() {
2632 let n = ProviderName::new("my-provider");
2633 let json = serde_json::to_string(&n).expect("serialize");
2634 assert_eq!(json, "\"my-provider\"");
2635 let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2636 assert_eq!(back, n);
2637 }
2638
2639 #[test]
2640 fn provider_name_serde_empty_roundtrip() {
2641 let n = ProviderName::default();
2642 let json = serde_json::to_string(&n).expect("serialize");
2643 assert_eq!(json, "\"\"");
2644 let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2645 assert_eq!(back, n);
2646 assert!(back.is_empty());
2647 }
2648
2649 fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2652 ProviderEntry {
2653 provider_type: ProviderKind::Gonka,
2654 name: Some("my-gonka".into()),
2655 gonka_nodes: nodes,
2656 ..Default::default()
2657 }
2658 }
2659
2660 fn valid_gonka_nodes() -> Vec<GonkaNode> {
2661 vec![
2662 GonkaNode {
2663 url: "https://node1.gonka.ai".into(),
2664 address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2665 name: Some("node1".into()),
2666 },
2667 GonkaNode {
2668 url: "https://node2.gonka.ai".into(),
2669 address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2670 name: Some("node2".into()),
2671 },
2672 GonkaNode {
2673 url: "http://node3.internal".into(),
2674 address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2675 name: None,
2676 },
2677 ]
2678 }
2679
2680 #[test]
2681 fn validate_gonka_valid() {
2682 let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2683 assert!(entry.validate().is_ok());
2684 }
2685
2686 #[test]
2687 fn validate_gonka_empty_nodes_errors() {
2688 let entry = gonka_entry_with_nodes(vec![]);
2689 let err = entry.validate().unwrap_err();
2690 assert!(
2691 err.to_string().contains("gonka_nodes"),
2692 "error should mention gonka_nodes: {err}"
2693 );
2694 }
2695
2696 #[test]
2697 fn validate_gonka_node_empty_url_errors() {
2698 let entry = gonka_entry_with_nodes(vec![GonkaNode {
2699 url: String::new(),
2700 address: "gonka1test".into(),
2701 name: None,
2702 }]);
2703 let err = entry.validate().unwrap_err();
2704 assert!(err.to_string().contains("url"), "{err}");
2705 }
2706
2707 #[test]
2708 fn validate_gonka_node_invalid_scheme_errors() {
2709 let entry = gonka_entry_with_nodes(vec![GonkaNode {
2710 url: "ftp://node.gonka.ai".into(),
2711 address: "gonka1test".into(),
2712 name: None,
2713 }]);
2714 let err = entry.validate().unwrap_err();
2715 assert!(err.to_string().contains("http"), "{err}");
2716 }
2717
2718 #[test]
2719 fn validate_gonka_without_name_errors() {
2720 let entry = ProviderEntry {
2721 provider_type: ProviderKind::Gonka,
2722 name: None,
2723 gonka_nodes: valid_gonka_nodes(),
2724 ..Default::default()
2725 };
2726 let err = entry.validate().unwrap_err();
2727 assert!(err.to_string().contains("gonka"), "{err}");
2728 }
2729
2730 #[test]
2731 fn gonka_toml_round_trip() {
2732 let toml = r#"
2733[llm]
2734
2735[[llm.providers]]
2736type = "gonka"
2737name = "my-gonka"
2738gonka_chain_prefix = "custom-chain"
2739
2740[[llm.providers.gonka_nodes]]
2741url = "https://node1.gonka.ai"
2742address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2743name = "node1"
2744
2745[[llm.providers.gonka_nodes]]
2746url = "https://node2.gonka.ai"
2747address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
2748name = "node2"
2749
2750[[llm.providers.gonka_nodes]]
2751url = "https://node3.gonka.ai"
2752address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
2753"#;
2754 let cfg = parse_llm(toml);
2755 assert_eq!(cfg.providers.len(), 1);
2756 let entry = &cfg.providers[0];
2757 assert_eq!(entry.provider_type, ProviderKind::Gonka);
2758 assert_eq!(entry.name.as_deref(), Some("my-gonka"));
2759 let nodes = &entry.gonka_nodes;
2760 assert_eq!(nodes.len(), 3);
2761 assert_eq!(nodes[0].url, "https://node1.gonka.ai");
2762 assert_eq!(
2763 nodes[0].address,
2764 "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2765 );
2766 assert_eq!(nodes[0].name.as_deref(), Some("node1"));
2767 assert_eq!(nodes[2].name, None);
2768 assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
2769 }
2770
2771 #[test]
2772 fn gonka_default_chain_prefix() {
2773 let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2774 assert_eq!(entry.effective_gonka_chain_prefix(), "gonka");
2775 }
2776
2777 #[test]
2778 fn gonka_explicit_chain_prefix() {
2779 let entry = ProviderEntry {
2780 provider_type: ProviderKind::Gonka,
2781 name: Some("my-gonka".into()),
2782 gonka_nodes: valid_gonka_nodes(),
2783 gonka_chain_prefix: Some("my-chain".into()),
2784 ..Default::default()
2785 };
2786 assert_eq!(entry.effective_gonka_chain_prefix(), "my-chain");
2787 }
2788
2789 #[test]
2790 fn effective_model_gonka_is_empty() {
2791 let entry = ProviderEntry {
2792 provider_type: ProviderKind::Gonka,
2793 model: None,
2794 ..Default::default()
2795 };
2796 assert_eq!(entry.effective_model(), "");
2797 }
2798
2799 #[test]
2800 fn existing_configs_still_parse() {
2801 let toml = r#"
2802[llm]
2803
2804[[llm.providers]]
2805type = "ollama"
2806model = "qwen3:8b"
2807
2808[[llm.providers]]
2809type = "claude"
2810name = "claude"
2811model = "claude-sonnet-4-6"
2812"#;
2813 let cfg = parse_llm(toml);
2814 assert_eq!(cfg.providers.len(), 2);
2815 assert_eq!(cfg.providers[0].provider_type, ProviderKind::Ollama);
2816 assert_eq!(cfg.providers[1].provider_type, ProviderKind::Claude);
2817 }
2818
2819 fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
2822 ProviderEntry {
2823 provider_type: ProviderKind::Cocoon,
2824 name: Some("cocoon".into()),
2825 cocoon_client_url: url.map(str::to_owned),
2826 model: model.map(str::to_owned),
2827 ..Default::default()
2828 }
2829 }
2830
2831 #[test]
2832 fn test_cocoon_url_validation_accepts_http() {
2833 assert!(
2834 cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2835 .validate()
2836 .is_ok()
2837 );
2838 }
2839
2840 #[test]
2841 fn test_cocoon_url_validation_accepts_https_localhost() {
2842 assert!(
2843 cocoon_entry(Some("https://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2844 .validate()
2845 .is_ok()
2846 );
2847 }
2848
2849 #[test]
2850 fn test_cocoon_url_validation_rejects_non_localhost() {
2851 let err = cocoon_entry(Some("http://192.168.1.10:10000"), Some("Qwen/Qwen3-0.6B"))
2852 .validate()
2853 .unwrap_err();
2854 assert!(
2855 err.to_string().contains("localhost"),
2856 "error should mention localhost restriction: {err}"
2857 );
2858 }
2859
2860 #[test]
2861 fn test_cocoon_url_validation_rejects_non_http_scheme() {
2862 let err = cocoon_entry(Some("ftp://localhost"), Some("Qwen/Qwen3-0.6B"))
2863 .validate()
2864 .unwrap_err();
2865 assert!(
2866 err.to_string().contains("ftp"),
2867 "error should mention the bad scheme: {err}"
2868 );
2869 }
2870
2871 #[test]
2872 fn test_cocoon_url_validation_rejects_invalid_url() {
2873 let err = cocoon_entry(Some("not-a-url"), Some("Qwen/Qwen3-0.6B"))
2874 .validate()
2875 .unwrap_err();
2876 assert!(
2877 err.to_string().contains("not-a-url"),
2878 "error should mention the bad value: {err}"
2879 );
2880 }
2881
2882 #[test]
2883 fn test_cocoon_url_none_passes() {
2884 assert!(
2885 cocoon_entry(None, Some("Qwen/Qwen3-0.6B"))
2886 .validate()
2887 .is_ok()
2888 );
2889 }
2890
2891 #[test]
2892 fn test_cocoon_model_empty_rejected() {
2893 let err = cocoon_entry(Some("http://localhost:10000"), Some(""))
2894 .validate()
2895 .unwrap_err();
2896 assert!(
2897 err.to_string().contains("empty"),
2898 "error should mention 'empty': {err}"
2899 );
2900 }
2901
2902 #[test]
2903 fn test_cocoon_model_none_passes() {
2904 assert!(
2905 cocoon_entry(Some("http://localhost:10000"), None)
2906 .validate()
2907 .is_ok()
2908 );
2909 }
2910
2911 #[test]
2912 fn validate_cocoon_pricing_negative_prompt_errors() {
2913 let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2914 e.cocoon_pricing = Some(CocoonPricing {
2915 prompt_cents_per_1k: -1.0,
2916 completion_cents_per_1k: 0.03,
2917 });
2918 assert!(e.validate().is_err());
2919 }
2920
2921 #[test]
2922 fn validate_cocoon_pricing_negative_completion_errors() {
2923 let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2924 e.cocoon_pricing = Some(CocoonPricing {
2925 prompt_cents_per_1k: 0.01,
2926 completion_cents_per_1k: -0.5,
2927 });
2928 assert!(e.validate().is_err());
2929 }
2930
2931 #[test]
2932 fn validate_cocoon_pricing_valid_passes() {
2933 let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2934 e.cocoon_pricing = Some(CocoonPricing {
2935 prompt_cents_per_1k: 0.01,
2936 completion_cents_per_1k: 0.03,
2937 });
2938 assert!(e.validate().is_ok());
2939 }
2940}