1use std::fmt;
5
6use serde::{Deserialize, Serialize};
7
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
15#[serde(tag = "mode", rename_all = "snake_case")]
16pub enum ThinkingConfig {
17 Extended {
19 budget_tokens: u32,
21 },
22 Adaptive {
24 #[serde(default, skip_serializing_if = "Option::is_none")]
26 effort: Option<ThinkingEffort>,
27 },
28}
29
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
32#[serde(rename_all = "lowercase")]
33pub enum ThinkingEffort {
34 Low,
36 #[default]
38 Medium,
39 High,
41}
42
43#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
49#[serde(rename_all = "snake_case")]
50pub enum CacheTtl {
51 #[default]
53 Ephemeral,
54 #[serde(rename = "1h")]
57 OneHour,
58}
59
60impl CacheTtl {
61 #[must_use]
64 pub fn requires_beta(self) -> bool {
65 match self {
66 Self::OneHour => true,
67 Self::Ephemeral => false,
68 }
69 }
70}
71
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
77#[serde(rename_all = "lowercase")]
78pub enum GeminiThinkingLevel {
79 Minimal,
81 Low,
83 Medium,
85 High,
87}
88
89#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
102#[serde(transparent)]
103pub struct ProviderName(String);
104
105impl ProviderName {
106 #[must_use]
120 pub fn new(name: impl Into<String>) -> Self {
121 Self(name.into())
122 }
123
124 #[must_use]
135 pub fn is_empty(&self) -> bool {
136 self.0.is_empty()
137 }
138
139 #[must_use]
150 pub fn as_str(&self) -> &str {
151 &self.0
152 }
153
154 #[must_use]
168 pub fn as_non_empty(&self) -> Option<&str> {
169 if self.0.is_empty() {
170 None
171 } else {
172 Some(&self.0)
173 }
174 }
175}
176
177impl fmt::Display for ProviderName {
178 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179 self.0.fmt(f)
180 }
181}
182
183impl AsRef<str> for ProviderName {
184 fn as_ref(&self) -> &str {
185 &self.0
186 }
187}
188
189impl std::ops::Deref for ProviderName {
190 type Target = str;
191
192 fn deref(&self) -> &str {
193 &self.0
194 }
195}
196
197impl PartialEq<str> for ProviderName {
198 fn eq(&self, other: &str) -> bool {
199 self.0 == other
200 }
201}
202
203impl PartialEq<&str> for ProviderName {
204 fn eq(&self, other: &&str) -> bool {
205 self.0 == *other
206 }
207}
208
/// Serde default for `response_cache_ttl_secs`: 3600 seconds.
fn default_response_cache_ttl_secs() -> u64 {
    3_600
}

/// Serde default for `semantic_cache_threshold`.
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}

/// Serde default for `semantic_cache_max_candidates`.
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}

/// Serde default for `router_ema_alpha`.
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}

/// Serde default for `router_reorder_interval`.
fn default_router_reorder_interval() -> u64 {
    10_u64
}
228
/// Default embedding model name.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}

/// Serde default for the candle `source` field.
fn default_candle_source() -> String {
    String::from("huggingface")
}

/// Serde default for the candle `chat_template` field.
fn default_chat_template() -> String {
    String::from("chatml")
}

/// Serde default for the candle `device` field.
fn default_candle_device() -> String {
    String::from("cpu")
}
244
/// Serde default for `GenerationParams::temperature`.
fn default_temperature() -> f64 {
    0.7_f64
}

/// Serde default for `GenerationParams::max_tokens`.
fn default_max_tokens() -> usize {
    2_048
}

/// Serde default for `GenerationParams::seed`.
fn default_seed() -> u64 {
    42_u64
}

/// Serde default for `GenerationParams::repeat_penalty`.
fn default_repeat_penalty() -> f32 {
    1.1_f32
}

/// Serde default for `GenerationParams::repeat_last_n`.
fn default_repeat_last_n() -> usize {
    64_usize
}
264
/// Serde default for `CascadeConfig::quality_threshold`.
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}

/// Serde default for `CascadeConfig::max_escalations`.
fn default_cascade_max_escalations() -> u8 {
    2_u8
}

/// Serde default for `CascadeConfig::window_size`.
fn default_cascade_window_size() -> usize {
    50_usize
}

/// Serde default for `ReputationConfig::decay_factor`.
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}

/// Serde default for `ReputationConfig::weight`.
fn default_reputation_weight() -> f64 {
    0.3_f64
}

/// Serde default for `ReputationConfig::min_observations`.
fn default_reputation_min_observations() -> u64 {
    5_u64
}
288
/// Default for `SttConfig::provider`: the empty string.
///
/// The STT lookups in this file treat an empty provider as "no explicit provider".
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}

/// Default for `SttConfig::language`: `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
300
301#[must_use]
303pub fn get_default_embedding_model() -> String {
304 default_embedding_model()
305}
306
307#[must_use]
309pub fn get_default_response_cache_ttl_secs() -> u64 {
310 default_response_cache_ttl_secs()
311}
312
313#[must_use]
315pub fn get_default_router_ema_alpha() -> f64 {
316 default_router_ema_alpha()
317}
318
319#[must_use]
321pub fn get_default_router_reorder_interval() -> u64 {
322 default_router_reorder_interval()
323}
324
325#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
338#[serde(rename_all = "lowercase")]
339pub enum ProviderKind {
340 Ollama,
342 Claude,
344 OpenAi,
346 Gemini,
348 Candle,
350 Compatible,
352 Gonka,
354 Cocoon,
356}
357
358impl ProviderKind {
359 #[must_use]
370 pub fn as_str(self) -> &'static str {
371 match self {
372 Self::Ollama => "ollama",
373 Self::Claude => "claude",
374 Self::OpenAi => "openai",
375 Self::Gemini => "gemini",
376 Self::Candle => "candle",
377 Self::Compatible => "compatible",
378 Self::Gonka => "gonka",
379 Self::Cocoon => "cocoon",
380 }
381 }
382}
383
384impl std::fmt::Display for ProviderKind {
385 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386 f.write_str(self.as_str())
387 }
388}
389
390#[derive(Debug, Deserialize, Serialize)]
414pub struct LlmConfig {
415 #[serde(default, skip_serializing_if = "Vec::is_empty")]
417 pub providers: Vec<ProviderEntry>,
418
419 #[serde(default, skip_serializing_if = "is_routing_none")]
421 pub routing: LlmRoutingStrategy,
422
423 #[serde(default = "default_embedding_model_opt")]
424 pub embedding_model: String,
425 #[serde(default, skip_serializing_if = "Option::is_none")]
426 pub candle: Option<CandleConfig>,
427 #[serde(default)]
428 pub stt: Option<SttConfig>,
429 #[serde(default)]
430 pub response_cache_enabled: bool,
431 #[serde(default = "default_response_cache_ttl_secs")]
432 pub response_cache_ttl_secs: u64,
433 #[serde(default)]
435 pub semantic_cache_enabled: bool,
436 #[serde(default = "default_semantic_cache_threshold")]
442 pub semantic_cache_threshold: f32,
443 #[serde(default = "default_semantic_cache_max_candidates")]
456 pub semantic_cache_max_candidates: u32,
457 #[serde(default)]
458 pub router_ema_enabled: bool,
459 #[serde(default = "default_router_ema_alpha")]
460 pub router_ema_alpha: f64,
461 #[serde(default = "default_router_reorder_interval")]
462 pub router_reorder_interval: u64,
463 #[serde(default, skip_serializing_if = "Option::is_none")]
465 pub router: Option<RouterConfig>,
466 #[serde(default, skip_serializing_if = "Option::is_none")]
469 pub instruction_file: Option<std::path::PathBuf>,
470 #[serde(default, skip_serializing_if = "Option::is_none")]
474 pub summary_model: Option<String>,
475 #[serde(default, skip_serializing_if = "Option::is_none")]
477 pub summary_provider: Option<ProviderEntry>,
478
479 #[serde(default, skip_serializing_if = "Option::is_none")]
481 pub complexity_routing: Option<ComplexityRoutingConfig>,
482
483 #[serde(default, skip_serializing_if = "Option::is_none")]
485 pub coe: Option<CoeConfig>,
486}
487
488fn default_embedding_model_opt() -> String {
489 default_embedding_model()
490}
491
492#[allow(clippy::trivially_copy_pass_by_ref)]
493fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
494 *s == LlmRoutingStrategy::None
495}
496
497impl LlmConfig {
498 #[must_use]
500 pub fn effective_provider(&self) -> ProviderKind {
501 self.providers
502 .first()
503 .map_or(ProviderKind::Ollama, |e| e.provider_type)
504 }
505
506 #[must_use]
508 pub fn effective_base_url(&self) -> &str {
509 self.providers
510 .first()
511 .and_then(|e| e.base_url.as_deref())
512 .unwrap_or("http://localhost:11434")
513 }
514
515 #[must_use]
521 pub fn effective_model(&self) -> &str {
522 self.providers
523 .iter()
524 .find(|e| !e.embed)
525 .and_then(|e| e.model.as_deref())
526 .unwrap_or("qwen3:8b")
527 }
528
529 #[must_use]
537 pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
538 let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
539 if name_hint.is_empty() {
540 self.providers.iter().find(|p| p.stt_model.is_some())
541 } else {
542 self.providers
543 .iter()
544 .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
545 }
546 }
547
548 pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
554 Ok(())
555 }
556
557 pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
563 use crate::error::ConfigError;
564
565 let Some(stt) = &self.stt else {
566 return Ok(());
567 };
568 if stt.provider.is_empty() {
569 return Ok(());
570 }
571 let found = self
572 .providers
573 .iter()
574 .find(|p| p.effective_name() == stt.provider);
575 match found {
576 None => {
577 return Err(ConfigError::Validation(format!(
578 "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
579 stt.provider
580 )));
581 }
582 Some(entry) if entry.stt_model.is_none() => {
583 tracing::warn!(
584 provider = stt.provider,
585 "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
586 );
587 }
588 _ => {}
589 }
590 Ok(())
591 }
592
593 pub fn warn_non_fast_tier_provider(
617 &self,
618 provider_name: &ProviderName,
619 feature_label: &str,
620 extra_allowlist: &[String],
621 ) {
622 if provider_name.is_empty() {
623 return;
624 }
625 let name = provider_name.as_str();
626 let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
627 tracing::warn!(
628 provider = name,
629 "{feature_label} provider '{name}' not found in [[llm.providers]]"
630 );
631 return;
632 };
633 let model = entry.model.as_deref().unwrap_or("");
634 if model.is_empty() {
635 return;
636 }
637 let lower = model.to_lowercase();
638 let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
639 let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
640 if !in_hints && !in_extra {
641 tracing::warn!(
642 provider = name,
643 actual = model,
644 "{feature_label} provider '{name}' uses model '{model}' \
645 which may not be fast-tier; prefer a fast model to bound distillation cost"
646 );
647 }
648 }
649}
650
/// Lowercase substrings that mark a model name as fast-tier.
///
/// `LlmConfig::warn_non_fast_tier_provider` checks whether the lowercased
/// model name contains any of these.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    // gpt-* family
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    // claude haiku family
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    // tagged `name:size` style models
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
673
674#[derive(Debug, Clone, Deserialize, Serialize)]
687pub struct SttConfig {
688 #[serde(default = "default_stt_provider")]
691 pub provider: String,
692 #[serde(default = "default_stt_language")]
694 pub language: String,
695}
696
697#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
699#[serde(rename_all = "lowercase")]
700pub enum RouterStrategyConfig {
701 #[default]
703 Ema,
704 Thompson,
706 Cascade,
708 Bandit,
710}
711
712#[derive(Debug, Clone, Deserialize, Serialize)]
725pub struct AsiConfig {
726 #[serde(default)]
728 pub enabled: bool,
729
730 #[serde(default = "default_asi_window")]
732 pub window: usize,
733
734 #[serde(default = "default_asi_coherence_threshold")]
736 pub coherence_threshold: f32,
737
738 #[serde(default = "default_asi_penalty_weight")]
743 pub penalty_weight: f32,
744}
745
/// Serde default for `AsiConfig::window`.
fn default_asi_window() -> usize {
    5_usize
}

/// Serde default for `AsiConfig::coherence_threshold`.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}

/// Serde default for `AsiConfig::penalty_weight`.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
757
758impl Default for AsiConfig {
759 fn default() -> Self {
760 Self {
761 enabled: false,
762 window: default_asi_window(),
763 coherence_threshold: default_asi_coherence_threshold(),
764 penalty_weight: default_asi_penalty_weight(),
765 }
766 }
767}
768
769#[derive(Debug, Clone, Deserialize, Serialize)]
771pub struct RouterConfig {
772 #[serde(default)]
774 pub strategy: RouterStrategyConfig,
775 #[serde(default)]
783 pub thompson_state_path: Option<String>,
784 #[serde(default)]
786 pub cascade: Option<CascadeConfig>,
787 #[serde(default)]
789 pub reputation: Option<ReputationConfig>,
790 #[serde(default)]
792 pub bandit: Option<BanditConfig>,
793 #[serde(default)]
802 pub quality_gate: Option<f32>,
803 #[serde(default)]
805 pub asi: Option<AsiConfig>,
806 #[serde(default = "default_embed_concurrency")]
812 pub embed_concurrency: usize,
813}
814
/// Serde default for `RouterConfig::embed_concurrency`.
fn default_embed_concurrency() -> usize {
    4_usize
}
818
819#[derive(Debug, Clone, Deserialize, Serialize)]
826pub struct ReputationConfig {
827 #[serde(default)]
829 pub enabled: bool,
830 #[serde(default = "default_reputation_decay_factor")]
833 pub decay_factor: f64,
834 #[serde(default = "default_reputation_weight")]
841 pub weight: f64,
842 #[serde(default = "default_reputation_min_observations")]
844 pub min_observations: u64,
845 #[serde(default)]
847 pub state_path: Option<String>,
848}
849
850#[derive(Debug, Clone, Deserialize, Serialize)]
861pub struct CascadeConfig {
862 #[serde(default = "default_cascade_quality_threshold")]
865 pub quality_threshold: f64,
866
867 #[serde(default = "default_cascade_max_escalations")]
871 pub max_escalations: u8,
872
873 #[serde(default)]
877 pub classifier_mode: CascadeClassifierMode,
878
879 #[serde(default = "default_cascade_window_size")]
881 pub window_size: usize,
882
883 #[serde(default)]
887 pub max_cascade_tokens: Option<u32>,
888
889 #[serde(default, skip_serializing_if = "Option::is_none")]
894 pub cost_tiers: Option<Vec<String>>,
895}
896
897impl Default for CascadeConfig {
898 fn default() -> Self {
899 Self {
900 quality_threshold: default_cascade_quality_threshold(),
901 max_escalations: default_cascade_max_escalations(),
902 classifier_mode: CascadeClassifierMode::default(),
903 window_size: default_cascade_window_size(),
904 max_cascade_tokens: None,
905 cost_tiers: None,
906 }
907 }
908}
909
910#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
912#[serde(rename_all = "lowercase")]
913pub enum CascadeClassifierMode {
914 #[default]
917 Heuristic,
918 Judge,
921}
922
/// Serde default for `BanditConfig::alpha`.
fn default_bandit_alpha() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::dim`.
fn default_bandit_dim() -> usize {
    32_usize
}

/// Serde default for `BanditConfig::cost_weight`.
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}

/// Serde default for `BanditConfig::decay_factor`.
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::embedding_timeout_ms`.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}

/// Serde default for `BanditConfig::cache_size`.
fn default_bandit_cache_size() -> usize {
    512_usize
}
946
947#[derive(Debug, Clone, Deserialize, Serialize)]
960pub struct BanditConfig {
961 #[serde(default = "default_bandit_alpha")]
964 pub alpha: f32,
965
966 #[serde(default = "default_bandit_dim")]
973 pub dim: usize,
974
975 #[serde(default = "default_bandit_cost_weight")]
978 pub cost_weight: f32,
979
980 #[serde(default = "default_bandit_decay_factor")]
983 pub decay_factor: f32,
984
985 #[serde(default)]
991 pub embedding_provider: ProviderName,
992
993 #[serde(default = "default_bandit_embedding_timeout_ms")]
996 pub embedding_timeout_ms: u64,
997
998 #[serde(default = "default_bandit_cache_size")]
1000 pub cache_size: usize,
1001
1002 #[serde(default)]
1009 pub state_path: Option<String>,
1010
1011 #[serde(default = "default_bandit_memory_confidence_threshold")]
1017 pub memory_confidence_threshold: f32,
1018
1019 #[serde(default)]
1025 pub warmup_queries: Option<u64>,
1026}
1027
/// Serde default for `BanditConfig::memory_confidence_threshold`.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
1031
1032impl Default for BanditConfig {
1033 fn default() -> Self {
1034 Self {
1035 alpha: default_bandit_alpha(),
1036 dim: default_bandit_dim(),
1037 cost_weight: default_bandit_cost_weight(),
1038 decay_factor: default_bandit_decay_factor(),
1039 embedding_provider: ProviderName::default(),
1040 embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
1041 cache_size: default_bandit_cache_size(),
1042 state_path: None,
1043 memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
1044 warmup_queries: None,
1045 }
1046 }
1047}
1048
1049#[derive(Debug, Deserialize, Serialize)]
1050pub struct CandleConfig {
1051 #[serde(default = "default_candle_source")]
1052 pub source: String,
1053 #[serde(default)]
1054 pub local_path: String,
1055 #[serde(default)]
1056 pub filename: Option<String>,
1057 #[serde(default = "default_chat_template")]
1058 pub chat_template: String,
1059 #[serde(default = "default_candle_device")]
1060 pub device: String,
1061 #[serde(default)]
1062 pub embedding_repo: Option<String>,
1063 #[serde(default)]
1067 pub hf_token: Option<String>,
1068 #[serde(default)]
1069 pub generation: GenerationParams,
1070 #[serde(default = "default_inference_timeout_secs")]
1079 pub inference_timeout_secs: u64,
1080}
1081
/// Serde default for `inference_timeout_secs`: 120 seconds.
fn default_inference_timeout_secs() -> u64 {
    120_u64
}
1085
1086#[derive(Debug, Clone, Deserialize, Serialize)]
1090pub struct GenerationParams {
1091 #[serde(default = "default_temperature")]
1093 pub temperature: f64,
1094 #[serde(default)]
1097 pub top_p: Option<f64>,
1098 #[serde(default)]
1101 pub top_k: Option<usize>,
1102 #[serde(default = "default_max_tokens")]
1105 pub max_tokens: usize,
1106 #[serde(default = "default_seed")]
1108 pub seed: u64,
1109 #[serde(default = "default_repeat_penalty")]
1111 pub repeat_penalty: f32,
1112 #[serde(default = "default_repeat_last_n")]
1114 pub repeat_last_n: usize,
1115}
1116
/// Upper bound applied by `GenerationParams::capped_max_tokens` (32 Ki tokens).
pub const MAX_TOKENS_CAP: usize = 32 * 1024;
1119
1120impl GenerationParams {
1121 #[must_use]
1132 pub fn capped_max_tokens(&self) -> usize {
1133 self.max_tokens.min(MAX_TOKENS_CAP)
1134 }
1135}
1136
1137impl Default for GenerationParams {
1138 fn default() -> Self {
1139 Self {
1140 temperature: default_temperature(),
1141 top_p: None,
1142 top_k: None,
1143 max_tokens: default_max_tokens(),
1144 seed: default_seed(),
1145 repeat_penalty: default_repeat_penalty(),
1146 repeat_last_n: default_repeat_last_n(),
1147 }
1148 }
1149}
1150
1151#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
1155#[serde(rename_all = "lowercase")]
1156pub enum LlmRoutingStrategy {
1157 #[default]
1159 None,
1160 Ema,
1162 Thompson,
1164 Cascade,
1166 Triage,
1168 Bandit,
1170}
1171
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`.
fn default_triage_timeout_secs() -> u64 {
    5_u64
}

/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`.
fn default_max_triage_tokens() -> u32 {
    50_u32
}
1179
/// Serde default hook for boolean fields that default to `true`.
fn default_true() -> bool {
    !false
}

/// `skip_serializing_if` predicate: true when the flag is set.
// Takes a reference because serde calls `skip_serializing_if` functions with `&T`.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_true(v: &bool) -> bool {
    let &flag = v;
    flag
}
1188
1189#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1191pub struct TierMapping {
1192 pub simple: Option<String>,
1193 pub medium: Option<String>,
1194 pub complex: Option<String>,
1195 pub expert: Option<String>,
1196}
1197
1198#[derive(Debug, Clone, Deserialize, Serialize)]
1219pub struct ComplexityRoutingConfig {
1220 #[serde(default)]
1222 pub triage_provider: Option<ProviderName>,
1223
1224 #[serde(default = "default_true")]
1226 pub bypass_single_provider: bool,
1227
1228 #[serde(default)]
1230 pub tiers: TierMapping,
1231
1232 #[serde(default = "default_max_triage_tokens")]
1234 pub max_triage_tokens: u32,
1235
1236 #[serde(default = "default_triage_timeout_secs")]
1239 pub triage_timeout_secs: u64,
1240
1241 #[serde(default)]
1244 pub fallback_strategy: Option<String>,
1245}
1246
1247impl Default for ComplexityRoutingConfig {
1248 fn default() -> Self {
1249 Self {
1250 triage_provider: None,
1251 bypass_single_provider: true,
1252 tiers: TierMapping::default(),
1253 max_triage_tokens: default_max_triage_tokens(),
1254 triage_timeout_secs: default_triage_timeout_secs(),
1255 fallback_strategy: None,
1256 }
1257 }
1258}
1259
1260#[derive(Debug, Clone, Deserialize, Serialize)]
1278#[serde(default)]
1279pub struct CoeConfig {
1280 pub enabled: bool,
1282 pub intra_threshold: f64,
1284 pub inter_threshold: f64,
1286 pub shadow_sample_rate: f64,
1288 pub secondary_provider: ProviderName,
1290 pub embed_provider: ProviderName,
1292}
1293
1294impl Default for CoeConfig {
1295 fn default() -> Self {
1296 Self {
1297 enabled: false,
1298 intra_threshold: 0.8,
1299 inter_threshold: 0.20,
1300 shadow_sample_rate: 0.1,
1301 secondary_provider: ProviderName::default(),
1302 embed_provider: ProviderName::default(),
1303 }
1304 }
1305}
1306
1307#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
1312pub struct GonkaNode {
1313 pub url: String,
1315 pub address: String,
1320 #[serde(default, skip_serializing_if = "Option::is_none")]
1322 pub name: Option<String>,
1323}
1324
1325#[derive(Debug, Clone, Deserialize, Serialize)]
1328pub struct CandleInlineConfig {
1329 #[serde(default = "default_candle_source")]
1330 pub source: String,
1331 #[serde(default)]
1332 pub local_path: String,
1333 #[serde(default)]
1334 pub filename: Option<String>,
1335 #[serde(default = "default_chat_template")]
1336 pub chat_template: String,
1337 #[serde(default = "default_candle_device")]
1338 pub device: String,
1339 #[serde(default)]
1340 pub embedding_repo: Option<String>,
1341 #[serde(default)]
1343 pub hf_token: Option<String>,
1344 #[serde(default)]
1345 pub generation: GenerationParams,
1346 #[serde(default = "default_inference_timeout_secs")]
1351 pub inference_timeout_secs: u64,
1352}
1353
1354impl Default for CandleInlineConfig {
1355 fn default() -> Self {
1356 Self {
1357 source: default_candle_source(),
1358 local_path: String::new(),
1359 filename: None,
1360 chat_template: default_chat_template(),
1361 device: default_candle_device(),
1362 embedding_repo: None,
1363 hf_token: None,
1364 generation: GenerationParams::default(),
1365 inference_timeout_secs: default_inference_timeout_secs(),
1366 }
1367 }
1368}
1369
1370#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
1379pub struct CocoonPricing {
1380 #[serde(default)]
1382 pub prompt_cents_per_1k: f64,
1383 #[serde(default)]
1386 pub completion_cents_per_1k: f64,
1387}
1388
1389#[derive(Debug, Clone, Deserialize, Serialize)]
1395#[allow(clippy::struct_excessive_bools)] pub struct ProviderEntry {
1397 #[serde(rename = "type")]
1399 pub provider_type: ProviderKind,
1400
1401 #[serde(default)]
1403 pub name: Option<String>,
1404
1405 #[serde(default)]
1407 pub model: Option<String>,
1408
1409 #[serde(default)]
1411 pub base_url: Option<String>,
1412
1413 #[serde(default)]
1415 pub max_tokens: Option<u32>,
1416
1417 #[serde(default)]
1419 pub embedding_model: Option<String>,
1420
1421 #[serde(default)]
1424 pub stt_model: Option<String>,
1425
1426 #[serde(default)]
1428 pub embed: bool,
1429
1430 #[serde(default)]
1432 pub default: bool,
1433
1434 #[serde(default)]
1436 pub thinking: Option<ThinkingConfig>,
1437 #[serde(default)]
1438 pub server_compaction: bool,
1439 #[serde(default)]
1440 pub enable_extended_context: bool,
1441 #[serde(default)]
1444 pub prompt_cache_ttl: Option<CacheTtl>,
1445
1446 #[serde(default)]
1448 pub reasoning_effort: Option<String>,
1449
1450 #[serde(default)]
1452 pub thinking_level: Option<GeminiThinkingLevel>,
1453 #[serde(default)]
1454 pub thinking_budget: Option<i32>,
1455 #[serde(default)]
1456 pub include_thoughts: Option<bool>,
1457
1458 #[serde(default)]
1460 pub api_key: Option<String>,
1461
1462 #[serde(default)]
1464 pub candle: Option<CandleInlineConfig>,
1465
1466 #[serde(default)]
1468 pub vision_model: Option<String>,
1469
1470 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1473 pub gonka_nodes: Vec<GonkaNode>,
1474 #[serde(default, skip_serializing_if = "Option::is_none")]
1476 pub gonka_chain_prefix: Option<String>,
1477
1478 #[serde(default, skip_serializing_if = "Option::is_none")]
1481 pub cocoon_client_url: Option<String>,
1482 #[serde(default, skip_serializing_if = "Option::is_none")]
1485 pub cocoon_access_hash: Option<String>,
1486 #[serde(default = "default_true", skip_serializing_if = "is_true")]
1488 pub cocoon_health_check: bool,
1489 #[serde(default, skip_serializing_if = "Option::is_none")]
1502 pub cocoon_pricing: Option<CocoonPricing>,
1503
1504 #[serde(default)]
1506 pub instruction_file: Option<std::path::PathBuf>,
1507
1508 #[serde(default, skip_serializing_if = "Option::is_none")]
1526 pub max_concurrent: Option<u32>,
1527}
1528
1529impl Default for ProviderEntry {
1530 fn default() -> Self {
1531 Self {
1532 provider_type: ProviderKind::Ollama,
1533 name: None,
1534 model: None,
1535 base_url: None,
1536 max_tokens: None,
1537 embedding_model: None,
1538 stt_model: None,
1539 embed: false,
1540 default: false,
1541 thinking: None,
1542 server_compaction: false,
1543 enable_extended_context: false,
1544 prompt_cache_ttl: None,
1545 reasoning_effort: None,
1546 thinking_level: None,
1547 thinking_budget: None,
1548 include_thoughts: None,
1549 api_key: None,
1550 candle: None,
1551 vision_model: None,
1552 gonka_nodes: Vec::new(),
1553 gonka_chain_prefix: None,
1554 cocoon_client_url: None,
1555 cocoon_access_hash: None,
1556 cocoon_health_check: true,
1557 cocoon_pricing: None,
1558 instruction_file: None,
1559 max_concurrent: None,
1560 }
1561 }
1562}
1563
1564impl ProviderEntry {
1565 #[must_use]
1567 pub fn effective_name(&self) -> String {
1568 self.name
1569 .clone()
1570 .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1571 }
1572
1573 #[must_use]
1578 pub fn effective_model(&self) -> String {
1579 if let Some(ref m) = self.model {
1580 return m.clone();
1581 }
1582 match self.provider_type {
1583 ProviderKind::Ollama => "qwen3:8b".to_owned(),
1584 ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1585 ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1586 ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1587 ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1590 ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1591 }
1592 }
1593
1594 pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1601 use crate::error::ConfigError;
1602
1603 if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1605 return Err(ConfigError::Validation(
1606 "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1607 ));
1608 }
1609
1610 if self.provider_type == ProviderKind::Gonka {
1612 if self.name.is_none() {
1613 return Err(ConfigError::Validation(
1614 "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1615 ));
1616 }
1617 self.validate_gonka_nodes()?;
1618 }
1619
1620 if self.provider_type == ProviderKind::Cocoon
1622 && self.name.as_ref().is_none_or(String::is_empty)
1623 {
1624 return Err(ConfigError::Validation(
1625 "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1626 ));
1627 }
1628
1629 if self.provider_type == ProviderKind::Cocoon {
1631 let name = self.effective_name();
1632 if let Some(ref url_str) = self.cocoon_client_url {
1633 match url::Url::parse(url_str) {
1634 Err(_) => {
1635 return Err(ConfigError::Validation(format!(
1636 "[[llm.providers]] entry '{name}': cocoon_client_url \
1637 '{url_str}' is not a valid URL; expected format: \
1638 http://localhost:10000"
1639 )));
1640 }
1641 Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1642 return Err(ConfigError::Validation(format!(
1643 "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1644 localhost or 127.0.0.1, got '{}'",
1645 u.host_str().unwrap_or("<none>")
1646 )));
1647 }
1648 Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1649 return Err(ConfigError::Validation(format!(
1650 "[[llm.providers]] entry '{name}': cocoon_client_url \
1651 scheme must be http or https, got '{}'",
1652 u.scheme()
1653 )));
1654 }
1655 _ => {}
1656 }
1657 }
1658 if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1659 return Err(ConfigError::Validation(format!(
1660 "[[llm.providers]] entry '{name}': model must not be empty \
1661 for cocoon provider"
1662 )));
1663 }
1664 if let Some(ref p) = self.cocoon_pricing {
1665 if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1666 return Err(ConfigError::Validation(format!(
1667 "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1668 must be a finite non-negative number"
1669 )));
1670 }
1671 if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1672 return Err(ConfigError::Validation(format!(
1673 "[[llm.providers]] entry '{name}': \
1674 cocoon_pricing.completion_cents_per_1k \
1675 must be a finite non-negative number"
1676 )));
1677 }
1678 }
1679 }
1680
1681 self.warn_irrelevant_fields();
1683
1684 if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1687 tracing::warn!(
1688 provider = self.effective_name(),
1689 "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1690 Whisper STT API — use OpenAI, compatible, or candle instead"
1691 );
1692 }
1693
1694 Ok(())
1695 }
1696
1697 #[must_use]
1699 pub fn effective_gonka_chain_prefix(&self) -> &str {
1700 self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1701 }
1702
1703 fn warn_irrelevant_fields(&self) {
1704 let name = self.effective_name();
1705 match self.provider_type {
1706 ProviderKind::Ollama => {
1707 if self.thinking.is_some() {
1708 tracing::warn!(
1709 provider = name,
1710 "field `thinking` is only used by Claude providers"
1711 );
1712 }
1713 if self.reasoning_effort.is_some() {
1714 tracing::warn!(
1715 provider = name,
1716 "field `reasoning_effort` is only used by OpenAI providers"
1717 );
1718 }
1719 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1720 tracing::warn!(
1721 provider = name,
1722 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1723 );
1724 }
1725 }
1726 ProviderKind::Claude => {
1727 if self.reasoning_effort.is_some() {
1728 tracing::warn!(
1729 provider = name,
1730 "field `reasoning_effort` is only used by OpenAI providers"
1731 );
1732 }
1733 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1734 tracing::warn!(
1735 provider = name,
1736 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1737 );
1738 }
1739 }
1740 ProviderKind::OpenAi => {
1741 if self.thinking.is_some() {
1742 tracing::warn!(
1743 provider = name,
1744 "field `thinking` is only used by Claude providers"
1745 );
1746 }
1747 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1748 tracing::warn!(
1749 provider = name,
1750 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1751 );
1752 }
1753 }
1754 ProviderKind::Gemini => {
1755 if self.thinking.is_some() {
1756 tracing::warn!(
1757 provider = name,
1758 "field `thinking` is only used by Claude providers"
1759 );
1760 }
1761 if self.reasoning_effort.is_some() {
1762 tracing::warn!(
1763 provider = name,
1764 "field `reasoning_effort` is only used by OpenAI providers"
1765 );
1766 }
1767 }
1768 ProviderKind::Gonka => {
1769 if self.thinking.is_some() {
1770 tracing::warn!(
1771 provider = name,
1772 "field `thinking` is only used by Claude providers"
1773 );
1774 }
1775 if self.reasoning_effort.is_some() {
1776 tracing::warn!(
1777 provider = name,
1778 "field `reasoning_effort` is only used by OpenAI providers"
1779 );
1780 }
1781 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1782 tracing::warn!(
1783 provider = name,
1784 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1785 );
1786 }
1787 }
1788 ProviderKind::Compatible | ProviderKind::Candle => {}
1789 ProviderKind::Cocoon => {
1790 if self.base_url.is_some() {
1791 tracing::warn!(
1792 provider = name,
1793 "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1794 );
1795 }
1796 }
1797 }
1798 }
1799
1800 fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1801 use crate::error::ConfigError;
1802 if self.gonka_nodes.is_empty() {
1803 return Err(ConfigError::Validation(format!(
1804 "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1805 self.effective_name()
1806 )));
1807 }
1808 for (i, node) in self.gonka_nodes.iter().enumerate() {
1809 if node.url.is_empty() {
1810 return Err(ConfigError::Validation(format!(
1811 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1812 self.effective_name()
1813 )));
1814 }
1815 if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1816 return Err(ConfigError::Validation(format!(
1817 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1818 self.effective_name()
1819 )));
1820 }
1821 }
1822 Ok(())
1823 }
1824}
1825
1826pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1836 use crate::error::ConfigError;
1837 use std::collections::HashSet;
1838
1839 if entries.is_empty() {
1840 return Err(ConfigError::Validation(
1841 "at least one LLM provider must be configured in [[llm.providers]]".into(),
1842 ));
1843 }
1844
1845 let default_count = entries.iter().filter(|e| e.default).count();
1846 if default_count > 1 {
1847 return Err(ConfigError::Validation(
1848 "only one [[llm.providers]] entry can be marked `default = true`".into(),
1849 ));
1850 }
1851
1852 let mut seen_names: HashSet<String> = HashSet::new();
1853 for entry in entries {
1854 let name = entry.effective_name();
1855 if !seen_names.insert(name.clone()) {
1856 return Err(ConfigError::Validation(format!(
1857 "duplicate provider name \"{name}\" in [[llm.providers]]"
1858 )));
1859 }
1860 entry.validate()?;
1861 }
1862
1863 Ok(())
1864}
1865
1866#[cfg(test)]
1867mod tests {
1868 use super::*;
1869
1870 fn ollama_entry() -> ProviderEntry {
1871 ProviderEntry {
1872 provider_type: ProviderKind::Ollama,
1873 name: Some("ollama".into()),
1874 model: Some("qwen3:8b".into()),
1875 ..Default::default()
1876 }
1877 }
1878
1879 fn claude_entry() -> ProviderEntry {
1880 ProviderEntry {
1881 provider_type: ProviderKind::Claude,
1882 name: Some("claude".into()),
1883 model: Some("claude-sonnet-4-6".into()),
1884 max_tokens: Some(8192),
1885 ..Default::default()
1886 }
1887 }
1888
/// The Ollama fixture entry passes `validate()`.
#[test]
fn validate_ollama_valid() {
    let entry = ollama_entry();
    assert!(entry.validate().is_ok());
}
1895
/// The Claude fixture entry passes `validate()`.
#[test]
fn validate_claude_valid() {
    let entry = claude_entry();
    assert!(entry.validate().is_ok());
}
1900
/// A `Compatible` entry without a name is rejected, and the error text mentions
/// "compatible".
#[test]
fn validate_compatible_without_name_errors() {
    let nameless = ProviderEntry {
        name: None,
        provider_type: ProviderKind::Compatible,
        ..ProviderEntry::default()
    };
    let err = nameless.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("compatible"),
        "error should mention compatible: {err}"
    );
}
1914
/// A `Compatible` entry with a name, base URL, model and token limit validates.
#[test]
fn validate_compatible_with_name_ok() {
    let proxy = ProviderEntry {
        max_tokens: Some(4096),
        model: Some("gpt-4o".into()),
        base_url: Some("http://localhost:8080".into()),
        name: Some("my-proxy".into()),
        provider_type: ProviderKind::Compatible,
        ..ProviderEntry::default()
    };
    let outcome = proxy.validate();
    assert!(outcome.is_ok());
}
1927
/// An OpenAI entry with name, model and token limit validates.
#[test]
fn validate_openai_valid() {
    let openai = ProviderEntry {
        max_tokens: Some(4096),
        model: Some("gpt-4o".into()),
        name: Some("openai".into()),
        provider_type: ProviderKind::OpenAi,
        ..ProviderEntry::default()
    };
    let outcome = openai.validate();
    assert!(outcome.is_ok());
}
1939
/// A Gemini entry with name and model validates.
#[test]
fn validate_gemini_valid() {
    let gemini = ProviderEntry {
        model: Some("gemini-2.0-flash".into()),
        name: Some("gemini".into()),
        provider_type: ProviderKind::Gemini,
        ..ProviderEntry::default()
    };
    let outcome = gemini.validate();
    assert!(outcome.is_ok());
}
1950
/// An empty pool is rejected with a message containing "at least one".
#[test]
fn validate_pool_empty_errors() {
    let empty: [ProviderEntry; 0] = [];
    let err = validate_pool(&empty).unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("at least one"), "{err}");
}
1958
/// A pool holding only the Ollama fixture validates.
#[test]
fn validate_pool_single_entry_ok() {
    let pool = [ollama_entry()];
    assert!(validate_pool(&pool).is_ok());
}
1963
/// Two entries sharing the same effective name are rejected with a "duplicate" error.
#[test]
fn validate_pool_duplicate_names_errors() {
    // Both entries come from the same fixture, so their names collide.
    let pool = [ollama_entry(), ollama_entry()];
    let err = validate_pool(&pool).unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("duplicate"), "{err}");
}
1971
/// Two entries both marked `default = true` are rejected with an error mentioning
/// "default".
#[test]
fn validate_pool_multiple_defaults_errors() {
    let first = ProviderEntry {
        default: true,
        ..ollama_entry()
    };
    let second = ProviderEntry {
        default: true,
        ..claude_entry()
    };
    let err = validate_pool(&[first, second]).unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("default"), "{err}");
}
1981
/// A pool with the distinct Ollama and Claude fixtures validates.
#[test]
fn validate_pool_two_different_providers_ok() {
    let pool = [ollama_entry(), claude_entry()];
    let outcome = validate_pool(&pool);
    assert!(outcome.is_ok());
}
1986
/// A pool containing a nameless `Compatible` entry fails validation.
#[test]
fn validate_pool_propagates_entry_error() {
    let invalid = ProviderEntry {
        name: None,
        provider_type: ProviderKind::Compatible,
        ..ProviderEntry::default()
    };
    let outcome = validate_pool(&[invalid]);
    assert!(outcome.is_err());
}
1996
/// An explicitly configured model is returned unchanged by `effective_model()`.
#[test]
fn effective_model_returns_explicit_when_set() {
    let configured = ProviderEntry {
        model: Some("claude-sonnet-4-6".into()),
        provider_type: ProviderKind::Claude,
        ..ProviderEntry::default()
    };
    let model = configured.effective_model();
    assert_eq!(model, "claude-sonnet-4-6");
}
2008
/// With no model set, an Ollama entry reports `qwen3:8b`.
#[test]
fn effective_model_ollama_default_when_none() {
    let unset = ProviderEntry {
        model: None,
        provider_type: ProviderKind::Ollama,
        ..ProviderEntry::default()
    };
    let model = unset.effective_model();
    assert_eq!(model, "qwen3:8b");
}
2018
/// With no model set, a Claude entry reports `claude-haiku-4-5-20251001`.
#[test]
fn effective_model_claude_default_when_none() {
    let unset = ProviderEntry {
        model: None,
        provider_type: ProviderKind::Claude,
        ..ProviderEntry::default()
    };
    let model = unset.effective_model();
    assert_eq!(model, "claude-haiku-4-5-20251001");
}
2028
/// With no model set, an OpenAI entry reports `gpt-4o-mini`.
#[test]
fn effective_model_openai_default_when_none() {
    let unset = ProviderEntry {
        model: None,
        provider_type: ProviderKind::OpenAi,
        ..ProviderEntry::default()
    };
    let model = unset.effective_model();
    assert_eq!(model, "gpt-4o-mini");
}
2038
/// With no model set, a Gemini entry reports `gemini-2.0-flash`.
#[test]
fn effective_model_gemini_default_when_none() {
    let unset = ProviderEntry {
        model: None,
        provider_type: ProviderKind::Gemini,
        ..ProviderEntry::default()
    };
    let model = unset.effective_model();
    assert_eq!(model, "gemini-2.0-flash");
}
2048
2049 fn parse_llm(toml: &str) -> LlmConfig {
2053 #[derive(serde::Deserialize)]
2054 struct Wrapper {
2055 llm: LlmConfig,
2056 }
2057 toml::from_str::<Wrapper>(toml).unwrap().llm
2058 }
2059
/// A config using the `[[llm.providers]]` format passes `check_legacy_format()`.
#[test]
fn check_legacy_format_new_format_ok() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#;
    let cfg = parse_llm(source);
    let outcome = cfg.check_legacy_format();
    assert!(outcome.is_ok());
}
2073
/// A bare `[llm]` table with no providers passes `check_legacy_format()`.
#[test]
fn check_legacy_format_empty_providers_no_legacy_ok() {
    let bare = parse_llm("[llm]\n");
    let outcome = bare.check_legacy_format();
    assert!(outcome.is_ok());
}
2080
/// With no providers configured, `effective_provider()` is `ProviderKind::Ollama`.
#[test]
fn effective_provider_falls_back_to_ollama_when_no_providers() {
    let bare = parse_llm("[llm]\n");
    let kind = bare.effective_provider();
    assert_eq!(kind, ProviderKind::Ollama);
}
2088
/// With a single Claude provider configured, `effective_provider()` is
/// `ProviderKind::Claude`.
#[test]
fn effective_provider_reads_from_providers_first() {
    let source = r#"
[llm]

[[llm.providers]]
type = "claude"
model = "claude-sonnet-4-6"
"#;
    let cfg = parse_llm(source);
    let kind = cfg.effective_provider();
    assert_eq!(kind, ProviderKind::Claude);
}
2102
/// With a single Ollama provider configured, `effective_model()` is that
/// provider's model.
#[test]
fn effective_model_reads_from_providers_first() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#;
    let cfg = parse_llm(source);
    let model = cfg.effective_model();
    assert_eq!(model, "qwen3:8b");
}
2116
/// When the first provider has `embed = true`, `effective_model()` reports the
/// model of the second provider instead.
#[test]
fn effective_model_skips_embed_only_provider() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "gemma4:26b"
embed = true

[[llm.providers]]
type = "openai"
model = "gpt-4o-mini"
"#;
    let cfg = parse_llm(source);
    let model = cfg.effective_model();
    assert_eq!(model, "gpt-4o-mini");
}
2135
/// With no providers configured, `effective_base_url()` is `http://localhost:11434`.
#[test]
fn effective_base_url_default_when_absent() {
    let bare = parse_llm("[llm]\n");
    let base_url = bare.effective_base_url();
    assert_eq!(base_url, "http://localhost:11434");
}
2141
/// A `base_url` set on the provider entry is what `effective_base_url()` reports.
#[test]
fn effective_base_url_from_providers_entry() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
base_url = "http://myhost:11434"
"#;
    let cfg = parse_llm(source);
    let base_url = cfg.effective_base_url();
    assert_eq!(base_url, "http://myhost:11434");
}
2155
/// Pins the `Default` values of `ComplexityRoutingConfig`: bypass enabled, a
/// 5-second triage timeout, 50 triage tokens, and no triage provider or `simple` tier.
#[test]
fn complexity_routing_defaults() {
    let defaults = ComplexityRoutingConfig::default();

    assert!(
        defaults.bypass_single_provider,
        "bypass_single_provider must default to true"
    );
    assert_eq!(defaults.triage_timeout_secs, 5);
    assert_eq!(defaults.max_triage_tokens, 50);
    assert!(defaults.triage_provider.is_none());
    assert!(defaults.tiers.simple.is_none());
}
2170
/// A fully specified `[llm.complexity_routing]` section deserializes field by
/// field, including all four tiers.
#[test]
fn complexity_routing_toml_round_trip() {
    let source = r#"
[llm]
routing = "triage"

[llm.complexity_routing]
triage_provider = "fast"
bypass_single_provider = false
triage_timeout_secs = 10
max_triage_tokens = 100

[llm.complexity_routing.tiers]
simple = "fast"
medium = "medium"
complex = "large"
expert = "opus"
"#;
    let cfg = parse_llm(source);
    assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));

    let routing = cfg
        .complexity_routing
        .expect("complexity_routing must be present");
    assert_eq!(routing.triage_provider.as_deref(), Some("fast"));
    assert!(!routing.bypass_single_provider);
    assert_eq!(routing.triage_timeout_secs, 10);
    assert_eq!(routing.max_triage_tokens, 100);

    let tiers = &routing.tiers;
    assert_eq!(tiers.simple.as_deref(), Some("fast"));
    assert_eq!(tiers.medium.as_deref(), Some("medium"));
    assert_eq!(tiers.complex.as_deref(), Some("large"));
    assert_eq!(tiers.expert.as_deref(), Some("opus"));
}
2204
/// When only some tiers are given, the missing tiers are `None` and the
/// unspecified routing fields keep the values asserted here (bypass on, 5 s timeout).
#[test]
fn complexity_routing_partial_tiers_toml() {
    let source = r#"
[llm]
routing = "triage"

[llm.complexity_routing.tiers]
simple = "haiku"
complex = "sonnet"
"#;
    let cfg = parse_llm(source);
    let routing = cfg
        .complexity_routing
        .expect("complexity_routing must be present");

    let tiers = &routing.tiers;
    assert_eq!(tiers.simple.as_deref(), Some("haiku"));
    assert!(tiers.medium.is_none());
    assert_eq!(tiers.complex.as_deref(), Some("sonnet"));
    assert!(tiers.expert.is_none());

    assert!(routing.bypass_single_provider);
    assert_eq!(routing.triage_timeout_secs, 5);
}
2229
/// `routing = "triage"` deserializes to `LlmRoutingStrategy::Triage`.
#[test]
fn routing_strategy_triage_deserialized() {
    let source = r#"
[llm]
routing = "triage"
"#;
    let cfg = parse_llm(source);
    let is_triage = matches!(cfg.routing, LlmRoutingStrategy::Triage);
    assert!(is_triage);
}
2240
/// `[llm.stt].provider` naming an entry that has an `stt_model` resolves to that entry.
#[test]
fn stt_provider_entry_by_name_match() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
stt_model = "gpt-4o-mini-transcribe"

[llm.stt]
provider = "quality"
"#;
    let cfg = parse_llm(source);
    let resolved = cfg.stt_provider_entry().expect("should find stt provider");
    assert_eq!(resolved.effective_name(), "quality");
    assert_eq!(
        resolved.stt_model.as_deref(),
        Some("gpt-4o-mini-transcribe")
    );
}
2263
/// With `[llm.stt].provider = ""`, the entry that carries an `stt_model` is picked.
#[test]
fn stt_provider_entry_auto_detect_when_provider_empty() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = ""
"#;
    let cfg = parse_llm(source);
    let resolved = cfg.stt_provider_entry().expect("should auto-detect");
    assert_eq!(resolved.effective_name(), "openai-stt");
}
2282
/// Without any `[llm.stt]` section, the entry that carries an `stt_model` is picked.
#[test]
fn stt_provider_entry_auto_detect_no_stt_section() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"
"#;
    let cfg = parse_llm(source);
    let resolved = cfg.stt_provider_entry().expect("should auto-detect");
    assert_eq!(resolved.effective_name(), "openai-stt");
}
2299
/// When no provider defines an `stt_model`, `stt_provider_entry()` is `None`.
#[test]
fn stt_provider_entry_none_when_no_stt_model() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
"#;
    let cfg = parse_llm(source);
    let resolved = cfg.stt_provider_entry();
    assert!(resolved.is_none());
}
2314
/// When `[llm.stt].provider` names an entry without an `stt_model`, the result is
/// `None` even though a different entry does define one.
#[test]
fn stt_provider_entry_name_mismatch_falls_back_to_none() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#;
    let cfg = parse_llm(source);
    let resolved = cfg.stt_provider_entry();
    assert!(resolved.is_none());
}
2339
/// An `[llm.stt]` section with only `provider` and `language` deserializes into
/// the matching fields.
#[test]
fn stt_config_deserializes_new_slim_format() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
language = "en"
"#;
    let cfg = parse_llm(source);
    let section = cfg.stt.as_ref().expect("stt section present");
    assert_eq!(section.provider, "quality");
    assert_eq!(section.language, "en");
}
2360
/// `default_stt_provider()` yields the empty string.
#[test]
fn stt_config_default_provider_is_empty() {
    let provider = default_stt_provider();
    assert_eq!(provider, "");
}
2366
/// A bare `[llm]` table (no STT section, no providers) passes `validate_stt()`.
#[test]
fn validate_stt_missing_provider_ok() {
    let bare = parse_llm("[llm]\n");
    let outcome = bare.validate_stt();
    assert!(outcome.is_ok());
}
2372
/// `[llm.stt].provider` referencing an existing entry with an `stt_model` passes
/// `validate_stt()`.
#[test]
fn validate_stt_valid_reference() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#;
    let cfg = parse_llm(source);
    let outcome = cfg.validate_stt();
    assert!(outcome.is_ok());
}
2390
/// `[llm.stt].provider` naming an entry that does not exist fails `validate_stt()`.
#[test]
fn validate_stt_nonexistent_provider_errors() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "nonexistent"
"#;
    let cfg = parse_llm(source);
    let outcome = cfg.validate_stt();
    assert!(outcome.is_err());
}
2408
/// When the referenced provider exists but has no `stt_model`, `validate_stt()`
/// still succeeds while `stt_provider_entry()` is `None`.
///
/// The test name mentions a warning; no log output is asserted here.
#[test]
fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "quality"
"#;
    let cfg = parse_llm(source);

    let outcome = cfg.validate_stt();
    assert!(outcome.is_ok());

    let resolved = cfg.stt_provider_entry();
    assert!(
        resolved.is_none(),
        "stt_provider_entry must be None when provider has no stt_model"
    );
}
2433
/// `warmup_queries = 50` under `[llm.router.bandit]` deserializes to `Some(50)`.
#[test]
fn bandit_warmup_queries_explicit_value_is_deserialized() {
    let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 50
"#;
    let cfg = parse_llm(source);
    let router = cfg.router.expect("router section must be present");
    let bandit = router.bandit.expect("bandit section must be present");
    assert_eq!(
        bandit.warmup_queries,
        Some(50),
        "warmup_queries = 50 must deserialize to Some(50)"
    );
}
2460
/// `warmup_queries = 0` under `[llm.router.bandit]` deserializes to `Some(0)`.
///
/// Despite the test's name, the input sets the field to `0` (not a null) and the
/// assertion expects `Some(0)`, not `None`.
#[test]
fn bandit_warmup_queries_explicit_null_is_none() {
    let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 0
"#;
    let cfg = parse_llm(source);
    let router = cfg.router.expect("router section must be present");
    let bandit = router.bandit.expect("bandit section must be present");
    assert_eq!(
        bandit.warmup_queries,
        Some(0),
        "warmup_queries = 0 must deserialize to Some(0)"
    );
}
2488
/// Omitting `warmup_queries` from `[llm.router.bandit]` leaves it as `None`.
#[test]
fn bandit_warmup_queries_missing_field_defaults_to_none() {
    let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
alpha = 1.5
"#;
    let cfg = parse_llm(source);
    let router = cfg.router.expect("router section must be present");
    let bandit = router.bandit.expect("bandit section must be present");
    assert_eq!(
        bandit.warmup_queries, None,
        "omitted warmup_queries must default to None"
    );
}
2513
/// `ProviderName::new` stores the given text; `as_str` returns it and `is_empty`
/// is false.
#[test]
fn provider_name_new_and_as_str() {
    let name = ProviderName::new("fast");
    assert_eq!(name.as_str(), "fast");
    assert!(!name.is_empty());
}
2520
/// The default `ProviderName` is empty and its `as_str` is `""`.
#[test]
fn provider_name_default_is_empty() {
    let name = ProviderName::default();
    assert!(name.is_empty());
    assert_eq!(name.as_str(), "");
}
2527
/// A `&ProviderName` coerces to `&str` through `Deref`.
#[test]
fn provider_name_deref_to_str() {
    let name = ProviderName::new("quality");
    // The explicit `&str` annotation forces the deref coercion under test.
    let as_text: &str = &name;
    assert_eq!(as_text, "quality");
}
2534
/// `ProviderName` compares directly against string slices.
#[test]
fn provider_name_partial_eq_str() {
    let name = ProviderName::new("fast");
    assert_eq!(name, "fast");
    assert_ne!(name, "slow");
}
2541
/// A `ProviderName` serializes to a bare JSON string and deserializes back to an
/// equal value.
#[test]
fn provider_name_serde_roundtrip() {
    let original = ProviderName::new("my-provider");

    let json = serde_json::to_string(&original).expect("serialize");
    assert_eq!(json, "\"my-provider\"");

    let restored: ProviderName = serde_json::from_str(&json).expect("deserialize");
    assert_eq!(restored, original);
}
2550
/// The empty `ProviderName` serializes to `""` and survives a JSON round trip.
#[test]
fn provider_name_serde_empty_roundtrip() {
    let original = ProviderName::default();

    let json = serde_json::to_string(&original).expect("serialize");
    assert_eq!(json, "\"\"");

    let restored: ProviderName = serde_json::from_str(&json).expect("deserialize");
    assert_eq!(restored, original);
    assert!(restored.is_empty());
}
2560
2561 fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2564 ProviderEntry {
2565 provider_type: ProviderKind::Gonka,
2566 name: Some("my-gonka".into()),
2567 gonka_nodes: nodes,
2568 ..Default::default()
2569 }
2570 }
2571
2572 fn valid_gonka_nodes() -> Vec<GonkaNode> {
2573 vec![
2574 GonkaNode {
2575 url: "https://node1.gonka.ai".into(),
2576 address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2577 name: Some("node1".into()),
2578 },
2579 GonkaNode {
2580 url: "https://node2.gonka.ai".into(),
2581 address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2582 name: Some("node2".into()),
2583 },
2584 GonkaNode {
2585 url: "http://node3.internal".into(),
2586 address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2587 name: None,
2588 },
2589 ]
2590 }
2591
/// A named Gonka entry with the well-formed node fixture validates.
#[test]
fn validate_gonka_valid() {
    let gonka = gonka_entry_with_nodes(valid_gonka_nodes());
    let outcome = gonka.validate();
    assert!(outcome.is_ok());
}
2597
/// A Gonka entry with no nodes is rejected, and the error mentions `gonka_nodes`.
#[test]
fn validate_gonka_empty_nodes_errors() {
    let gonka = gonka_entry_with_nodes(Vec::new());
    let err = gonka.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("gonka_nodes"),
        "error should mention gonka_nodes: {err}"
    );
}
2607
/// A node with an empty `url` is rejected, and the error mentions "url".
#[test]
fn validate_gonka_node_empty_url_errors() {
    let blank_url_node = GonkaNode {
        url: String::new(),
        address: "gonka1test".into(),
        name: None,
    };
    let gonka = gonka_entry_with_nodes(vec![blank_url_node]);
    let err = gonka.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("url"), "{err}");
}
2618
/// A node whose `url` uses `ftp://` is rejected, and the error mentions "http".
#[test]
fn validate_gonka_node_invalid_scheme_errors() {
    let ftp_node = GonkaNode {
        url: "ftp://node.gonka.ai".into(),
        address: "gonka1test".into(),
        name: None,
    };
    let gonka = gonka_entry_with_nodes(vec![ftp_node]);
    let err = gonka.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("http"), "{err}");
}
2629
/// A Gonka entry without a name is rejected, and the error mentions "gonka".
#[test]
fn validate_gonka_without_name_errors() {
    let nameless = ProviderEntry {
        gonka_nodes: valid_gonka_nodes(),
        name: None,
        provider_type: ProviderKind::Gonka,
        ..ProviderEntry::default()
    };
    let err = nameless.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(rendered.contains("gonka"), "{err}");
}
2641
/// A Gonka provider with three `gonka_nodes` tables and a custom chain prefix
/// deserializes into one entry with the expected node fields.
#[test]
fn gonka_toml_round_trip() {
    let source = r#"
[llm]

[[llm.providers]]
type = "gonka"
name = "my-gonka"
gonka_chain_prefix = "custom-chain"

[[llm.providers.gonka_nodes]]
url = "https://node1.gonka.ai"
address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
name = "node1"

[[llm.providers.gonka_nodes]]
url = "https://node2.gonka.ai"
address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
name = "node2"

[[llm.providers.gonka_nodes]]
url = "https://node3.gonka.ai"
address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
"#;
    let cfg = parse_llm(source);
    assert_eq!(cfg.providers.len(), 1);

    let entry = &cfg.providers[0];
    assert_eq!(entry.provider_type, ProviderKind::Gonka);
    assert_eq!(entry.name.as_deref(), Some("my-gonka"));

    let nodes = &entry.gonka_nodes;
    assert_eq!(nodes.len(), 3);

    let first = &nodes[0];
    assert_eq!(first.url, "https://node1.gonka.ai");
    assert_eq!(
        first.address,
        "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
    );
    assert_eq!(first.name.as_deref(), Some("node1"));

    // The third node table omits `name`.
    assert_eq!(nodes[2].name, None);
    assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
}
2682
/// With no `gonka_chain_prefix` set, the effective prefix is `gonka`.
#[test]
fn gonka_default_chain_prefix() {
    let gonka = gonka_entry_with_nodes(valid_gonka_nodes());
    let prefix = gonka.effective_gonka_chain_prefix();
    assert_eq!(prefix, "gonka");
}
2688
/// An explicit `gonka_chain_prefix` is what `effective_gonka_chain_prefix()` returns.
#[test]
fn gonka_explicit_chain_prefix() {
    let gonka = ProviderEntry {
        gonka_chain_prefix: Some("my-chain".into()),
        gonka_nodes: valid_gonka_nodes(),
        name: Some("my-gonka".into()),
        provider_type: ProviderKind::Gonka,
        ..ProviderEntry::default()
    };
    let prefix = gonka.effective_gonka_chain_prefix();
    assert_eq!(prefix, "my-chain");
}
2700
/// With no model set, a Gonka entry reports an empty model string.
#[test]
fn effective_model_gonka_is_empty() {
    let unset = ProviderEntry {
        model: None,
        provider_type: ProviderKind::Gonka,
        ..ProviderEntry::default()
    };
    let model = unset.effective_model();
    assert_eq!(model, "");
}
2710
/// A config with an Ollama and a Claude provider parses into two entries of the
/// expected kinds, in file order.
#[test]
fn existing_configs_still_parse() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"

[[llm.providers]]
type = "claude"
name = "claude"
model = "claude-sonnet-4-6"
"#;
    let cfg = parse_llm(source);
    let providers = &cfg.providers;
    assert_eq!(providers.len(), 2);
    assert_eq!(providers[0].provider_type, ProviderKind::Ollama);
    assert_eq!(providers[1].provider_type, ProviderKind::Claude);
}
2730
2731 fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
2734 ProviderEntry {
2735 provider_type: ProviderKind::Cocoon,
2736 name: Some("cocoon".into()),
2737 cocoon_client_url: url.map(str::to_owned),
2738 model: model.map(str::to_owned),
2739 ..Default::default()
2740 }
2741 }
2742
/// An `http://localhost` client URL is accepted.
#[test]
fn test_cocoon_url_validation_accepts_http() {
    let entry = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
    let outcome = entry.validate();
    assert!(outcome.is_ok());
}
2751
/// An `https://localhost` client URL is accepted.
#[test]
fn test_cocoon_url_validation_accepts_https_localhost() {
    let entry = cocoon_entry(Some("https://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
    let outcome = entry.validate();
    assert!(outcome.is_ok());
}
2760
/// A client URL pointing at `192.168.1.10` is rejected, and the error mentions
/// "localhost".
#[test]
fn test_cocoon_url_validation_rejects_non_localhost() {
    let entry = cocoon_entry(Some("http://192.168.1.10:10000"), Some("Qwen/Qwen3-0.6B"));
    let err = entry.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("localhost"),
        "error should mention localhost restriction: {err}"
    );
}
2771
/// An `ftp://` client URL is rejected, and the error mentions "ftp".
#[test]
fn test_cocoon_url_validation_rejects_non_http_scheme() {
    let entry = cocoon_entry(Some("ftp://localhost"), Some("Qwen/Qwen3-0.6B"));
    let err = entry.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("ftp"),
        "error should mention the bad scheme: {err}"
    );
}
2782
/// The client URL `not-a-url` is rejected, and the error echoes the offending value.
#[test]
fn test_cocoon_url_validation_rejects_invalid_url() {
    let entry = cocoon_entry(Some("not-a-url"), Some("Qwen/Qwen3-0.6B"));
    let err = entry.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("not-a-url"),
        "error should mention the bad value: {err}"
    );
}
2793
/// A Cocoon entry without a client URL validates.
#[test]
fn test_cocoon_url_none_passes() {
    let entry = cocoon_entry(None, Some("Qwen/Qwen3-0.6B"));
    let outcome = entry.validate();
    assert!(outcome.is_ok());
}
2802
/// A Cocoon entry with an empty model string is rejected, and the error mentions
/// "empty".
#[test]
fn test_cocoon_model_empty_rejected() {
    let entry = cocoon_entry(Some("http://localhost:10000"), Some(""));
    let err = entry.validate().unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("empty"),
        "error should mention 'empty': {err}"
    );
}
2813
/// A Cocoon entry without a model validates.
#[test]
fn test_cocoon_model_none_passes() {
    let entry = cocoon_entry(Some("http://localhost:10000"), None);
    let outcome = entry.validate();
    assert!(outcome.is_ok());
}
2822
/// A negative `prompt_cents_per_1k` makes validation fail.
#[test]
fn validate_cocoon_pricing_negative_prompt_errors() {
    let pricing = CocoonPricing {
        prompt_cents_per_1k: -1.0,
        completion_cents_per_1k: 0.03,
    };
    let entry = ProviderEntry {
        cocoon_pricing: Some(pricing),
        ..cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
    };
    assert!(entry.validate().is_err());
}
2832
/// A negative `completion_cents_per_1k` makes validation fail.
#[test]
fn validate_cocoon_pricing_negative_completion_errors() {
    let pricing = CocoonPricing {
        prompt_cents_per_1k: 0.01,
        completion_cents_per_1k: -0.5,
    };
    let entry = ProviderEntry {
        cocoon_pricing: Some(pricing),
        ..cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
    };
    assert!(entry.validate().is_err());
}
2842
/// Positive prompt and completion prices validate.
#[test]
fn validate_cocoon_pricing_valid_passes() {
    let pricing = CocoonPricing {
        prompt_cents_per_1k: 0.01,
        completion_cents_per_1k: 0.03,
    };
    let entry = ProviderEntry {
        cocoon_pricing: Some(pricing),
        ..cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
    };
    assert!(entry.validate().is_ok());
}
2852}