1use serde::{Deserialize, Serialize};
5
/// Thinking configuration for a provider entry.
///
/// Serialized as an internally tagged enum: the `mode` key selects the variant
/// using its snake_case name (`"extended"` or `"adaptive"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum ThinkingConfig {
    /// Variant that carries an explicit token budget.
    Extended {
        /// Required token budget (presumably the cap on thinking tokens — confirm with the provider client).
        budget_tokens: u32,
    },
    /// Variant that carries an optional effort hint.
    Adaptive {
        /// Optional effort level; defaults to `None` and is omitted from output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        effort: Option<ThinkingEffort>,
    },
}
27
/// Effort level used by [`ThinkingConfig::Adaptive`].
///
/// Serialized as lowercase variant names; `Medium` is the `Default`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ThinkingEffort {
    /// Serialized as `"low"`.
    Low,
    /// Serialized as `"medium"`; the default variant.
    #[default]
    Medium,
    /// Serialized as `"high"`.
    High,
}
41
/// Cache time-to-live selector (used by `ProviderEntry::prompt_cache_ttl`).
///
/// `Ephemeral` is the `Default` and serializes as `"ephemeral"`; `OneHour`
/// is explicitly renamed to `"1h"`.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum CacheTtl {
    /// Default TTL variant.
    #[default]
    Ephemeral,
    /// One-hour TTL variant; see [`CacheTtl::requires_beta`].
    #[serde(rename = "1h")]
    OneHour,
}
58
59impl CacheTtl {
60 #[must_use]
63 pub fn requires_beta(self) -> bool {
64 match self {
65 Self::OneHour => true,
66 Self::Ephemeral => false,
67 }
68 }
69}
70
/// Thinking level for Gemini providers (see `ProviderEntry::thinking_level`).
///
/// Serialized as lowercase variant names: `"minimal"`, `"low"`, `"medium"`, `"high"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum GeminiThinkingLevel {
    /// Serialized as `"minimal"`.
    Minimal,
    /// Serialized as `"low"`.
    Low,
    /// Serialized as `"medium"`.
    Medium,
    /// Serialized as `"high"`.
    High,
}
88
89pub use zeph_common::ProviderName;
90
/// Serde default for `LlmConfig::response_cache_ttl_secs`: one hour, in seconds.
fn default_response_cache_ttl_secs() -> u64 {
    60 * 60
}
94
/// Serde default for `LlmConfig::semantic_cache_threshold`.
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}
98
/// Serde default for `LlmConfig::semantic_cache_max_candidates`.
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}
102
/// Serde default for `LlmConfig::router_ema_alpha`.
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}
106
/// Serde default for `LlmConfig::router_reorder_interval`.
fn default_router_reorder_interval() -> u64 {
    10_u64
}
110
/// Name of the embedding model used when the configuration does not set one.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}
114
/// Serde default for the `source` field of the candle configuration structs.
fn default_candle_source() -> String {
    String::from("huggingface")
}
118
/// Serde default for the `chat_template` field of the candle configuration structs.
fn default_chat_template() -> String {
    String::from("chatml")
}
122
/// Serde default for the `device` field of the candle configuration structs.
fn default_candle_device() -> String {
    String::from("cpu")
}
126
/// Default for `GenerationParams::temperature`.
fn default_temperature() -> f64 {
    0.7_f64
}
130
/// Default for `GenerationParams::max_tokens` (2048).
fn default_max_tokens() -> usize {
    2 * 1024
}
134
/// Default for `GenerationParams::seed`.
fn default_seed() -> u64 {
    42_u64
}
138
/// Default for `GenerationParams::repeat_penalty`.
fn default_repeat_penalty() -> f32 {
    1.1_f32
}
142
/// Default for `GenerationParams::repeat_last_n`.
fn default_repeat_last_n() -> usize {
    64_usize
}
146
/// Default for `CascadeConfig::quality_threshold`.
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}
150
/// Default for `CascadeConfig::max_escalations`.
fn default_cascade_max_escalations() -> u8 {
    2_u8
}
154
/// Default for `CascadeConfig::window_size`.
fn default_cascade_window_size() -> usize {
    50_usize
}
158
/// Default for `CascadeConfig::judge_timeout_ms`: five seconds, in milliseconds.
fn default_cascade_judge_timeout_ms() -> u64 {
    5 * 1_000
}
162
/// Serde default for `ReputationConfig::decay_factor`.
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}
166
/// Serde default for `ReputationConfig::weight`.
fn default_reputation_weight() -> f64 {
    0.3_f64
}
170
/// Serde default for `ReputationConfig::min_observations`.
fn default_reputation_min_observations() -> u64 {
    5_u64
}
174
/// Serde default for `SttConfig::provider`: the empty string.
///
/// `LlmConfig::stt_provider_entry` treats an empty provider as "pick the first
/// entry that has an `stt_model`".
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}
180
/// Serde default for `SttConfig::language`: the literal `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
186
/// Crate-visible accessor for the private `default_embedding_model` serde default.
#[must_use]
pub(crate) fn get_default_embedding_model() -> String {
    default_embedding_model()
}
192
/// Crate-visible accessor for the private `default_response_cache_ttl_secs` serde default.
#[must_use]
pub(crate) fn get_default_response_cache_ttl_secs() -> u64 {
    default_response_cache_ttl_secs()
}
198
/// Crate-visible accessor for the private `default_router_ema_alpha` serde default.
#[must_use]
pub(crate) fn get_default_router_ema_alpha() -> f64 {
    default_router_ema_alpha()
}
204
/// Crate-visible accessor for the private `default_router_reorder_interval` serde default.
#[must_use]
pub(crate) fn get_default_router_reorder_interval() -> u64 {
    default_router_reorder_interval()
}
210
/// Backend type of a provider entry (the `type` key of `ProviderEntry`).
///
/// Serialized as the lowercase variant name (for example `OpenAi` becomes
/// `"openai"`), which matches the strings returned by `ProviderKind::as_str`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
    /// `"ollama"`.
    Ollama,
    /// `"claude"`.
    Claude,
    /// `"openai"`.
    OpenAi,
    /// `"gemini"`.
    Gemini,
    /// `"candle"`.
    Candle,
    /// `"compatible"`; entries of this kind must set `name`.
    Compatible,
    /// `"gonka"`; entries of this kind must set `name` and `gonka_nodes`.
    Gonka,
    /// `"cocoon"`; entries of this kind must set a non-empty `name`.
    Cocoon,
}
244
impl ProviderKind {
    /// Returns the lowercase identifier of this kind.
    ///
    /// The strings mirror the serde `rename_all = "lowercase"` names on the
    /// enum; keep both in sync when adding a variant.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Ollama => "ollama",
            Self::Claude => "claude",
            Self::OpenAi => "openai",
            Self::Gemini => "gemini",
            Self::Candle => "candle",
            Self::Compatible => "compatible",
            Self::Gonka => "gonka",
            Self::Cocoon => "cocoon",
        }
    }
}
270
/// Displays the kind as its lowercase identifier.
impl std::fmt::Display for ProviderKind {
    /// Writes exactly the string returned by `ProviderKind::as_str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
276
/// Top-level LLM configuration (the section that error messages refer to as
/// `[llm]`, with providers under `[[llm.providers]]`).
///
/// Field declaration order is the serialization order; do not reorder.
#[derive(Debug, Deserialize, Serialize)]
pub struct LlmConfig {
    /// Configured provider entries; omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub providers: Vec<ProviderEntry>,

    /// Routing strategy; omitted from serialized output when it is `LlmRoutingStrategy::None`.
    #[serde(default, skip_serializing_if = "is_routing_none")]
    pub routing: LlmRoutingStrategy,

    /// Embedding model name; defaults to `"qwen3-embedding"`.
    #[serde(default = "default_embedding_model_opt")]
    pub embedding_model: String,
    /// Optional candle configuration; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candle: Option<CandleConfig>,
    /// Optional speech-to-text settings; see `LlmConfig::validate_stt`.
    #[serde(default)]
    pub stt: Option<SttConfig>,
    /// Response cache switch; defaults to `false`.
    #[serde(default)]
    pub response_cache_enabled: bool,
    /// Response cache TTL in seconds; defaults to 3600.
    #[serde(default = "default_response_cache_ttl_secs")]
    pub response_cache_ttl_secs: u64,
    /// Semantic cache switch; defaults to `false`.
    #[serde(default)]
    pub semantic_cache_enabled: bool,
    /// Semantic cache threshold; defaults to 0.95.
    #[serde(default = "default_semantic_cache_threshold")]
    pub semantic_cache_threshold: f32,
    /// Semantic cache candidate limit; defaults to 10.
    #[serde(default = "default_semantic_cache_max_candidates")]
    pub semantic_cache_max_candidates: u32,
    /// Router EMA switch; defaults to `false`.
    #[serde(default)]
    pub router_ema_enabled: bool,
    /// Router EMA alpha; defaults to 0.1.
    #[serde(default = "default_router_ema_alpha")]
    pub router_ema_alpha: f64,
    /// Router reorder interval; defaults to 10.
    #[serde(default = "default_router_reorder_interval")]
    pub router_reorder_interval: u64,
    /// Optional router settings; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub router: Option<RouterConfig>,
    /// Optional instruction file path; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instruction_file: Option<std::path::PathBuf>,
    /// Optional summary model name; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_model: Option<String>,
    /// Optional dedicated provider entry for summaries; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_provider: Option<ProviderEntry>,

    /// Optional complexity-routing settings; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub complexity_routing: Option<ComplexityRoutingConfig>,

    /// Optional `CoeConfig` settings; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coe: Option<CoeConfig>,
}
374
/// Serde default for `LlmConfig::embedding_model`; delegates to `default_embedding_model`.
fn default_embedding_model_opt() -> String {
    default_embedding_model()
}
378
379#[allow(clippy::trivially_copy_pass_by_ref)]
380fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
381 *s == LlmRoutingStrategy::None
382}
383
384impl LlmConfig {
385 #[must_use]
387 pub fn effective_provider(&self) -> ProviderKind {
388 self.providers
389 .first()
390 .map_or(ProviderKind::Ollama, |e| e.provider_type)
391 }
392
393 #[must_use]
395 pub fn effective_base_url(&self) -> &str {
396 self.providers
397 .first()
398 .and_then(|e| e.base_url.as_deref())
399 .unwrap_or("http://localhost:11434")
400 }
401
402 #[must_use]
408 pub fn effective_model(&self) -> &str {
409 self.providers
410 .iter()
411 .find(|e| !e.embed)
412 .and_then(|e| e.model.as_deref())
413 .unwrap_or("qwen3:8b")
414 }
415
416 #[must_use]
424 pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
425 let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
426 if name_hint.is_empty() {
427 self.providers.iter().find(|p| p.stt_model.is_some())
428 } else {
429 self.providers
430 .iter()
431 .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
432 }
433 }
434
    /// Legacy-format check; currently a no-op that always returns `Ok(())`.
    ///
    /// # Errors
    /// Never fails in the current implementation; the `Result` return type is
    /// kept for callers that propagate it.
    pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
        Ok(())
    }
443
444 pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
450 use crate::error::ConfigError;
451
452 let Some(stt) = &self.stt else {
453 return Ok(());
454 };
455 if stt.provider.is_empty() {
456 return Ok(());
457 }
458 let found = self
459 .providers
460 .iter()
461 .find(|p| p.effective_name() == stt.provider);
462 match found {
463 None => {
464 return Err(ConfigError::Validation(format!(
465 "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
466 stt.provider
467 )));
468 }
469 Some(entry) if entry.stt_model.is_none() => {
470 tracing::warn!(
471 provider = stt.provider,
472 "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
473 );
474 }
475 _ => {}
476 }
477 Ok(())
478 }
479
480 pub fn warn_non_fast_tier_provider(
504 &self,
505 provider_name: &ProviderName,
506 feature_label: &str,
507 extra_allowlist: &[String],
508 ) {
509 if provider_name.is_empty() {
510 return;
511 }
512 let name = provider_name.as_str();
513 let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
514 tracing::warn!(
515 provider = name,
516 "{feature_label} provider '{name}' not found in [[llm.providers]]"
517 );
518 return;
519 };
520 let model = entry.model.as_deref().unwrap_or("");
521 if model.is_empty() {
522 return;
523 }
524 let lower = model.to_lowercase();
525 let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
526 let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
527 if !in_hints && !in_extra {
528 tracing::warn!(
529 provider = name,
530 actual = model,
531 "{feature_label} provider '{name}' uses model '{model}' \
532 which may not be fast-tier; prefer a fast model to bound distillation cost"
533 );
534 }
535 }
536}
537
/// Lowercase substrings that mark a model name as fast-tier.
///
/// `LlmConfig::warn_non_fast_tier_provider` lowercases the configured model
/// name and checks whether it contains any of these; entries must therefore
/// stay lowercase.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    // OpenAI
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    // Claude
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    // Local model tags
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
560
/// Speech-to-text settings (`[llm.stt]` in error messages).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SttConfig {
    /// Effective name of the provider entry to use; defaults to the empty
    /// string, which makes `LlmConfig::stt_provider_entry` pick the first
    /// entry that has an `stt_model`.
    #[serde(default = "default_stt_provider")]
    pub provider: String,
    /// Language setting; defaults to `"auto"`.
    #[serde(default = "default_stt_language")]
    pub language: String,
}
583
/// Strategy selector for `RouterConfig::strategy`.
///
/// Serialized as lowercase variant names; `Ema` is the `Default`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RouterStrategyConfig {
    /// `"ema"`; the default strategy.
    #[default]
    Ema,
    /// `"thompson"`.
    Thompson,
    /// `"cascade"`; see `CascadeConfig`.
    Cascade,
    /// `"bandit"`; see `BanditConfig`.
    Bandit,
}
599
/// Settings stored in `RouterConfig::asi`.
///
/// Every field is optional on input; the `Default` impl uses the same values
/// as the per-field serde defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AsiConfig {
    /// Feature switch; defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Window size; defaults to 5.
    #[serde(default = "default_asi_window")]
    pub window: usize,

    /// Coherence threshold; defaults to 0.7.
    #[serde(default = "default_asi_coherence_threshold")]
    pub coherence_threshold: f32,

    /// Penalty weight; defaults to 0.3.
    #[serde(default = "default_asi_penalty_weight")]
    pub penalty_weight: f32,
}
633
/// Default for `AsiConfig::window`.
fn default_asi_window() -> usize {
    5_usize
}
637
/// Default for `AsiConfig::coherence_threshold`.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}
641
/// Default for `AsiConfig::penalty_weight`.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
645
646impl Default for AsiConfig {
647 fn default() -> Self {
648 Self {
649 enabled: false,
650 window: default_asi_window(),
651 coherence_threshold: default_asi_coherence_threshold(),
652 penalty_weight: default_asi_penalty_weight(),
653 }
654 }
655}
656
/// Router settings stored in `LlmConfig::router`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RouterConfig {
    /// Selected strategy; defaults to `RouterStrategyConfig::Ema`.
    #[serde(default)]
    pub strategy: RouterStrategyConfig,
    /// Optional path for Thompson state; defaults to `None`.
    #[serde(default)]
    pub thompson_state_path: Option<String>,
    /// Optional cascade settings; defaults to `None`.
    #[serde(default)]
    pub cascade: Option<CascadeConfig>,
    /// Optional reputation settings; defaults to `None`.
    #[serde(default)]
    pub reputation: Option<ReputationConfig>,
    /// Optional bandit settings; defaults to `None`.
    #[serde(default)]
    pub bandit: Option<BanditConfig>,
    /// Optional quality gate value; defaults to `None`.
    #[serde(default)]
    pub quality_gate: Option<f32>,
    /// Optional `AsiConfig` settings; defaults to `None`.
    #[serde(default)]
    pub asi: Option<AsiConfig>,
    /// Embedding concurrency; defaults to 4.
    #[serde(default = "default_embed_concurrency")]
    pub embed_concurrency: usize,
}
702
/// Serde default for `RouterConfig::embed_concurrency`.
fn default_embed_concurrency() -> usize {
    4_usize
}
706
/// Reputation settings stored in `RouterConfig::reputation`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ReputationConfig {
    /// Feature switch; defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Decay factor; defaults to 0.95.
    #[serde(default = "default_reputation_decay_factor")]
    pub decay_factor: f64,
    /// Weight; defaults to 0.3.
    #[serde(default = "default_reputation_weight")]
    pub weight: f64,
    /// Minimum observation count; defaults to 5.
    #[serde(default = "default_reputation_min_observations")]
    pub min_observations: u64,
    /// Optional state file path; defaults to `None`.
    #[serde(default)]
    pub state_path: Option<String>,
}
737
/// Cascade settings stored in `RouterConfig::cascade`.
///
/// The `Default` impl uses the same values as the per-field serde defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Quality threshold; defaults to 0.5.
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Maximum number of escalations; defaults to 2.
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// Classifier mode; defaults to `CascadeClassifierMode::Heuristic`.
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Window size; defaults to 50.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Optional token cap; defaults to `None`.
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Optional list of cost tiers; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,

    /// Judge timeout in milliseconds; defaults to 5000.
    #[serde(default = "default_cascade_judge_timeout_ms")]
    pub judge_timeout_ms: u64,
}
790
791impl Default for CascadeConfig {
792 fn default() -> Self {
793 Self {
794 quality_threshold: default_cascade_quality_threshold(),
795 max_escalations: default_cascade_max_escalations(),
796 classifier_mode: CascadeClassifierMode::default(),
797 window_size: default_cascade_window_size(),
798 max_cascade_tokens: None,
799 cost_tiers: None,
800 judge_timeout_ms: default_cascade_judge_timeout_ms(),
801 }
802 }
803}
804
/// Classifier selector for `CascadeConfig::classifier_mode`.
///
/// Serialized as lowercase variant names; `Heuristic` is the `Default`.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// `"heuristic"`; the default mode.
    #[default]
    Heuristic,
    /// `"judge"`; `CascadeConfig::judge_timeout_ms` presumably applies to this mode — confirm.
    Judge,
}
818
/// Default for `BanditConfig::alpha`.
fn default_bandit_alpha() -> f32 {
    1.0_f32
}
822
/// Default for `BanditConfig::dim`.
fn default_bandit_dim() -> usize {
    32_usize
}
826
/// Default for `BanditConfig::cost_weight`.
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}
830
/// Default for `BanditConfig::decay_factor`.
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}
834
/// Default for `BanditConfig::embedding_timeout_ms`, in milliseconds.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}
838
/// Default for `BanditConfig::cache_size`.
fn default_bandit_cache_size() -> usize {
    512_usize
}
842
/// Bandit settings stored in `RouterConfig::bandit`.
///
/// The `Default` impl uses the same values as the per-field serde defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct BanditConfig {
    /// Alpha parameter; defaults to 1.0.
    #[serde(default = "default_bandit_alpha")]
    pub alpha: f32,

    /// Dimension; defaults to 32.
    #[serde(default = "default_bandit_dim")]
    pub dim: usize,

    /// Cost weight; defaults to 0.1.
    #[serde(default = "default_bandit_cost_weight")]
    pub cost_weight: f32,

    /// Decay factor; defaults to 1.0.
    #[serde(default = "default_bandit_decay_factor")]
    pub decay_factor: f32,

    /// Name of the embedding provider; defaults to `ProviderName::default()`.
    #[serde(default)]
    pub embedding_provider: ProviderName,

    /// Embedding timeout in milliseconds; defaults to 50.
    #[serde(default = "default_bandit_embedding_timeout_ms")]
    pub embedding_timeout_ms: u64,

    /// Cache size; defaults to 512.
    #[serde(default = "default_bandit_cache_size")]
    pub cache_size: usize,

    /// Optional state file path; defaults to `None`.
    #[serde(default)]
    pub state_path: Option<String>,

    /// Memory confidence threshold; defaults to 0.9.
    #[serde(default = "default_bandit_memory_confidence_threshold")]
    pub memory_confidence_threshold: f32,

    /// Optional warm-up query count; defaults to `None`.
    #[serde(default)]
    pub warmup_queries: Option<u64>,
}
923
/// Default for `BanditConfig::memory_confidence_threshold`.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
927
928impl Default for BanditConfig {
929 fn default() -> Self {
930 Self {
931 alpha: default_bandit_alpha(),
932 dim: default_bandit_dim(),
933 cost_weight: default_bandit_cost_weight(),
934 decay_factor: default_bandit_decay_factor(),
935 embedding_provider: ProviderName::default(),
936 embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
937 cache_size: default_bandit_cache_size(),
938 state_path: None,
939 memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
940 warmup_queries: None,
941 }
942 }
943}
944
/// Candle settings stored in `LlmConfig::candle`.
// NOTE(review): the derived `Debug` prints `hf_token` in plain text — confirm this value is never logged.
#[derive(Debug, Deserialize, Serialize)]
pub struct CandleConfig {
    /// Model source; defaults to `"huggingface"`.
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local path; defaults to the empty string.
    #[serde(default)]
    pub local_path: String,
    /// Optional file name; defaults to `None`.
    #[serde(default)]
    pub filename: Option<String>,
    /// Chat template name; defaults to `"chatml"`.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Device name; defaults to `"cpu"`.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional embedding repository; defaults to `None`.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Optional token (presumably a Hugging Face access token — confirm); defaults to `None`.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Generation parameters; defaults to `GenerationParams::default()`.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Inference timeout in seconds; defaults to 120.
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
977
/// Default inference timeout for the candle configuration structs: two minutes, in seconds.
fn default_inference_timeout_secs() -> u64 {
    2 * 60
}
981
/// Generation parameters used by the candle configuration structs.
///
/// The `Default` impl uses the same values as the per-field serde defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GenerationParams {
    /// Temperature; defaults to 0.7.
    #[serde(default = "default_temperature")]
    pub temperature: f64,
    /// Optional top-p value; defaults to `None`.
    #[serde(default)]
    pub top_p: Option<f64>,
    /// Optional top-k value; defaults to `None`.
    #[serde(default)]
    pub top_k: Option<usize>,
    /// Maximum tokens; defaults to 2048. See `capped_max_tokens` for the capped value.
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// Seed; defaults to 42.
    #[serde(default = "default_seed")]
    pub seed: u64,
    /// Repeat penalty; defaults to 1.1.
    #[serde(default = "default_repeat_penalty")]
    pub repeat_penalty: f32,
    /// Repeat window length; defaults to 64.
    #[serde(default = "default_repeat_last_n")]
    pub repeat_last_n: usize,
}
1012
/// Upper bound applied by `GenerationParams::capped_max_tokens` (32768).
pub const MAX_TOKENS_CAP: usize = 32 * 1024;
1015
1016impl GenerationParams {
1017 #[must_use]
1028 pub fn capped_max_tokens(&self) -> usize {
1029 self.max_tokens.min(MAX_TOKENS_CAP)
1030 }
1031}
1032
1033impl Default for GenerationParams {
1034 fn default() -> Self {
1035 Self {
1036 temperature: default_temperature(),
1037 top_p: None,
1038 top_k: None,
1039 max_tokens: default_max_tokens(),
1040 seed: default_seed(),
1041 repeat_penalty: default_repeat_penalty(),
1042 repeat_last_n: default_repeat_last_n(),
1043 }
1044 }
1045}
1046
/// Routing strategy for `LlmConfig::routing`.
///
/// Serialized as lowercase variant names; `None` is the `Default` and is
/// skipped when `LlmConfig` is serialized.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LlmRoutingStrategy {
    /// `"none"`; the default.
    #[default]
    None,
    /// `"ema"`.
    Ema,
    /// `"thompson"`.
    Thompson,
    /// `"cascade"`.
    Cascade,
    /// `"triage"`.
    Triage,
    /// `"bandit"`.
    Bandit,
}
1068
/// Default for `ComplexityRoutingConfig::triage_timeout_secs`, in seconds.
fn default_triage_timeout_secs() -> u64 {
    5_u64
}
1072
/// Default for `ComplexityRoutingConfig::max_triage_tokens`.
fn default_max_triage_tokens() -> u32 {
    50_u32
}
1076
/// Serde default helper for boolean fields that default to `true`.
fn default_true() -> bool {
    true
}
1080
/// `skip_serializing_if` predicate: returns the referenced boolean, so a
/// field using it is skipped when it is `true`.
// serde passes the field by reference, hence the `&bool` parameter and the lint allowance.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_true(v: &bool) -> bool {
    *v
}
1085
/// Optional string per complexity tier, stored in `ComplexityRoutingConfig::tiers`.
///
/// All fields default to `None` through the derived `Default`.
// NOTE(review): the strings are presumably provider names — confirm against the router.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct TierMapping {
    /// Value for the simple tier.
    pub simple: Option<String>,
    /// Value for the medium tier.
    pub medium: Option<String>,
    /// Value for the complex tier.
    pub complex: Option<String>,
    /// Value for the expert tier.
    pub expert: Option<String>,
}
1094
/// Complexity-routing settings stored in `LlmConfig::complexity_routing`.
///
/// The `Default` impl uses the same values as the per-field serde defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ComplexityRoutingConfig {
    /// Optional name of the triage provider; defaults to `None`.
    #[serde(default)]
    pub triage_provider: Option<ProviderName>,

    /// Single-provider bypass switch; defaults to `true`.
    #[serde(default = "default_true")]
    pub bypass_single_provider: bool,

    /// Per-tier mapping; defaults to an all-`None` `TierMapping`.
    #[serde(default)]
    pub tiers: TierMapping,

    /// Token limit for triage; defaults to 50.
    #[serde(default = "default_max_triage_tokens")]
    pub max_triage_tokens: u32,

    /// Triage timeout in seconds; defaults to 5.
    #[serde(default = "default_triage_timeout_secs")]
    pub triage_timeout_secs: u64,

    /// Optional fallback strategy name; defaults to `None`.
    #[serde(default)]
    pub fallback_strategy: Option<String>,
}
1143
1144impl Default for ComplexityRoutingConfig {
1145 fn default() -> Self {
1146 Self {
1147 triage_provider: None,
1148 bypass_single_provider: true,
1149 tiers: TierMapping::default(),
1150 max_triage_tokens: default_max_triage_tokens(),
1151 triage_timeout_secs: default_triage_timeout_secs(),
1152 fallback_strategy: None,
1153 }
1154 }
1155}
1156
/// Settings stored in `LlmConfig::coe`.
///
/// The container-level `#[serde(default)]` means any field missing from the
/// input is taken from the `Default` impl of this struct.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CoeConfig {
    /// Feature switch; `false` by default.
    pub enabled: bool,
    /// Intra threshold; 0.8 by default.
    pub intra_threshold: f64,
    /// Inter threshold; 0.20 by default.
    pub inter_threshold: f64,
    /// Shadow sample rate; 0.1 by default.
    pub shadow_sample_rate: f64,
    /// Name of the secondary provider; `ProviderName::default()` by default.
    pub secondary_provider: ProviderName,
    /// Name of the embedding provider; `ProviderName::default()` by default.
    pub embedding_provider: ProviderName,
}
1190
1191impl Default for CoeConfig {
1192 fn default() -> Self {
1193 Self {
1194 enabled: false,
1195 intra_threshold: 0.8,
1196 inter_threshold: 0.20,
1197 shadow_sample_rate: 0.1,
1198 secondary_provider: ProviderName::default(),
1199 embedding_provider: ProviderName::default(),
1200 }
1201 }
1202}
1203
/// One node of a `gonka` provider (an element of `ProviderEntry::gonka_nodes`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct GonkaNode {
    /// Node URL; provider validation requires it to be non-empty and to start
    /// with `http://` or `https://`.
    pub url: String,
    /// Node address (required; not checked by the validation in this file).
    pub address: String,
    /// Optional node name; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
}
1221
/// Per-provider candle settings stored in `ProviderEntry::candle`.
///
/// Has the same fields and defaults as `CandleConfig`, and additionally
/// derives `Clone`.
// NOTE(review): the derived `Debug` prints `hf_token` in plain text — confirm this value is never logged.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandleInlineConfig {
    /// Model source; defaults to `"huggingface"`.
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local path; defaults to the empty string.
    #[serde(default)]
    pub local_path: String,
    /// Optional file name; defaults to `None`.
    #[serde(default)]
    pub filename: Option<String>,
    /// Chat template name; defaults to `"chatml"`.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Device name; defaults to `"cpu"`.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional embedding repository; defaults to `None`.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Optional token (presumably a Hugging Face access token — confirm); defaults to `None`.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Generation parameters; defaults to `GenerationParams::default()`.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Inference timeout in seconds; defaults to 120.
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1250
1251impl Default for CandleInlineConfig {
1252 fn default() -> Self {
1253 Self {
1254 source: default_candle_source(),
1255 local_path: String::new(),
1256 filename: None,
1257 chat_template: default_chat_template(),
1258 device: default_candle_device(),
1259 embedding_repo: None,
1260 hf_token: None,
1261 generation: GenerationParams::default(),
1262 inference_timeout_secs: default_inference_timeout_secs(),
1263 }
1264 }
1265}
1266
/// Pricing for a `cocoon` provider (stored in `ProviderEntry::cocoon_pricing`).
///
/// Provider validation rejects values that are negative or not finite.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct CocoonPricing {
    /// Prompt price in cents per 1k (unit taken from the field name); defaults to 0.0.
    #[serde(default)]
    pub prompt_cents_per_1k: f64,
    /// Completion price in cents per 1k (unit taken from the field name); defaults to 0.0.
    #[serde(default)]
    pub completion_cents_per_1k: f64,
}
1285
/// One `[[llm.providers]]` entry.
///
/// Only `type` is required on input; every other field has a serde default.
/// Field declaration order is the serialization order; do not reorder.
// NOTE(review): the derived `Debug` prints `api_key` in plain text — confirm entries are never debug-logged.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct ProviderEntry {
    /// Provider kind; read from and written to the `type` key.
    #[serde(rename = "type")]
    pub provider_type: ProviderKind,

    /// Explicit entry name. When unset, `effective_name` falls back to the
    /// kind's identifier. Required for `compatible`, `gonka` and `cocoon` entries.
    #[serde(default)]
    pub name: Option<String>,

    /// Explicit model name; `effective_model` supplies a per-kind fallback.
    #[serde(default)]
    pub model: Option<String>,

    /// Base URL; a warning is logged when it is set on a `cocoon` entry.
    #[serde(default)]
    pub base_url: Option<String>,

    /// Optional token limit.
    #[serde(default)]
    pub max_tokens: Option<u32>,

    /// Optional embedding model name.
    #[serde(default)]
    pub embedding_model: Option<String>,

    /// Speech-to-text model; entries with this set are candidates in
    /// `LlmConfig::stt_provider_entry`. A warning is logged when set on an Ollama entry.
    #[serde(default)]
    pub stt_model: Option<String>,

    /// Embedding flag; entries with `embed = true` are skipped by `LlmConfig::effective_model`.
    #[serde(default)]
    pub embed: bool,

    /// Default-provider flag; `validate_pool` allows at most one entry to set it.
    #[serde(default)]
    pub default: bool,

    /// Thinking configuration; the validation warnings describe it as only used by Claude providers.
    #[serde(default)]
    pub thinking: Option<ThinkingConfig>,
    /// Server compaction flag; defaults to `false`.
    #[serde(default)]
    pub server_compaction: bool,
    /// Extended context flag; defaults to `false`.
    #[serde(default)]
    pub enable_extended_context: bool,
    /// Optional prompt cache TTL.
    #[serde(default)]
    pub prompt_cache_ttl: Option<CacheTtl>,

    /// Reasoning effort; the validation warnings describe it as only used by OpenAI providers.
    #[serde(default)]
    pub reasoning_effort: Option<String>,

    /// Thinking level; the validation warnings describe it as only used by Gemini providers.
    #[serde(default)]
    pub thinking_level: Option<GeminiThinkingLevel>,
    /// Thinking budget; the validation warnings describe it as only used by Gemini providers.
    #[serde(default)]
    pub thinking_budget: Option<i32>,
    /// Optional include-thoughts flag.
    #[serde(default)]
    pub include_thoughts: Option<bool>,

    /// Optional API key.
    #[serde(default)]
    pub api_key: Option<String>,

    /// Optional inline candle settings.
    #[serde(default)]
    pub candle: Option<CandleInlineConfig>,

    /// Optional vision model name.
    #[serde(default)]
    pub vision_model: Option<String>,

    /// Gonka nodes; must be non-empty for `gonka` entries. Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub gonka_nodes: Vec<GonkaNode>,
    /// Gonka chain prefix; `effective_gonka_chain_prefix` falls back to `"gonka"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gonka_chain_prefix: Option<String>,

    /// Cocoon client URL; validated to be an http(s) URL on a loopback host.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_client_url: Option<String>,
    /// Optional cocoon access hash.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_access_hash: Option<String>,
    /// Cocoon health-check flag; defaults to `true` and is omitted from serialized output when `true`.
    #[serde(default = "default_true", skip_serializing_if = "is_true")]
    pub cocoon_health_check: bool,
    /// Optional cocoon pricing; values must be finite and non-negative.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_pricing: Option<CocoonPricing>,

    /// Optional instruction file path.
    #[serde(default)]
    pub instruction_file: Option<std::path::PathBuf>,

    /// Optional concurrency limit; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_concurrent: Option<u32>,
}
1425
1426impl Default for ProviderEntry {
1427 fn default() -> Self {
1428 Self {
1429 provider_type: ProviderKind::Ollama,
1430 name: None,
1431 model: None,
1432 base_url: None,
1433 max_tokens: None,
1434 embedding_model: None,
1435 stt_model: None,
1436 embed: false,
1437 default: false,
1438 thinking: None,
1439 server_compaction: false,
1440 enable_extended_context: false,
1441 prompt_cache_ttl: None,
1442 reasoning_effort: None,
1443 thinking_level: None,
1444 thinking_budget: None,
1445 include_thoughts: None,
1446 api_key: None,
1447 candle: None,
1448 vision_model: None,
1449 gonka_nodes: Vec::new(),
1450 gonka_chain_prefix: None,
1451 cocoon_client_url: None,
1452 cocoon_access_hash: None,
1453 cocoon_health_check: true,
1454 cocoon_pricing: None,
1455 instruction_file: None,
1456 max_concurrent: None,
1457 }
1458 }
1459}
1460
1461impl ProviderEntry {
1462 #[must_use]
1464 pub fn effective_name(&self) -> String {
1465 self.name
1466 .clone()
1467 .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1468 }
1469
1470 #[must_use]
1475 pub fn effective_model(&self) -> String {
1476 if let Some(ref m) = self.model {
1477 return m.clone();
1478 }
1479 match self.provider_type {
1480 ProviderKind::Ollama => "qwen3:8b".to_owned(),
1481 ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1482 ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1483 ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1484 ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1487 ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1488 }
1489 }
1490
1491 pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1498 use crate::error::ConfigError;
1499
1500 if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1502 return Err(ConfigError::Validation(
1503 "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1504 ));
1505 }
1506
1507 if self.provider_type == ProviderKind::Gonka {
1509 if self.name.is_none() {
1510 return Err(ConfigError::Validation(
1511 "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1512 ));
1513 }
1514 self.validate_gonka_nodes()?;
1515 }
1516
1517 if self.provider_type == ProviderKind::Cocoon
1519 && self.name.as_ref().is_none_or(String::is_empty)
1520 {
1521 return Err(ConfigError::Validation(
1522 "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1523 ));
1524 }
1525
1526 if self.provider_type == ProviderKind::Cocoon {
1528 let name = self.effective_name();
1529 if let Some(ref url_str) = self.cocoon_client_url {
1530 match url::Url::parse(url_str) {
1531 Err(_) => {
1532 return Err(ConfigError::Validation(format!(
1533 "[[llm.providers]] entry '{name}': cocoon_client_url \
1534 '{url_str}' is not a valid URL; expected format: \
1535 http://localhost:10000"
1536 )));
1537 }
1538 Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1539 return Err(ConfigError::Validation(format!(
1540 "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1541 localhost or 127.0.0.1, got '{}'",
1542 u.host_str().unwrap_or("<none>")
1543 )));
1544 }
1545 Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1546 return Err(ConfigError::Validation(format!(
1547 "[[llm.providers]] entry '{name}': cocoon_client_url \
1548 scheme must be http or https, got '{}'",
1549 u.scheme()
1550 )));
1551 }
1552 _ => {}
1553 }
1554 }
1555 if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1556 return Err(ConfigError::Validation(format!(
1557 "[[llm.providers]] entry '{name}': model must not be empty \
1558 for cocoon provider"
1559 )));
1560 }
1561 if let Some(ref p) = self.cocoon_pricing {
1562 if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1563 return Err(ConfigError::Validation(format!(
1564 "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1565 must be a finite non-negative number"
1566 )));
1567 }
1568 if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1569 return Err(ConfigError::Validation(format!(
1570 "[[llm.providers]] entry '{name}': \
1571 cocoon_pricing.completion_cents_per_1k \
1572 must be a finite non-negative number"
1573 )));
1574 }
1575 }
1576 }
1577
1578 self.warn_irrelevant_fields();
1580
1581 if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1584 tracing::warn!(
1585 provider = self.effective_name(),
1586 "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1587 Whisper STT API — use OpenAI, compatible, or candle instead"
1588 );
1589 }
1590
1591 Ok(())
1592 }
1593
1594 #[must_use]
1596 pub fn effective_gonka_chain_prefix(&self) -> &str {
1597 self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1598 }
1599
1600 fn warn_irrelevant_fields(&self) {
1601 let name = self.effective_name();
1602 match self.provider_type {
1603 ProviderKind::Ollama => {
1604 if self.thinking.is_some() {
1605 tracing::warn!(
1606 provider = name,
1607 "field `thinking` is only used by Claude providers"
1608 );
1609 }
1610 if self.reasoning_effort.is_some() {
1611 tracing::warn!(
1612 provider = name,
1613 "field `reasoning_effort` is only used by OpenAI providers"
1614 );
1615 }
1616 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1617 tracing::warn!(
1618 provider = name,
1619 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1620 );
1621 }
1622 }
1623 ProviderKind::Claude => {
1624 if self.reasoning_effort.is_some() {
1625 tracing::warn!(
1626 provider = name,
1627 "field `reasoning_effort` is only used by OpenAI providers"
1628 );
1629 }
1630 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1631 tracing::warn!(
1632 provider = name,
1633 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1634 );
1635 }
1636 }
1637 ProviderKind::OpenAi => {
1638 if self.thinking.is_some() {
1639 tracing::warn!(
1640 provider = name,
1641 "field `thinking` is only used by Claude providers"
1642 );
1643 }
1644 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1645 tracing::warn!(
1646 provider = name,
1647 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1648 );
1649 }
1650 }
1651 ProviderKind::Gemini => {
1652 if self.thinking.is_some() {
1653 tracing::warn!(
1654 provider = name,
1655 "field `thinking` is only used by Claude providers"
1656 );
1657 }
1658 if self.reasoning_effort.is_some() {
1659 tracing::warn!(
1660 provider = name,
1661 "field `reasoning_effort` is only used by OpenAI providers"
1662 );
1663 }
1664 }
1665 ProviderKind::Gonka => {
1666 if self.thinking.is_some() {
1667 tracing::warn!(
1668 provider = name,
1669 "field `thinking` is only used by Claude providers"
1670 );
1671 }
1672 if self.reasoning_effort.is_some() {
1673 tracing::warn!(
1674 provider = name,
1675 "field `reasoning_effort` is only used by OpenAI providers"
1676 );
1677 }
1678 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1679 tracing::warn!(
1680 provider = name,
1681 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1682 );
1683 }
1684 }
1685 ProviderKind::Compatible | ProviderKind::Candle => {}
1686 ProviderKind::Cocoon => {
1687 if self.base_url.is_some() {
1688 tracing::warn!(
1689 provider = name,
1690 "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1691 );
1692 }
1693 }
1694 }
1695 }
1696
1697 fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1698 use crate::error::ConfigError;
1699 if self.gonka_nodes.is_empty() {
1700 return Err(ConfigError::Validation(format!(
1701 "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1702 self.effective_name()
1703 )));
1704 }
1705 for (i, node) in self.gonka_nodes.iter().enumerate() {
1706 if node.url.is_empty() {
1707 return Err(ConfigError::Validation(format!(
1708 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1709 self.effective_name()
1710 )));
1711 }
1712 if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1713 return Err(ConfigError::Validation(format!(
1714 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1715 self.effective_name()
1716 )));
1717 }
1718 }
1719 Ok(())
1720 }
1721}
1722
1723pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1733 use crate::error::ConfigError;
1734 use std::collections::HashSet;
1735
1736 if entries.is_empty() {
1737 return Err(ConfigError::Validation(
1738 "at least one LLM provider must be configured in [[llm.providers]]".into(),
1739 ));
1740 }
1741
1742 let default_count = entries.iter().filter(|e| e.default).count();
1743 if default_count > 1 {
1744 return Err(ConfigError::Validation(
1745 "only one [[llm.providers]] entry can be marked `default = true`".into(),
1746 ));
1747 }
1748
1749 let mut seen_names: HashSet<String> = HashSet::new();
1750 for entry in entries {
1751 let name = entry.effective_name();
1752 if !seen_names.insert(name.clone()) {
1753 return Err(ConfigError::Validation(format!(
1754 "duplicate provider name \"{name}\" in [[llm.providers]]"
1755 )));
1756 }
1757 entry.validate()?;
1758 }
1759
1760 Ok(())
1761}
1762
1763#[cfg(test)]
1764mod tests {
1765 use super::*;
1766
1767 fn ollama_entry() -> ProviderEntry {
1768 ProviderEntry {
1769 provider_type: ProviderKind::Ollama,
1770 name: Some("ollama".into()),
1771 model: Some("qwen3:8b".into()),
1772 ..Default::default()
1773 }
1774 }
1775
1776 fn claude_entry() -> ProviderEntry {
1777 ProviderEntry {
1778 provider_type: ProviderKind::Claude,
1779 name: Some("claude".into()),
1780 model: Some("claude-sonnet-4-6".into()),
1781 max_tokens: Some(8192),
1782 ..Default::default()
1783 }
1784 }
1785
1786 #[test]
1789 fn validate_ollama_valid() {
1790 assert!(ollama_entry().validate().is_ok());
1791 }
1792
1793 #[test]
1794 fn validate_claude_valid() {
1795 assert!(claude_entry().validate().is_ok());
1796 }
1797
1798 #[test]
1799 fn validate_compatible_without_name_errors() {
1800 let entry = ProviderEntry {
1801 provider_type: ProviderKind::Compatible,
1802 name: None,
1803 ..Default::default()
1804 };
1805 let err = entry.validate().unwrap_err();
1806 assert!(
1807 err.to_string().contains("compatible"),
1808 "error should mention compatible: {err}"
1809 );
1810 }
1811
1812 #[test]
1813 fn validate_compatible_with_name_ok() {
1814 let entry = ProviderEntry {
1815 provider_type: ProviderKind::Compatible,
1816 name: Some("my-proxy".into()),
1817 base_url: Some("http://localhost:8080".into()),
1818 model: Some("gpt-4o".into()),
1819 max_tokens: Some(4096),
1820 ..Default::default()
1821 };
1822 assert!(entry.validate().is_ok());
1823 }
1824
1825 #[test]
1826 fn validate_openai_valid() {
1827 let entry = ProviderEntry {
1828 provider_type: ProviderKind::OpenAi,
1829 name: Some("openai".into()),
1830 model: Some("gpt-4o".into()),
1831 max_tokens: Some(4096),
1832 ..Default::default()
1833 };
1834 assert!(entry.validate().is_ok());
1835 }
1836
1837 #[test]
1838 fn validate_gemini_valid() {
1839 let entry = ProviderEntry {
1840 provider_type: ProviderKind::Gemini,
1841 name: Some("gemini".into()),
1842 model: Some("gemini-2.0-flash".into()),
1843 ..Default::default()
1844 };
1845 assert!(entry.validate().is_ok());
1846 }
1847
1848 #[test]
1851 fn validate_pool_empty_errors() {
1852 let err = validate_pool(&[]).unwrap_err();
1853 assert!(err.to_string().contains("at least one"), "{err}");
1854 }
1855
1856 #[test]
1857 fn validate_pool_single_entry_ok() {
1858 assert!(validate_pool(&[ollama_entry()]).is_ok());
1859 }
1860
1861 #[test]
1862 fn validate_pool_duplicate_names_errors() {
1863 let a = ollama_entry();
1864 let b = ollama_entry(); let err = validate_pool(&[a, b]).unwrap_err();
1866 assert!(err.to_string().contains("duplicate"), "{err}");
1867 }
1868
1869 #[test]
1870 fn validate_pool_multiple_defaults_errors() {
1871 let mut a = ollama_entry();
1872 let mut b = claude_entry();
1873 a.default = true;
1874 b.default = true;
1875 let err = validate_pool(&[a, b]).unwrap_err();
1876 assert!(err.to_string().contains("default"), "{err}");
1877 }
1878
1879 #[test]
1880 fn validate_pool_two_different_providers_ok() {
1881 assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
1882 }
1883
1884 #[test]
1885 fn validate_pool_propagates_entry_error() {
1886 let bad = ProviderEntry {
1887 provider_type: ProviderKind::Compatible,
1888 name: None, ..Default::default()
1890 };
1891 assert!(validate_pool(&[bad]).is_err());
1892 }
1893
1894 #[test]
1897 fn effective_model_returns_explicit_when_set() {
1898 let entry = ProviderEntry {
1899 provider_type: ProviderKind::Claude,
1900 model: Some("claude-sonnet-4-6".into()),
1901 ..Default::default()
1902 };
1903 assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
1904 }
1905
1906 #[test]
1907 fn effective_model_ollama_default_when_none() {
1908 let entry = ProviderEntry {
1909 provider_type: ProviderKind::Ollama,
1910 model: None,
1911 ..Default::default()
1912 };
1913 assert_eq!(entry.effective_model(), "qwen3:8b");
1914 }
1915
1916 #[test]
1917 fn effective_model_claude_default_when_none() {
1918 let entry = ProviderEntry {
1919 provider_type: ProviderKind::Claude,
1920 model: None,
1921 ..Default::default()
1922 };
1923 assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
1924 }
1925
1926 #[test]
1927 fn effective_model_openai_default_when_none() {
1928 let entry = ProviderEntry {
1929 provider_type: ProviderKind::OpenAi,
1930 model: None,
1931 ..Default::default()
1932 };
1933 assert_eq!(entry.effective_model(), "gpt-4o-mini");
1934 }
1935
1936 #[test]
1937 fn effective_model_gemini_default_when_none() {
1938 let entry = ProviderEntry {
1939 provider_type: ProviderKind::Gemini,
1940 model: None,
1941 ..Default::default()
1942 };
1943 assert_eq!(entry.effective_model(), "gemini-2.0-flash");
1944 }
1945
1946 fn parse_llm(toml: &str) -> LlmConfig {
1950 #[derive(serde::Deserialize)]
1951 struct Wrapper {
1952 llm: LlmConfig,
1953 }
1954 toml::from_str::<Wrapper>(toml).unwrap().llm
1955 }
1956
1957 #[test]
1958 fn check_legacy_format_new_format_ok() {
1959 let cfg = parse_llm(
1960 r#"
1961[llm]
1962
1963[[llm.providers]]
1964type = "ollama"
1965model = "qwen3:8b"
1966"#,
1967 );
1968 assert!(cfg.check_legacy_format().is_ok());
1969 }
1970
1971 #[test]
1972 fn check_legacy_format_empty_providers_no_legacy_ok() {
1973 let cfg = parse_llm("[llm]\n");
1975 assert!(cfg.check_legacy_format().is_ok());
1976 }
1977
1978 #[test]
1981 fn effective_provider_falls_back_to_ollama_when_no_providers() {
1982 let cfg = parse_llm("[llm]\n");
1983 assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
1984 }
1985
1986 #[test]
1987 fn effective_provider_reads_from_providers_first() {
1988 let cfg = parse_llm(
1989 r#"
1990[llm]
1991
1992[[llm.providers]]
1993type = "claude"
1994model = "claude-sonnet-4-6"
1995"#,
1996 );
1997 assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
1998 }
1999
2000 #[test]
2001 fn effective_model_reads_from_providers_first() {
2002 let cfg = parse_llm(
2003 r#"
2004[llm]
2005
2006[[llm.providers]]
2007type = "ollama"
2008model = "qwen3:8b"
2009"#,
2010 );
2011 assert_eq!(cfg.effective_model(), "qwen3:8b");
2012 }
2013
2014 #[test]
2015 fn effective_model_skips_embed_only_provider() {
2016 let cfg = parse_llm(
2017 r#"
2018[llm]
2019
2020[[llm.providers]]
2021type = "ollama"
2022model = "gemma4:26b"
2023embed = true
2024
2025[[llm.providers]]
2026type = "openai"
2027model = "gpt-4o-mini"
2028"#,
2029 );
2030 assert_eq!(cfg.effective_model(), "gpt-4o-mini");
2031 }
2032
2033 #[test]
2034 fn effective_base_url_default_when_absent() {
2035 let cfg = parse_llm("[llm]\n");
2036 assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
2037 }
2038
2039 #[test]
2040 fn effective_base_url_from_providers_entry() {
2041 let cfg = parse_llm(
2042 r#"
2043[llm]
2044
2045[[llm.providers]]
2046type = "ollama"
2047base_url = "http://myhost:11434"
2048"#,
2049 );
2050 assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
2051 }
2052
2053 #[test]
2056 fn complexity_routing_defaults() {
2057 let cr = ComplexityRoutingConfig::default();
2058 assert!(
2059 cr.bypass_single_provider,
2060 "bypass_single_provider must default to true"
2061 );
2062 assert_eq!(cr.triage_timeout_secs, 5);
2063 assert_eq!(cr.max_triage_tokens, 50);
2064 assert!(cr.triage_provider.is_none());
2065 assert!(cr.tiers.simple.is_none());
2066 }
2067
2068 #[test]
2069 fn complexity_routing_toml_round_trip() {
2070 let cfg = parse_llm(
2071 r#"
2072[llm]
2073routing = "triage"
2074
2075[llm.complexity_routing]
2076triage_provider = "fast"
2077bypass_single_provider = false
2078triage_timeout_secs = 10
2079max_triage_tokens = 100
2080
2081[llm.complexity_routing.tiers]
2082simple = "fast"
2083medium = "medium"
2084complex = "large"
2085expert = "opus"
2086"#,
2087 );
2088 assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2089 let cr = cfg
2090 .complexity_routing
2091 .expect("complexity_routing must be present");
2092 assert_eq!(
2093 cr.triage_provider.as_ref().map(ProviderName::as_str),
2094 Some("fast")
2095 );
2096 assert!(!cr.bypass_single_provider);
2097 assert_eq!(cr.triage_timeout_secs, 10);
2098 assert_eq!(cr.max_triage_tokens, 100);
2099 assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
2100 assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
2101 assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
2102 assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
2103 }
2104
2105 #[test]
2106 fn complexity_routing_partial_tiers_toml() {
2107 let cfg = parse_llm(
2109 r#"
2110[llm]
2111routing = "triage"
2112
2113[llm.complexity_routing.tiers]
2114simple = "haiku"
2115complex = "sonnet"
2116"#,
2117 );
2118 let cr = cfg
2119 .complexity_routing
2120 .expect("complexity_routing must be present");
2121 assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
2122 assert!(cr.tiers.medium.is_none());
2123 assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
2124 assert!(cr.tiers.expert.is_none());
2125 assert!(cr.bypass_single_provider);
2127 assert_eq!(cr.triage_timeout_secs, 5);
2128 }
2129
2130 #[test]
2131 fn routing_strategy_triage_deserialized() {
2132 let cfg = parse_llm(
2133 r#"
2134[llm]
2135routing = "triage"
2136"#,
2137 );
2138 assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2139 }
2140
2141 #[test]
2144 fn stt_provider_entry_by_name_match() {
2145 let cfg = parse_llm(
2146 r#"
2147[llm]
2148
2149[[llm.providers]]
2150type = "openai"
2151name = "quality"
2152model = "gpt-5.4"
2153stt_model = "gpt-4o-mini-transcribe"
2154
2155[llm.stt]
2156provider = "quality"
2157"#,
2158 );
2159 let entry = cfg.stt_provider_entry().expect("should find stt provider");
2160 assert_eq!(entry.effective_name(), "quality");
2161 assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
2162 }
2163
2164 #[test]
2165 fn stt_provider_entry_auto_detect_when_provider_empty() {
2166 let cfg = parse_llm(
2167 r#"
2168[llm]
2169
2170[[llm.providers]]
2171type = "openai"
2172name = "openai-stt"
2173stt_model = "whisper-1"
2174
2175[llm.stt]
2176provider = ""
2177"#,
2178 );
2179 let entry = cfg.stt_provider_entry().expect("should auto-detect");
2180 assert_eq!(entry.effective_name(), "openai-stt");
2181 }
2182
2183 #[test]
2184 fn stt_provider_entry_auto_detect_no_stt_section() {
2185 let cfg = parse_llm(
2186 r#"
2187[llm]
2188
2189[[llm.providers]]
2190type = "openai"
2191name = "openai-stt"
2192stt_model = "whisper-1"
2193"#,
2194 );
2195 let entry = cfg.stt_provider_entry().expect("should auto-detect");
2197 assert_eq!(entry.effective_name(), "openai-stt");
2198 }
2199
2200 #[test]
2201 fn stt_provider_entry_none_when_no_stt_model() {
2202 let cfg = parse_llm(
2203 r#"
2204[llm]
2205
2206[[llm.providers]]
2207type = "openai"
2208name = "quality"
2209model = "gpt-5.4"
2210"#,
2211 );
2212 assert!(cfg.stt_provider_entry().is_none());
2213 }
2214
2215 #[test]
2216 fn stt_provider_entry_name_mismatch_falls_back_to_none() {
2217 let cfg = parse_llm(
2219 r#"
2220[llm]
2221
2222[[llm.providers]]
2223type = "openai"
2224name = "quality"
2225model = "gpt-5.4"
2226
2227[[llm.providers]]
2228type = "openai"
2229name = "openai-stt"
2230stt_model = "whisper-1"
2231
2232[llm.stt]
2233provider = "quality"
2234"#,
2235 );
2236 assert!(cfg.stt_provider_entry().is_none());
2238 }
2239
2240 #[test]
2241 fn stt_config_deserializes_new_slim_format() {
2242 let cfg = parse_llm(
2243 r#"
2244[llm]
2245
2246[[llm.providers]]
2247type = "openai"
2248name = "quality"
2249stt_model = "whisper-1"
2250
2251[llm.stt]
2252provider = "quality"
2253language = "en"
2254"#,
2255 );
2256 let stt = cfg.stt.as_ref().expect("stt section present");
2257 assert_eq!(stt.provider, "quality");
2258 assert_eq!(stt.language, "en");
2259 }
2260
2261 #[test]
2262 fn stt_config_default_provider_is_empty() {
2263 assert_eq!(default_stt_provider(), "");
2265 }
2266
2267 #[test]
2268 fn validate_stt_missing_provider_ok() {
2269 let cfg = parse_llm("[llm]\n");
2270 assert!(cfg.validate_stt().is_ok());
2271 }
2272
2273 #[test]
2274 fn validate_stt_valid_reference() {
2275 let cfg = parse_llm(
2276 r#"
2277[llm]
2278
2279[[llm.providers]]
2280type = "openai"
2281name = "quality"
2282stt_model = "whisper-1"
2283
2284[llm.stt]
2285provider = "quality"
2286"#,
2287 );
2288 assert!(cfg.validate_stt().is_ok());
2289 }
2290
2291 #[test]
2292 fn validate_stt_nonexistent_provider_errors() {
2293 let cfg = parse_llm(
2294 r#"
2295[llm]
2296
2297[[llm.providers]]
2298type = "openai"
2299name = "quality"
2300model = "gpt-5.4"
2301
2302[llm.stt]
2303provider = "nonexistent"
2304"#,
2305 );
2306 assert!(cfg.validate_stt().is_err());
2307 }
2308
2309 #[test]
2310 fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
2311 let cfg = parse_llm(
2313 r#"
2314[llm]
2315
2316[[llm.providers]]
2317type = "openai"
2318name = "quality"
2319model = "gpt-5.4"
2320
2321[llm.stt]
2322provider = "quality"
2323"#,
2324 );
2325 assert!(cfg.validate_stt().is_ok());
2327 assert!(
2329 cfg.stt_provider_entry().is_none(),
2330 "stt_provider_entry must be None when provider has no stt_model"
2331 );
2332 }
2333
2334 #[test]
2337 fn bandit_warmup_queries_explicit_value_is_deserialized() {
2338 let cfg = parse_llm(
2339 r#"
2340[llm]
2341
2342[llm.router]
2343strategy = "bandit"
2344
2345[llm.router.bandit]
2346warmup_queries = 50
2347"#,
2348 );
2349 let bandit = cfg
2350 .router
2351 .expect("router section must be present")
2352 .bandit
2353 .expect("bandit section must be present");
2354 assert_eq!(
2355 bandit.warmup_queries,
2356 Some(50),
2357 "warmup_queries = 50 must deserialize to Some(50)"
2358 );
2359 }
2360
2361 #[test]
2362 fn bandit_warmup_queries_explicit_null_is_none() {
2363 let cfg = parse_llm(
2366 r#"
2367[llm]
2368
2369[llm.router]
2370strategy = "bandit"
2371
2372[llm.router.bandit]
2373warmup_queries = 0
2374"#,
2375 );
2376 let bandit = cfg
2377 .router
2378 .expect("router section must be present")
2379 .bandit
2380 .expect("bandit section must be present");
2381 assert_eq!(
2383 bandit.warmup_queries,
2384 Some(0),
2385 "warmup_queries = 0 must deserialize to Some(0)"
2386 );
2387 }
2388
2389 #[test]
2390 fn bandit_warmup_queries_missing_field_defaults_to_none() {
2391 let cfg = parse_llm(
2393 r#"
2394[llm]
2395
2396[llm.router]
2397strategy = "bandit"
2398
2399[llm.router.bandit]
2400alpha = 1.5
2401"#,
2402 );
2403 let bandit = cfg
2404 .router
2405 .expect("router section must be present")
2406 .bandit
2407 .expect("bandit section must be present");
2408 assert_eq!(
2409 bandit.warmup_queries, None,
2410 "omitted warmup_queries must default to None"
2411 );
2412 }
2413
2414 #[test]
2415 fn provider_name_new_and_as_str() {
2416 let n = ProviderName::new("fast");
2417 assert_eq!(n.as_str(), "fast");
2418 assert!(!n.is_empty());
2419 }
2420
2421 #[test]
2422 fn provider_name_default_is_empty() {
2423 let n = ProviderName::default();
2424 assert!(n.is_empty());
2425 assert_eq!(n.as_str(), "");
2426 }
2427
2428 #[test]
2429 fn provider_name_partial_eq_str() {
2430 let n = ProviderName::new("fast");
2431 assert_eq!(n, "fast");
2432 assert_ne!(n, "slow");
2433 }
2434
2435 #[test]
2436 fn provider_name_serde_roundtrip() {
2437 let n = ProviderName::new("my-provider");
2438 let json = serde_json::to_string(&n).expect("serialize");
2439 assert_eq!(json, "\"my-provider\"");
2440 let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2441 assert_eq!(back, n);
2442 }
2443
2444 #[test]
2445 fn provider_name_serde_empty_roundtrip() {
2446 let n = ProviderName::default();
2447 let json = serde_json::to_string(&n).expect("serialize");
2448 assert_eq!(json, "\"\"");
2449 let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2450 assert_eq!(back, n);
2451 assert!(back.is_empty());
2452 }
2453
2454 fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2457 ProviderEntry {
2458 provider_type: ProviderKind::Gonka,
2459 name: Some("my-gonka".into()),
2460 gonka_nodes: nodes,
2461 ..Default::default()
2462 }
2463 }
2464
2465 fn valid_gonka_nodes() -> Vec<GonkaNode> {
2466 vec![
2467 GonkaNode {
2468 url: "https://node1.gonka.ai".into(),
2469 address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2470 name: Some("node1".into()),
2471 },
2472 GonkaNode {
2473 url: "https://node2.gonka.ai".into(),
2474 address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2475 name: Some("node2".into()),
2476 },
2477 GonkaNode {
2478 url: "http://node3.internal".into(),
2479 address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2480 name: None,
2481 },
2482 ]
2483 }
2484
2485 #[test]
2486 fn validate_gonka_valid() {
2487 let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2488 assert!(entry.validate().is_ok());
2489 }
2490
2491 #[test]
2492 fn validate_gonka_empty_nodes_errors() {
2493 let entry = gonka_entry_with_nodes(vec![]);
2494 let err = entry.validate().unwrap_err();
2495 assert!(
2496 err.to_string().contains("gonka_nodes"),
2497 "error should mention gonka_nodes: {err}"
2498 );
2499 }
2500
2501 #[test]
2502 fn validate_gonka_node_empty_url_errors() {
2503 let entry = gonka_entry_with_nodes(vec![GonkaNode {
2504 url: String::new(),
2505 address: "gonka1test".into(),
2506 name: None,
2507 }]);
2508 let err = entry.validate().unwrap_err();
2509 assert!(err.to_string().contains("url"), "{err}");
2510 }
2511
2512 #[test]
2513 fn validate_gonka_node_invalid_scheme_errors() {
2514 let entry = gonka_entry_with_nodes(vec![GonkaNode {
2515 url: "ftp://node.gonka.ai".into(),
2516 address: "gonka1test".into(),
2517 name: None,
2518 }]);
2519 let err = entry.validate().unwrap_err();
2520 assert!(err.to_string().contains("http"), "{err}");
2521 }
2522
2523 #[test]
2524 fn validate_gonka_without_name_errors() {
2525 let entry = ProviderEntry {
2526 provider_type: ProviderKind::Gonka,
2527 name: None,
2528 gonka_nodes: valid_gonka_nodes(),
2529 ..Default::default()
2530 };
2531 let err = entry.validate().unwrap_err();
2532 assert!(err.to_string().contains("gonka"), "{err}");
2533 }
2534
2535 #[test]
2536 fn gonka_toml_round_trip() {
2537 let toml = r#"
2538[llm]
2539
2540[[llm.providers]]
2541type = "gonka"
2542name = "my-gonka"
2543gonka_chain_prefix = "custom-chain"
2544
2545[[llm.providers.gonka_nodes]]
2546url = "https://node1.gonka.ai"
2547address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2548name = "node1"
2549
2550[[llm.providers.gonka_nodes]]
2551url = "https://node2.gonka.ai"
2552address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
2553name = "node2"
2554
2555[[llm.providers.gonka_nodes]]
2556url = "https://node3.gonka.ai"
2557address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
2558"#;
2559 let cfg = parse_llm(toml);
2560 assert_eq!(cfg.providers.len(), 1);
2561 let entry = &cfg.providers[0];
2562 assert_eq!(entry.provider_type, ProviderKind::Gonka);
2563 assert_eq!(entry.name.as_deref(), Some("my-gonka"));
2564 let nodes = &entry.gonka_nodes;
2565 assert_eq!(nodes.len(), 3);
2566 assert_eq!(nodes[0].url, "https://node1.gonka.ai");
2567 assert_eq!(
2568 nodes[0].address,
2569 "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2570 );
2571 assert_eq!(nodes[0].name.as_deref(), Some("node1"));
2572 assert_eq!(nodes[2].name, None);
2573 assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
2574 }
2575
2576 #[test]
2577 fn gonka_default_chain_prefix() {
2578 let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2579 assert_eq!(entry.effective_gonka_chain_prefix(), "gonka");
2580 }
2581
2582 #[test]
2583 fn gonka_explicit_chain_prefix() {
2584 let entry = ProviderEntry {
2585 provider_type: ProviderKind::Gonka,
2586 name: Some("my-gonka".into()),
2587 gonka_nodes: valid_gonka_nodes(),
2588 gonka_chain_prefix: Some("my-chain".into()),
2589 ..Default::default()
2590 };
2591 assert_eq!(entry.effective_gonka_chain_prefix(), "my-chain");
2592 }
2593
2594 #[test]
2595 fn effective_model_gonka_is_empty() {
2596 let entry = ProviderEntry {
2597 provider_type: ProviderKind::Gonka,
2598 model: None,
2599 ..Default::default()
2600 };
2601 assert_eq!(entry.effective_model(), "");
2602 }
2603
2604 #[test]
2605 fn existing_configs_still_parse() {
2606 let toml = r#"
2607[llm]
2608
2609[[llm.providers]]
2610type = "ollama"
2611model = "qwen3:8b"
2612
2613[[llm.providers]]
2614type = "claude"
2615name = "claude"
2616model = "claude-sonnet-4-6"
2617"#;
2618 let cfg = parse_llm(toml);
2619 assert_eq!(cfg.providers.len(), 2);
2620 assert_eq!(cfg.providers[0].provider_type, ProviderKind::Ollama);
2621 assert_eq!(cfg.providers[1].provider_type, ProviderKind::Claude);
2622 }
2623
2624 fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
2627 ProviderEntry {
2628 provider_type: ProviderKind::Cocoon,
2629 name: Some("cocoon".into()),
2630 cocoon_client_url: url.map(str::to_owned),
2631 model: model.map(str::to_owned),
2632 ..Default::default()
2633 }
2634 }
2635
2636 #[test]
2637 fn test_cocoon_url_validation_accepts_http() {
2638 assert!(
2639 cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2640 .validate()
2641 .is_ok()
2642 );
2643 }
2644
2645 #[test]
2646 fn test_cocoon_url_validation_accepts_https_localhost() {
2647 assert!(
2648 cocoon_entry(Some("https://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2649 .validate()
2650 .is_ok()
2651 );
2652 }
2653
2654 #[test]
2655 fn test_cocoon_url_validation_rejects_non_localhost() {
2656 let err = cocoon_entry(Some("http://192.168.1.10:10000"), Some("Qwen/Qwen3-0.6B"))
2657 .validate()
2658 .unwrap_err();
2659 assert!(
2660 err.to_string().contains("localhost"),
2661 "error should mention localhost restriction: {err}"
2662 );
2663 }
2664
2665 #[test]
2666 fn test_cocoon_url_validation_rejects_non_http_scheme() {
2667 let err = cocoon_entry(Some("ftp://localhost"), Some("Qwen/Qwen3-0.6B"))
2668 .validate()
2669 .unwrap_err();
2670 assert!(
2671 err.to_string().contains("ftp"),
2672 "error should mention the bad scheme: {err}"
2673 );
2674 }
2675
2676 #[test]
2677 fn test_cocoon_url_validation_rejects_invalid_url() {
2678 let err = cocoon_entry(Some("not-a-url"), Some("Qwen/Qwen3-0.6B"))
2679 .validate()
2680 .unwrap_err();
2681 assert!(
2682 err.to_string().contains("not-a-url"),
2683 "error should mention the bad value: {err}"
2684 );
2685 }
2686
2687 #[test]
2688 fn test_cocoon_url_none_passes() {
2689 assert!(
2690 cocoon_entry(None, Some("Qwen/Qwen3-0.6B"))
2691 .validate()
2692 .is_ok()
2693 );
2694 }
2695
2696 #[test]
2697 fn test_cocoon_model_empty_rejected() {
2698 let err = cocoon_entry(Some("http://localhost:10000"), Some(""))
2699 .validate()
2700 .unwrap_err();
2701 assert!(
2702 err.to_string().contains("empty"),
2703 "error should mention 'empty': {err}"
2704 );
2705 }
2706
2707 #[test]
2708 fn test_cocoon_model_none_passes() {
2709 assert!(
2710 cocoon_entry(Some("http://localhost:10000"), None)
2711 .validate()
2712 .is_ok()
2713 );
2714 }
2715
2716 #[test]
2717 fn validate_cocoon_pricing_negative_prompt_errors() {
2718 let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2719 e.cocoon_pricing = Some(CocoonPricing {
2720 prompt_cents_per_1k: -1.0,
2721 completion_cents_per_1k: 0.03,
2722 });
2723 assert!(e.validate().is_err());
2724 }
2725
2726 #[test]
2727 fn validate_cocoon_pricing_negative_completion_errors() {
2728 let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2729 e.cocoon_pricing = Some(CocoonPricing {
2730 prompt_cents_per_1k: 0.01,
2731 completion_cents_per_1k: -0.5,
2732 });
2733 assert!(e.validate().is_err());
2734 }
2735
2736 #[test]
2737 fn validate_cocoon_pricing_valid_passes() {
2738 let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2739 e.cocoon_pricing = Some(CocoonPricing {
2740 prompt_cents_per_1k: 0.01,
2741 completion_cents_per_1k: 0.03,
2742 });
2743 assert!(e.validate().is_ok());
2744 }
2745}