1use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
13#[serde(tag = "mode", rename_all = "snake_case")]
14pub enum ThinkingConfig {
15 Extended {
17 budget_tokens: u32,
19 },
20 Adaptive {
22 #[serde(default, skip_serializing_if = "Option::is_none")]
24 effort: Option<ThinkingEffort>,
25 },
26}
27
28#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
30#[serde(rename_all = "lowercase")]
31pub enum ThinkingEffort {
32 Low,
34 #[default]
36 Medium,
37 High,
39}
40
41#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
47#[serde(rename_all = "snake_case")]
48pub enum CacheTtl {
49 #[default]
51 Ephemeral,
52 #[serde(rename = "1h")]
55 OneHour,
56}
57
58impl CacheTtl {
59 #[must_use]
62 pub fn requires_beta(self) -> bool {
63 match self {
64 Self::OneHour => true,
65 Self::Ephemeral => false,
66 }
67 }
68}
69
70#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
75#[serde(rename_all = "lowercase")]
76pub enum GeminiThinkingLevel {
77 Minimal,
79 Low,
81 Medium,
83 High,
85}
86
87pub use zeph_common::ProviderName;
88
/// Serde default for `LlmConfig::response_cache_ttl_secs`: 3600 seconds.
fn default_response_cache_ttl_secs() -> u64 {
    3600
}
92
/// Serde default for `LlmConfig::semantic_cache_threshold`: 0.95.
fn default_semantic_cache_threshold() -> f32 {
    0.95
}
96
/// Serde default for `LlmConfig::semantic_cache_max_candidates`: 10.
fn default_semantic_cache_max_candidates() -> u32 {
    10
}
100
/// Serde default for `LlmConfig::router_ema_alpha`: 0.1.
fn default_router_ema_alpha() -> f64 {
    0.1
}
104
/// Serde default for `LlmConfig::router_reorder_interval`: 10.
fn default_router_reorder_interval() -> u64 {
    10
}
108
/// Default embedding model name: `"qwen3-embedding"`.
fn default_embedding_model() -> String {
    "qwen3-embedding".into()
}
112
/// Serde default for the candle `source` field: `"huggingface"`.
fn default_candle_source() -> String {
    "huggingface".into()
}
116
/// Serde default for the candle `chat_template` field: `"chatml"`.
fn default_chat_template() -> String {
    "chatml".into()
}
120
/// Serde default for the candle `device` field: `"cpu"`.
fn default_candle_device() -> String {
    "cpu".into()
}
124
/// Serde default for `GenerationParams::temperature`: 0.7.
fn default_temperature() -> f64 {
    0.7
}
128
/// Serde default for `GenerationParams::max_tokens`: 2048.
fn default_max_tokens() -> usize {
    2048
}
132
/// Serde default for `GenerationParams::seed`: 42.
fn default_seed() -> u64 {
    42
}
136
/// Serde default for `GenerationParams::repeat_penalty`: 1.1.
fn default_repeat_penalty() -> f32 {
    1.1
}
140
/// Serde default for `GenerationParams::repeat_last_n`: 64.
fn default_repeat_last_n() -> usize {
    64
}
144
/// Serde default for `CascadeConfig::quality_threshold`: 0.5.
fn default_cascade_quality_threshold() -> f64 {
    0.5
}
148
/// Serde default for `CascadeConfig::max_escalations`: 2.
fn default_cascade_max_escalations() -> u8 {
    2
}
152
/// Serde default for `CascadeConfig::window_size`: 50.
fn default_cascade_window_size() -> usize {
    50
}
156
/// Serde default for `CascadeConfig::judge_timeout_ms`: 5000 milliseconds.
fn default_cascade_judge_timeout_ms() -> u64 {
    5_000
}
160
/// Serde default for `ReputationConfig::decay_factor`: 0.95.
fn default_reputation_decay_factor() -> f64 {
    0.95
}
164
/// Serde default for `ReputationConfig::weight`: 0.3.
fn default_reputation_weight() -> f64 {
    0.3
}
168
/// Serde default for `ReputationConfig::min_observations`: 5.
fn default_reputation_min_observations() -> u64 {
    5
}
172
/// Serde default for `SttConfig::provider`: the empty string.
#[must_use]
pub fn default_stt_provider() -> String {
    String::new()
}
178
/// Serde default for `SttConfig::language`: `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    "auto".into()
}
184
185#[must_use]
187pub(crate) fn get_default_embedding_model() -> String {
188 default_embedding_model()
189}
190
191#[must_use]
193pub(crate) fn get_default_response_cache_ttl_secs() -> u64 {
194 default_response_cache_ttl_secs()
195}
196
197#[must_use]
199pub(crate) fn get_default_router_ema_alpha() -> f64 {
200 default_router_ema_alpha()
201}
202
203#[must_use]
205pub(crate) fn get_default_router_reorder_interval() -> u64 {
206 default_router_reorder_interval()
207}
208
209#[non_exhaustive]
222#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
223#[serde(rename_all = "lowercase")]
224pub enum ProviderKind {
225 Ollama,
227 Claude,
229 OpenAi,
231 Gemini,
233 Candle,
235 Compatible,
237 Gonka,
239 Cocoon,
241}
242
243impl ProviderKind {
244 #[must_use]
255 pub fn as_str(self) -> &'static str {
256 match self {
257 Self::Ollama => "ollama",
258 Self::Claude => "claude",
259 Self::OpenAi => "openai",
260 Self::Gemini => "gemini",
261 Self::Candle => "candle",
262 Self::Compatible => "compatible",
263 Self::Gonka => "gonka",
264 Self::Cocoon => "cocoon",
265 }
266 }
267}
268
269impl std::fmt::Display for ProviderKind {
270 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
271 f.write_str(self.as_str())
272 }
273}
274
275#[derive(Debug, Deserialize, Serialize)]
299pub struct LlmConfig {
300 #[serde(default, skip_serializing_if = "Vec::is_empty")]
302 pub providers: Vec<ProviderEntry>,
303
304 #[serde(default, skip_serializing_if = "is_routing_none")]
306 pub routing: LlmRoutingStrategy,
307
308 #[serde(default = "default_embedding_model_opt")]
309 pub embedding_model: String,
310 #[serde(default, skip_serializing_if = "Option::is_none")]
311 pub candle: Option<CandleConfig>,
312 #[serde(default)]
313 pub stt: Option<SttConfig>,
314 #[serde(default)]
315 pub response_cache_enabled: bool,
316 #[serde(default = "default_response_cache_ttl_secs")]
317 pub response_cache_ttl_secs: u64,
318 #[serde(default)]
320 pub semantic_cache_enabled: bool,
321 #[serde(default = "default_semantic_cache_threshold")]
327 pub semantic_cache_threshold: f32,
328 #[serde(default = "default_semantic_cache_max_candidates")]
341 pub semantic_cache_max_candidates: u32,
342 #[serde(default)]
343 pub router_ema_enabled: bool,
344 #[serde(default = "default_router_ema_alpha")]
345 pub router_ema_alpha: f64,
346 #[serde(default = "default_router_reorder_interval")]
347 pub router_reorder_interval: u64,
348 #[serde(default, skip_serializing_if = "Option::is_none")]
350 pub router: Option<RouterConfig>,
351 #[serde(default, skip_serializing_if = "Option::is_none")]
354 pub instruction_file: Option<std::path::PathBuf>,
355 #[serde(default, skip_serializing_if = "Option::is_none")]
359 pub summary_model: Option<String>,
360 #[serde(default, skip_serializing_if = "Option::is_none")]
362 pub summary_provider: Option<ProviderEntry>,
363
364 #[serde(default, skip_serializing_if = "Option::is_none")]
366 pub complexity_routing: Option<ComplexityRoutingConfig>,
367
368 #[serde(default, skip_serializing_if = "Option::is_none")]
370 pub coe: Option<CoeConfig>,
371}
372
373fn default_embedding_model_opt() -> String {
374 default_embedding_model()
375}
376
377#[allow(clippy::trivially_copy_pass_by_ref)]
378fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
379 *s == LlmRoutingStrategy::None
380}
381
382impl LlmConfig {
383 #[must_use]
385 pub fn effective_provider(&self) -> ProviderKind {
386 self.providers
387 .first()
388 .map_or(ProviderKind::Ollama, |e| e.provider_type)
389 }
390
391 #[must_use]
393 pub fn effective_base_url(&self) -> &str {
394 self.providers
395 .first()
396 .and_then(|e| e.base_url.as_deref())
397 .unwrap_or("http://localhost:11434")
398 }
399
400 #[must_use]
406 pub fn effective_model(&self) -> &str {
407 self.providers
408 .iter()
409 .find(|e| !e.embed)
410 .and_then(|e| e.model.as_deref())
411 .unwrap_or("qwen3:8b")
412 }
413
414 #[must_use]
422 pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
423 let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
424 if name_hint.is_empty() {
425 self.providers.iter().find(|p| p.stt_model.is_some())
426 } else {
427 self.providers
428 .iter()
429 .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
430 }
431 }
432
433 pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
439 Ok(())
440 }
441
442 pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
448 use crate::error::ConfigError;
449
450 let Some(stt) = &self.stt else {
451 return Ok(());
452 };
453 if stt.provider.is_empty() {
454 return Ok(());
455 }
456 let found = self
457 .providers
458 .iter()
459 .find(|p| p.effective_name() == stt.provider);
460 match found {
461 None => {
462 return Err(ConfigError::Validation(format!(
463 "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
464 stt.provider
465 )));
466 }
467 Some(entry) if entry.stt_model.is_none() => {
468 tracing::warn!(
469 provider = stt.provider,
470 "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
471 );
472 }
473 _ => {}
474 }
475 Ok(())
476 }
477
478 pub fn warn_non_fast_tier_provider(
502 &self,
503 provider_name: &ProviderName,
504 feature_label: &str,
505 extra_allowlist: &[String],
506 ) {
507 if provider_name.is_empty() {
508 return;
509 }
510 let name = provider_name.as_str();
511 let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
512 tracing::warn!(
513 provider = name,
514 "{feature_label} provider '{name}' not found in [[llm.providers]]"
515 );
516 return;
517 };
518 let model = entry.model.as_deref().unwrap_or("");
519 if model.is_empty() {
520 return;
521 }
522 let lower = model.to_lowercase();
523 let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
524 let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
525 if !in_hints && !in_extra {
526 tracing::warn!(
527 provider = name,
528 actual = model,
529 "{feature_label} provider '{name}' uses model '{model}' \
530 which may not be fast-tier; prefer a fast model to bound distillation cost"
531 );
532 }
533 }
534}
535
/// Lowercase substrings that mark a model name as fast-tier.
///
/// `LlmConfig::warn_non_fast_tier_provider` lowercases the model name and
/// checks whether it contains any of these hints.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
558
559#[derive(Debug, Clone, Deserialize, Serialize)]
572pub struct SttConfig {
573 #[serde(default = "default_stt_provider")]
576 pub provider: String,
577 #[serde(default = "default_stt_language")]
579 pub language: String,
580}
581
582#[non_exhaustive]
584#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
585#[serde(rename_all = "lowercase")]
586pub enum RouterStrategyConfig {
587 #[default]
589 Ema,
590 Thompson,
592 Cascade,
594 Bandit,
596}
597
598#[derive(Debug, Clone, Deserialize, Serialize)]
611pub struct AsiConfig {
612 #[serde(default)]
614 pub enabled: bool,
615
616 #[serde(default = "default_asi_window")]
618 pub window: usize,
619
620 #[serde(default = "default_asi_coherence_threshold")]
622 pub coherence_threshold: f32,
623
624 #[serde(default = "default_asi_penalty_weight")]
629 pub penalty_weight: f32,
630}
631
/// Serde default for `AsiConfig::window`: 5.
fn default_asi_window() -> usize {
    5
}
635
/// Serde default for `AsiConfig::coherence_threshold`: 0.7.
fn default_asi_coherence_threshold() -> f32 {
    0.7
}
639
/// Serde default for `AsiConfig::penalty_weight`: 0.3.
fn default_asi_penalty_weight() -> f32 {
    0.3
}
643
644impl Default for AsiConfig {
645 fn default() -> Self {
646 Self {
647 enabled: false,
648 window: default_asi_window(),
649 coherence_threshold: default_asi_coherence_threshold(),
650 penalty_weight: default_asi_penalty_weight(),
651 }
652 }
653}
654
655#[derive(Debug, Clone, Deserialize, Serialize)]
657pub struct RouterConfig {
658 #[serde(default)]
660 pub strategy: RouterStrategyConfig,
661 #[serde(default)]
669 pub thompson_state_path: Option<String>,
670 #[serde(default)]
672 pub cascade: Option<CascadeConfig>,
673 #[serde(default)]
675 pub reputation: Option<ReputationConfig>,
676 #[serde(default)]
678 pub bandit: Option<BanditConfig>,
679 #[serde(default)]
688 pub quality_gate: Option<f32>,
689 #[serde(default)]
691 pub asi: Option<AsiConfig>,
692 #[serde(default = "default_embed_concurrency")]
698 pub embed_concurrency: usize,
699}
700
/// Serde default for `RouterConfig::embed_concurrency`: 4.
fn default_embed_concurrency() -> usize {
    4
}
704
705#[derive(Debug, Clone, Deserialize, Serialize)]
712pub struct ReputationConfig {
713 #[serde(default)]
715 pub enabled: bool,
716 #[serde(default = "default_reputation_decay_factor")]
719 pub decay_factor: f64,
720 #[serde(default = "default_reputation_weight")]
727 pub weight: f64,
728 #[serde(default = "default_reputation_min_observations")]
730 pub min_observations: u64,
731 #[serde(default)]
733 pub state_path: Option<String>,
734}
735
736#[derive(Debug, Clone, Deserialize, Serialize)]
747pub struct CascadeConfig {
748 #[serde(default = "default_cascade_quality_threshold")]
751 pub quality_threshold: f64,
752
753 #[serde(default = "default_cascade_max_escalations")]
757 pub max_escalations: u8,
758
759 #[serde(default)]
763 pub classifier_mode: CascadeClassifierMode,
764
765 #[serde(default = "default_cascade_window_size")]
767 pub window_size: usize,
768
769 #[serde(default)]
773 pub max_cascade_tokens: Option<u32>,
774
775 #[serde(default, skip_serializing_if = "Option::is_none")]
780 pub cost_tiers: Option<Vec<String>>,
781
782 #[serde(default = "default_cascade_judge_timeout_ms")]
786 pub judge_timeout_ms: u64,
787}
788
789impl Default for CascadeConfig {
790 fn default() -> Self {
791 Self {
792 quality_threshold: default_cascade_quality_threshold(),
793 max_escalations: default_cascade_max_escalations(),
794 classifier_mode: CascadeClassifierMode::default(),
795 window_size: default_cascade_window_size(),
796 max_cascade_tokens: None,
797 cost_tiers: None,
798 judge_timeout_ms: default_cascade_judge_timeout_ms(),
799 }
800 }
801}
802
803#[non_exhaustive]
805#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
806#[serde(rename_all = "lowercase")]
807pub enum CascadeClassifierMode {
808 #[default]
811 Heuristic,
812 Judge,
815}
816
/// Serde default for `BanditConfig::alpha`: 1.0.
fn default_bandit_alpha() -> f32 {
    1.0
}
820
/// Serde default for `BanditConfig::dim`: 32.
fn default_bandit_dim() -> usize {
    32
}
824
/// Serde default for `BanditConfig::cost_weight`: 0.1.
fn default_bandit_cost_weight() -> f32 {
    0.1
}
828
/// Serde default for `BanditConfig::decay_factor`: 1.0.
fn default_bandit_decay_factor() -> f32 {
    1.0
}
832
/// Serde default for `BanditConfig::embedding_timeout_ms`: 50 milliseconds.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50
}
836
/// Serde default for `BanditConfig::cache_size`: 512.
fn default_bandit_cache_size() -> usize {
    512
}
840
841#[derive(Debug, Clone, Deserialize, Serialize)]
854pub struct BanditConfig {
855 #[serde(default = "default_bandit_alpha")]
858 pub alpha: f32,
859
860 #[serde(default = "default_bandit_dim")]
867 pub dim: usize,
868
869 #[serde(default = "default_bandit_cost_weight")]
872 pub cost_weight: f32,
873
874 #[serde(default = "default_bandit_decay_factor")]
877 pub decay_factor: f32,
878
879 #[serde(default)]
885 pub embedding_provider: ProviderName,
886
887 #[serde(default = "default_bandit_embedding_timeout_ms")]
890 pub embedding_timeout_ms: u64,
891
892 #[serde(default = "default_bandit_cache_size")]
894 pub cache_size: usize,
895
896 #[serde(default)]
903 pub state_path: Option<String>,
904
905 #[serde(default = "default_bandit_memory_confidence_threshold")]
911 pub memory_confidence_threshold: f32,
912
913 #[serde(default)]
919 pub warmup_queries: Option<u64>,
920}
921
/// Serde default for `BanditConfig::memory_confidence_threshold`: 0.9.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9
}
925
926impl Default for BanditConfig {
927 fn default() -> Self {
928 Self {
929 alpha: default_bandit_alpha(),
930 dim: default_bandit_dim(),
931 cost_weight: default_bandit_cost_weight(),
932 decay_factor: default_bandit_decay_factor(),
933 embedding_provider: ProviderName::default(),
934 embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
935 cache_size: default_bandit_cache_size(),
936 state_path: None,
937 memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
938 warmup_queries: None,
939 }
940 }
941}
942
943#[derive(Debug, Deserialize, Serialize)]
944pub struct CandleConfig {
945 #[serde(default = "default_candle_source")]
946 pub source: String,
947 #[serde(default)]
948 pub local_path: String,
949 #[serde(default)]
950 pub filename: Option<String>,
951 #[serde(default = "default_chat_template")]
952 pub chat_template: String,
953 #[serde(default = "default_candle_device")]
954 pub device: String,
955 #[serde(default)]
956 pub embedding_repo: Option<String>,
957 #[serde(default)]
961 pub hf_token: Option<String>,
962 #[serde(default)]
963 pub generation: GenerationParams,
964 #[serde(default = "default_inference_timeout_secs")]
973 pub inference_timeout_secs: u64,
974}
975
/// Serde default for the candle `inference_timeout_secs` field: 120 seconds.
fn default_inference_timeout_secs() -> u64 {
    120
}
979
980#[derive(Debug, Clone, Deserialize, Serialize)]
984pub struct GenerationParams {
985 #[serde(default = "default_temperature")]
987 pub temperature: f64,
988 #[serde(default)]
991 pub top_p: Option<f64>,
992 #[serde(default)]
995 pub top_k: Option<usize>,
996 #[serde(default = "default_max_tokens")]
999 pub max_tokens: usize,
1000 #[serde(default = "default_seed")]
1002 pub seed: u64,
1003 #[serde(default = "default_repeat_penalty")]
1005 pub repeat_penalty: f32,
1006 #[serde(default = "default_repeat_last_n")]
1008 pub repeat_last_n: usize,
1009}
1010
/// Upper bound applied by `GenerationParams::capped_max_tokens`.
pub const MAX_TOKENS_CAP: usize = 32768;
1013
1014impl GenerationParams {
1015 #[must_use]
1026 pub fn capped_max_tokens(&self) -> usize {
1027 self.max_tokens.min(MAX_TOKENS_CAP)
1028 }
1029}
1030
1031impl Default for GenerationParams {
1032 fn default() -> Self {
1033 Self {
1034 temperature: default_temperature(),
1035 top_p: None,
1036 top_k: None,
1037 max_tokens: default_max_tokens(),
1038 seed: default_seed(),
1039 repeat_penalty: default_repeat_penalty(),
1040 repeat_last_n: default_repeat_last_n(),
1041 }
1042 }
1043}
1044
1045#[non_exhaustive]
1049#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
1050#[serde(rename_all = "lowercase")]
1051pub enum LlmRoutingStrategy {
1052 #[default]
1054 None,
1055 Ema,
1057 Thompson,
1059 Cascade,
1061 Triage,
1063 Bandit,
1065}
1066
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`: 5 seconds.
fn default_triage_timeout_secs() -> u64 {
    5
}
1070
/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`: 50.
fn default_max_triage_tokens() -> u32 {
    50
}
1074
/// Serde default helper for boolean fields that default to `true`.
fn default_true() -> bool {
    true
}
1078
/// `skip_serializing_if` predicate: `true` when the flag is `true`.
// serde calls `skip_serializing_if` predicates with a reference, so the
// by-reference `bool` parameter is required here.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_true(v: &bool) -> bool {
    *v
}
1083
1084#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1086pub struct TierMapping {
1087 pub simple: Option<String>,
1088 pub medium: Option<String>,
1089 pub complex: Option<String>,
1090 pub expert: Option<String>,
1091}
1092
1093#[derive(Debug, Clone, Deserialize, Serialize)]
1114pub struct ComplexityRoutingConfig {
1115 #[serde(default)]
1117 pub triage_provider: Option<ProviderName>,
1118
1119 #[serde(default = "default_true")]
1121 pub bypass_single_provider: bool,
1122
1123 #[serde(default)]
1125 pub tiers: TierMapping,
1126
1127 #[serde(default = "default_max_triage_tokens")]
1129 pub max_triage_tokens: u32,
1130
1131 #[serde(default = "default_triage_timeout_secs")]
1134 pub triage_timeout_secs: u64,
1135
1136 #[serde(default)]
1139 pub fallback_strategy: Option<String>,
1140}
1141
1142impl Default for ComplexityRoutingConfig {
1143 fn default() -> Self {
1144 Self {
1145 triage_provider: None,
1146 bypass_single_provider: true,
1147 tiers: TierMapping::default(),
1148 max_triage_tokens: default_max_triage_tokens(),
1149 triage_timeout_secs: default_triage_timeout_secs(),
1150 fallback_strategy: None,
1151 }
1152 }
1153}
1154
1155#[derive(Debug, Clone, Deserialize, Serialize)]
1173#[serde(default)]
1174pub struct CoeConfig {
1175 pub enabled: bool,
1177 pub intra_threshold: f64,
1179 pub inter_threshold: f64,
1181 pub shadow_sample_rate: f64,
1183 pub secondary_provider: ProviderName,
1185 pub embed_provider: ProviderName,
1187}
1188
1189impl Default for CoeConfig {
1190 fn default() -> Self {
1191 Self {
1192 enabled: false,
1193 intra_threshold: 0.8,
1194 inter_threshold: 0.20,
1195 shadow_sample_rate: 0.1,
1196 secondary_provider: ProviderName::default(),
1197 embed_provider: ProviderName::default(),
1198 }
1199 }
1200}
1201
1202#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
1207pub struct GonkaNode {
1208 pub url: String,
1210 pub address: String,
1215 #[serde(default, skip_serializing_if = "Option::is_none")]
1217 pub name: Option<String>,
1218}
1219
1220#[derive(Debug, Clone, Deserialize, Serialize)]
1223pub struct CandleInlineConfig {
1224 #[serde(default = "default_candle_source")]
1225 pub source: String,
1226 #[serde(default)]
1227 pub local_path: String,
1228 #[serde(default)]
1229 pub filename: Option<String>,
1230 #[serde(default = "default_chat_template")]
1231 pub chat_template: String,
1232 #[serde(default = "default_candle_device")]
1233 pub device: String,
1234 #[serde(default)]
1235 pub embedding_repo: Option<String>,
1236 #[serde(default)]
1238 pub hf_token: Option<String>,
1239 #[serde(default)]
1240 pub generation: GenerationParams,
1241 #[serde(default = "default_inference_timeout_secs")]
1246 pub inference_timeout_secs: u64,
1247}
1248
1249impl Default for CandleInlineConfig {
1250 fn default() -> Self {
1251 Self {
1252 source: default_candle_source(),
1253 local_path: String::new(),
1254 filename: None,
1255 chat_template: default_chat_template(),
1256 device: default_candle_device(),
1257 embedding_repo: None,
1258 hf_token: None,
1259 generation: GenerationParams::default(),
1260 inference_timeout_secs: default_inference_timeout_secs(),
1261 }
1262 }
1263}
1264
1265#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
1274pub struct CocoonPricing {
1275 #[serde(default)]
1277 pub prompt_cents_per_1k: f64,
1278 #[serde(default)]
1281 pub completion_cents_per_1k: f64,
1282}
1283
1284#[derive(Debug, Clone, Deserialize, Serialize)]
1290#[allow(clippy::struct_excessive_bools)] pub struct ProviderEntry {
1292 #[serde(rename = "type")]
1294 pub provider_type: ProviderKind,
1295
1296 #[serde(default)]
1298 pub name: Option<String>,
1299
1300 #[serde(default)]
1302 pub model: Option<String>,
1303
1304 #[serde(default)]
1306 pub base_url: Option<String>,
1307
1308 #[serde(default)]
1310 pub max_tokens: Option<u32>,
1311
1312 #[serde(default)]
1314 pub embedding_model: Option<String>,
1315
1316 #[serde(default)]
1319 pub stt_model: Option<String>,
1320
1321 #[serde(default)]
1323 pub embed: bool,
1324
1325 #[serde(default)]
1327 pub default: bool,
1328
1329 #[serde(default)]
1331 pub thinking: Option<ThinkingConfig>,
1332 #[serde(default)]
1333 pub server_compaction: bool,
1334 #[serde(default)]
1335 pub enable_extended_context: bool,
1336 #[serde(default)]
1339 pub prompt_cache_ttl: Option<CacheTtl>,
1340
1341 #[serde(default)]
1343 pub reasoning_effort: Option<String>,
1344
1345 #[serde(default)]
1347 pub thinking_level: Option<GeminiThinkingLevel>,
1348 #[serde(default)]
1349 pub thinking_budget: Option<i32>,
1350 #[serde(default)]
1351 pub include_thoughts: Option<bool>,
1352
1353 #[serde(default)]
1355 pub api_key: Option<String>,
1356
1357 #[serde(default)]
1359 pub candle: Option<CandleInlineConfig>,
1360
1361 #[serde(default)]
1363 pub vision_model: Option<String>,
1364
1365 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1368 pub gonka_nodes: Vec<GonkaNode>,
1369 #[serde(default, skip_serializing_if = "Option::is_none")]
1371 pub gonka_chain_prefix: Option<String>,
1372
1373 #[serde(default, skip_serializing_if = "Option::is_none")]
1376 pub cocoon_client_url: Option<String>,
1377 #[serde(default, skip_serializing_if = "Option::is_none")]
1380 pub cocoon_access_hash: Option<String>,
1381 #[serde(default = "default_true", skip_serializing_if = "is_true")]
1383 pub cocoon_health_check: bool,
1384 #[serde(default, skip_serializing_if = "Option::is_none")]
1397 pub cocoon_pricing: Option<CocoonPricing>,
1398
1399 #[serde(default)]
1401 pub instruction_file: Option<std::path::PathBuf>,
1402
1403 #[serde(default, skip_serializing_if = "Option::is_none")]
1421 pub max_concurrent: Option<u32>,
1422}
1423
1424impl Default for ProviderEntry {
1425 fn default() -> Self {
1426 Self {
1427 provider_type: ProviderKind::Ollama,
1428 name: None,
1429 model: None,
1430 base_url: None,
1431 max_tokens: None,
1432 embedding_model: None,
1433 stt_model: None,
1434 embed: false,
1435 default: false,
1436 thinking: None,
1437 server_compaction: false,
1438 enable_extended_context: false,
1439 prompt_cache_ttl: None,
1440 reasoning_effort: None,
1441 thinking_level: None,
1442 thinking_budget: None,
1443 include_thoughts: None,
1444 api_key: None,
1445 candle: None,
1446 vision_model: None,
1447 gonka_nodes: Vec::new(),
1448 gonka_chain_prefix: None,
1449 cocoon_client_url: None,
1450 cocoon_access_hash: None,
1451 cocoon_health_check: true,
1452 cocoon_pricing: None,
1453 instruction_file: None,
1454 max_concurrent: None,
1455 }
1456 }
1457}
1458
1459impl ProviderEntry {
1460 #[must_use]
1462 pub fn effective_name(&self) -> String {
1463 self.name
1464 .clone()
1465 .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1466 }
1467
1468 #[must_use]
1473 pub fn effective_model(&self) -> String {
1474 if let Some(ref m) = self.model {
1475 return m.clone();
1476 }
1477 match self.provider_type {
1478 ProviderKind::Ollama => "qwen3:8b".to_owned(),
1479 ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1480 ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1481 ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1482 ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1485 ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1486 }
1487 }
1488
1489 pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1496 use crate::error::ConfigError;
1497
1498 if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1500 return Err(ConfigError::Validation(
1501 "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1502 ));
1503 }
1504
1505 if self.provider_type == ProviderKind::Gonka {
1507 if self.name.is_none() {
1508 return Err(ConfigError::Validation(
1509 "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1510 ));
1511 }
1512 self.validate_gonka_nodes()?;
1513 }
1514
1515 if self.provider_type == ProviderKind::Cocoon
1517 && self.name.as_ref().is_none_or(String::is_empty)
1518 {
1519 return Err(ConfigError::Validation(
1520 "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1521 ));
1522 }
1523
1524 if self.provider_type == ProviderKind::Cocoon {
1526 let name = self.effective_name();
1527 if let Some(ref url_str) = self.cocoon_client_url {
1528 match url::Url::parse(url_str) {
1529 Err(_) => {
1530 return Err(ConfigError::Validation(format!(
1531 "[[llm.providers]] entry '{name}': cocoon_client_url \
1532 '{url_str}' is not a valid URL; expected format: \
1533 http://localhost:10000"
1534 )));
1535 }
1536 Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1537 return Err(ConfigError::Validation(format!(
1538 "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1539 localhost or 127.0.0.1, got '{}'",
1540 u.host_str().unwrap_or("<none>")
1541 )));
1542 }
1543 Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1544 return Err(ConfigError::Validation(format!(
1545 "[[llm.providers]] entry '{name}': cocoon_client_url \
1546 scheme must be http or https, got '{}'",
1547 u.scheme()
1548 )));
1549 }
1550 _ => {}
1551 }
1552 }
1553 if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1554 return Err(ConfigError::Validation(format!(
1555 "[[llm.providers]] entry '{name}': model must not be empty \
1556 for cocoon provider"
1557 )));
1558 }
1559 if let Some(ref p) = self.cocoon_pricing {
1560 if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1561 return Err(ConfigError::Validation(format!(
1562 "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1563 must be a finite non-negative number"
1564 )));
1565 }
1566 if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1567 return Err(ConfigError::Validation(format!(
1568 "[[llm.providers]] entry '{name}': \
1569 cocoon_pricing.completion_cents_per_1k \
1570 must be a finite non-negative number"
1571 )));
1572 }
1573 }
1574 }
1575
1576 self.warn_irrelevant_fields();
1578
1579 if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1582 tracing::warn!(
1583 provider = self.effective_name(),
1584 "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1585 Whisper STT API — use OpenAI, compatible, or candle instead"
1586 );
1587 }
1588
1589 Ok(())
1590 }
1591
1592 #[must_use]
1594 pub fn effective_gonka_chain_prefix(&self) -> &str {
1595 self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1596 }
1597
1598 fn warn_irrelevant_fields(&self) {
1599 let name = self.effective_name();
1600 match self.provider_type {
1601 ProviderKind::Ollama => {
1602 if self.thinking.is_some() {
1603 tracing::warn!(
1604 provider = name,
1605 "field `thinking` is only used by Claude providers"
1606 );
1607 }
1608 if self.reasoning_effort.is_some() {
1609 tracing::warn!(
1610 provider = name,
1611 "field `reasoning_effort` is only used by OpenAI providers"
1612 );
1613 }
1614 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1615 tracing::warn!(
1616 provider = name,
1617 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1618 );
1619 }
1620 }
1621 ProviderKind::Claude => {
1622 if self.reasoning_effort.is_some() {
1623 tracing::warn!(
1624 provider = name,
1625 "field `reasoning_effort` is only used by OpenAI providers"
1626 );
1627 }
1628 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1629 tracing::warn!(
1630 provider = name,
1631 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1632 );
1633 }
1634 }
1635 ProviderKind::OpenAi => {
1636 if self.thinking.is_some() {
1637 tracing::warn!(
1638 provider = name,
1639 "field `thinking` is only used by Claude providers"
1640 );
1641 }
1642 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1643 tracing::warn!(
1644 provider = name,
1645 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1646 );
1647 }
1648 }
1649 ProviderKind::Gemini => {
1650 if self.thinking.is_some() {
1651 tracing::warn!(
1652 provider = name,
1653 "field `thinking` is only used by Claude providers"
1654 );
1655 }
1656 if self.reasoning_effort.is_some() {
1657 tracing::warn!(
1658 provider = name,
1659 "field `reasoning_effort` is only used by OpenAI providers"
1660 );
1661 }
1662 }
1663 ProviderKind::Gonka => {
1664 if self.thinking.is_some() {
1665 tracing::warn!(
1666 provider = name,
1667 "field `thinking` is only used by Claude providers"
1668 );
1669 }
1670 if self.reasoning_effort.is_some() {
1671 tracing::warn!(
1672 provider = name,
1673 "field `reasoning_effort` is only used by OpenAI providers"
1674 );
1675 }
1676 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1677 tracing::warn!(
1678 provider = name,
1679 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1680 );
1681 }
1682 }
1683 ProviderKind::Compatible | ProviderKind::Candle => {}
1684 ProviderKind::Cocoon => {
1685 if self.base_url.is_some() {
1686 tracing::warn!(
1687 provider = name,
1688 "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1689 );
1690 }
1691 }
1692 }
1693 }
1694
1695 fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1696 use crate::error::ConfigError;
1697 if self.gonka_nodes.is_empty() {
1698 return Err(ConfigError::Validation(format!(
1699 "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1700 self.effective_name()
1701 )));
1702 }
1703 for (i, node) in self.gonka_nodes.iter().enumerate() {
1704 if node.url.is_empty() {
1705 return Err(ConfigError::Validation(format!(
1706 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1707 self.effective_name()
1708 )));
1709 }
1710 if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1711 return Err(ConfigError::Validation(format!(
1712 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1713 self.effective_name()
1714 )));
1715 }
1716 }
1717 Ok(())
1718 }
1719}
1720
1721pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1731 use crate::error::ConfigError;
1732 use std::collections::HashSet;
1733
1734 if entries.is_empty() {
1735 return Err(ConfigError::Validation(
1736 "at least one LLM provider must be configured in [[llm.providers]]".into(),
1737 ));
1738 }
1739
1740 let default_count = entries.iter().filter(|e| e.default).count();
1741 if default_count > 1 {
1742 return Err(ConfigError::Validation(
1743 "only one [[llm.providers]] entry can be marked `default = true`".into(),
1744 ));
1745 }
1746
1747 let mut seen_names: HashSet<String> = HashSet::new();
1748 for entry in entries {
1749 let name = entry.effective_name();
1750 if !seen_names.insert(name.clone()) {
1751 return Err(ConfigError::Validation(format!(
1752 "duplicate provider name \"{name}\" in [[llm.providers]]"
1753 )));
1754 }
1755 entry.validate()?;
1756 }
1757
1758 Ok(())
1759}
1760
#[cfg(test)]
mod tests {
    //! Unit tests for provider-entry validation, pool validation, effective
    //! value helpers, TOML deserialization of `[llm]`, and `ProviderName`.

    use super::*;

    // ---------------------------------------------------------------------
    // Shared fixtures
    // ---------------------------------------------------------------------

    /// A named Ollama entry with an explicit model.
    fn ollama_entry() -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Ollama,
            name: Some("ollama".into()),
            model: Some("qwen3:8b".into()),
            ..Default::default()
        }
    }

    /// A named Claude entry with an explicit model and token limit.
    fn claude_entry() -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Claude,
            name: Some("claude".into()),
            model: Some("claude-sonnet-4-6".into()),
            max_tokens: Some(8192),
            ..Default::default()
        }
    }

    // ---------------------------------------------------------------------
    // ProviderEntry::validate
    // ---------------------------------------------------------------------

    /// The Ollama fixture validates.
    #[test]
    fn validate_ollama_valid() {
        let provider = ollama_entry();
        assert!(provider.validate().is_ok());
    }

    /// The Claude fixture validates.
    #[test]
    fn validate_claude_valid() {
        let provider = claude_entry();
        assert!(provider.validate().is_ok());
    }

    /// A `Compatible` entry without a name is rejected, and the error text
    /// mentions "compatible".
    #[test]
    fn validate_compatible_without_name_errors() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: None,
            ..Default::default()
        };
        let err = provider.validate().unwrap_err();
        assert!(
            err.to_string().contains("compatible"),
            "error should mention compatible: {err}"
        );
    }

    /// A fully specified `Compatible` entry validates.
    #[test]
    fn validate_compatible_with_name_ok() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: Some("my-proxy".into()),
            base_url: Some("http://localhost:8080".into()),
            model: Some("gpt-4o".into()),
            max_tokens: Some(4096),
            ..Default::default()
        };
        assert!(provider.validate().is_ok());
    }

    /// An OpenAI entry with name, model and token limit validates.
    #[test]
    fn validate_openai_valid() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::OpenAi,
            name: Some("openai".into()),
            model: Some("gpt-4o".into()),
            max_tokens: Some(4096),
            ..Default::default()
        };
        assert!(provider.validate().is_ok());
    }

    /// A Gemini entry with name and model validates.
    #[test]
    fn validate_gemini_valid() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Gemini,
            name: Some("gemini".into()),
            model: Some("gemini-2.0-flash".into()),
            ..Default::default()
        };
        assert!(provider.validate().is_ok());
    }

    // ---------------------------------------------------------------------
    // validate_pool
    // ---------------------------------------------------------------------

    /// An empty pool is rejected with an "at least one" message.
    #[test]
    fn validate_pool_empty_errors() {
        let err = validate_pool(&[]).unwrap_err();
        assert!(err.to_string().contains("at least one"), "{err}");
    }

    /// A pool holding a single valid entry is accepted.
    #[test]
    fn validate_pool_single_entry_ok() {
        let pool = [ollama_entry()];
        assert!(validate_pool(&pool).is_ok());
    }

    /// Two entries sharing the same name are rejected as duplicates.
    #[test]
    fn validate_pool_duplicate_names_errors() {
        let pool = [ollama_entry(), ollama_entry()];
        let err = validate_pool(&pool).unwrap_err();
        assert!(err.to_string().contains("duplicate"), "{err}");
    }

    /// Two entries both flagged `default` are rejected.
    #[test]
    fn validate_pool_multiple_defaults_errors() {
        let mut first = ollama_entry();
        first.default = true;
        let mut second = claude_entry();
        second.default = true;

        let err = validate_pool(&[first, second]).unwrap_err();
        assert!(err.to_string().contains("default"), "{err}");
    }

    /// Two distinct valid entries are accepted together.
    #[test]
    fn validate_pool_two_different_providers_ok() {
        let pool = [ollama_entry(), claude_entry()];
        assert!(validate_pool(&pool).is_ok());
    }

    /// An entry that fails its own validation makes the pool fail as well.
    #[test]
    fn validate_pool_propagates_entry_error() {
        let nameless_compatible = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: None,
            ..Default::default()
        };
        assert!(validate_pool(&[nameless_compatible]).is_err());
    }

    // ---------------------------------------------------------------------
    // ProviderEntry::effective_model
    // ---------------------------------------------------------------------

    /// An explicitly configured model is returned unchanged.
    #[test]
    fn effective_model_returns_explicit_when_set() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Claude,
            model: Some("claude-sonnet-4-6".into()),
            ..Default::default()
        };
        assert_eq!(provider.effective_model(), "claude-sonnet-4-6");
    }

    /// Ollama without a model resolves to `qwen3:8b`.
    #[test]
    fn effective_model_ollama_default_when_none() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Ollama,
            model: None,
            ..Default::default()
        };
        assert_eq!(provider.effective_model(), "qwen3:8b");
    }

    /// Claude without a model resolves to `claude-haiku-4-5-20251001`.
    #[test]
    fn effective_model_claude_default_when_none() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Claude,
            model: None,
            ..Default::default()
        };
        assert_eq!(provider.effective_model(), "claude-haiku-4-5-20251001");
    }

    /// OpenAI without a model resolves to `gpt-4o-mini`.
    #[test]
    fn effective_model_openai_default_when_none() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::OpenAi,
            model: None,
            ..Default::default()
        };
        assert_eq!(provider.effective_model(), "gpt-4o-mini");
    }

    /// Gemini without a model resolves to `gemini-2.0-flash`.
    #[test]
    fn effective_model_gemini_default_when_none() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Gemini,
            model: None,
            ..Default::default()
        };
        assert_eq!(provider.effective_model(), "gemini-2.0-flash");
    }

    // ---------------------------------------------------------------------
    // TOML-driven LlmConfig tests
    // ---------------------------------------------------------------------

    /// Parses a TOML document with a top-level `[llm]` table and returns the
    /// resulting `LlmConfig`. Panics if the document does not deserialize.
    fn parse_llm(src: &str) -> LlmConfig {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            llm: LlmConfig,
        }
        let wrapper: Wrapper = toml::from_str(src).unwrap();
        wrapper.llm
    }

    /// A config using `[[llm.providers]]` passes the legacy-format check.
    #[test]
    fn check_legacy_format_new_format_ok() {
        let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#;
        let llm = parse_llm(source);
        assert!(llm.check_legacy_format().is_ok());
    }

    /// A bare `[llm]` table passes the legacy-format check.
    #[test]
    fn check_legacy_format_empty_providers_no_legacy_ok() {
        let llm = parse_llm("[llm]\n");
        assert!(llm.check_legacy_format().is_ok());
    }

    /// With no providers configured, the effective provider is Ollama.
    #[test]
    fn effective_provider_falls_back_to_ollama_when_no_providers() {
        let llm = parse_llm("[llm]\n");
        assert_eq!(llm.effective_provider(), ProviderKind::Ollama);
    }

    /// The effective provider is taken from the configured provider entry.
    #[test]
    fn effective_provider_reads_from_providers_first() {
        let source = r#"
[llm]

[[llm.providers]]
type = "claude"
model = "claude-sonnet-4-6"
"#;
        let llm = parse_llm(source);
        assert_eq!(llm.effective_provider(), ProviderKind::Claude);
    }

    /// The effective model is taken from the configured provider entry.
    #[test]
    fn effective_model_reads_from_providers_first() {
        let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#;
        let llm = parse_llm(source);
        assert_eq!(llm.effective_model(), "qwen3:8b");
    }

    /// An entry marked `embed = true` is passed over when resolving the
    /// effective model; the following entry's model is reported instead.
    #[test]
    fn effective_model_skips_embed_only_provider() {
        let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "gemma4:26b"
embed = true

[[llm.providers]]
type = "openai"
model = "gpt-4o-mini"
"#;
        let llm = parse_llm(source);
        assert_eq!(llm.effective_model(), "gpt-4o-mini");
    }

    /// With no providers, the effective base URL is `http://localhost:11434`.
    #[test]
    fn effective_base_url_default_when_absent() {
        let llm = parse_llm("[llm]\n");
        assert_eq!(llm.effective_base_url(), "http://localhost:11434");
    }

    /// A `base_url` set on the provider entry becomes the effective base URL.
    #[test]
    fn effective_base_url_from_providers_entry() {
        let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
base_url = "http://myhost:11434"
"#;
        let llm = parse_llm(source);
        assert_eq!(llm.effective_base_url(), "http://myhost:11434");
    }

    // ---------------------------------------------------------------------
    // Complexity routing
    // ---------------------------------------------------------------------

    /// Pins the values produced by `ComplexityRoutingConfig::default()`.
    #[test]
    fn complexity_routing_defaults() {
        let routing = ComplexityRoutingConfig::default();
        assert!(
            routing.bypass_single_provider,
            "bypass_single_provider must default to true"
        );
        assert_eq!(routing.triage_timeout_secs, 5);
        assert_eq!(routing.max_triage_tokens, 50);
        assert!(routing.triage_provider.is_none());
        assert!(routing.tiers.simple.is_none());
    }

    /// Every complexity-routing field written in TOML is read back as given.
    #[test]
    fn complexity_routing_toml_round_trip() {
        let source = r#"
[llm]
routing = "triage"

[llm.complexity_routing]
triage_provider = "fast"
bypass_single_provider = false
triage_timeout_secs = 10
max_triage_tokens = 100

[llm.complexity_routing.tiers]
simple = "fast"
medium = "medium"
complex = "large"
expert = "opus"
"#;
        let llm = parse_llm(source);
        assert!(matches!(llm.routing, LlmRoutingStrategy::Triage));

        let routing = llm
            .complexity_routing
            .expect("complexity_routing must be present");
        assert_eq!(
            routing.triage_provider.as_ref().map(ProviderName::as_str),
            Some("fast")
        );
        assert!(!routing.bypass_single_provider);
        assert_eq!(routing.triage_timeout_secs, 10);
        assert_eq!(routing.max_triage_tokens, 100);

        let tiers = &routing.tiers;
        assert_eq!(tiers.simple.as_deref(), Some("fast"));
        assert_eq!(tiers.medium.as_deref(), Some("medium"));
        assert_eq!(tiers.complex.as_deref(), Some("large"));
        assert_eq!(tiers.expert.as_deref(), Some("opus"));
    }

    /// Only some tiers are given: the omitted tiers are `None`, and the
    /// fields outside `tiers` keep the values checked in the defaults test.
    #[test]
    fn complexity_routing_partial_tiers_toml() {
        let source = r#"
[llm]
routing = "triage"

[llm.complexity_routing.tiers]
simple = "haiku"
complex = "sonnet"
"#;
        let llm = parse_llm(source);
        let routing = llm
            .complexity_routing
            .expect("complexity_routing must be present");

        assert_eq!(routing.tiers.simple.as_deref(), Some("haiku"));
        assert!(routing.tiers.medium.is_none());
        assert_eq!(routing.tiers.complex.as_deref(), Some("sonnet"));
        assert!(routing.tiers.expert.is_none());
        assert!(routing.bypass_single_provider);
        assert_eq!(routing.triage_timeout_secs, 5);
    }

    /// `routing = "triage"` deserializes to `LlmRoutingStrategy::Triage`.
    #[test]
    fn routing_strategy_triage_deserialized() {
        let source = r#"
[llm]
routing = "triage"
"#;
        let llm = parse_llm(source);
        assert!(matches!(llm.routing, LlmRoutingStrategy::Triage));
    }

    // ---------------------------------------------------------------------
    // Speech-to-text provider lookup and validation
    // ---------------------------------------------------------------------

    /// `[llm.stt].provider` naming an entry with `stt_model` selects that entry.
    #[test]
    fn stt_provider_entry_by_name_match() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
stt_model = "gpt-4o-mini-transcribe"

[llm.stt]
provider = "quality"
"#;
        let llm = parse_llm(source);
        let found = llm.stt_provider_entry().expect("should find stt provider");
        assert_eq!(found.effective_name(), "quality");
        assert_eq!(found.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
    }

    /// An empty `[llm.stt].provider` still resolves to the entry that has
    /// an `stt_model`.
    #[test]
    fn stt_provider_entry_auto_detect_when_provider_empty() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = ""
"#;
        let llm = parse_llm(source);
        let found = llm.stt_provider_entry().expect("should auto-detect");
        assert_eq!(found.effective_name(), "openai-stt");
    }

    /// Without any `[llm.stt]` section, the entry that has an `stt_model`
    /// is still resolved.
    #[test]
    fn stt_provider_entry_auto_detect_no_stt_section() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"
"#;
        let llm = parse_llm(source);
        let found = llm.stt_provider_entry().expect("should auto-detect");
        assert_eq!(found.effective_name(), "openai-stt");
    }

    /// No entry carries an `stt_model`, so no STT provider is resolved.
    #[test]
    fn stt_provider_entry_none_when_no_stt_model() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
"#;
        let llm = parse_llm(source);
        assert!(llm.stt_provider_entry().is_none());
    }

    /// `[llm.stt].provider` names an entry lacking `stt_model`; the lookup
    /// yields `None` even though a different entry does have an `stt_model`.
    #[test]
    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#;
        let llm = parse_llm(source);
        assert!(llm.stt_provider_entry().is_none());
    }

    /// The `[llm.stt]` section with `provider` and `language` deserializes.
    #[test]
    fn stt_config_deserializes_new_slim_format() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
language = "en"
"#;
        let llm = parse_llm(source);
        let stt = llm.stt.as_ref().expect("stt section present");
        assert_eq!(stt.provider, "quality");
        assert_eq!(stt.language, "en");
    }

    /// `default_stt_provider()` yields the empty string.
    #[test]
    fn stt_config_default_provider_is_empty() {
        assert_eq!(default_stt_provider(), "");
    }

    /// STT validation succeeds for a bare `[llm]` table.
    #[test]
    fn validate_stt_missing_provider_ok() {
        let llm = parse_llm("[llm]\n");
        assert!(llm.validate_stt().is_ok());
    }

    /// STT validation succeeds when the referenced provider exists and has
    /// an `stt_model`.
    #[test]
    fn validate_stt_valid_reference() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#;
        let llm = parse_llm(source);
        assert!(llm.validate_stt().is_ok());
    }

    /// STT validation fails when the referenced provider name is not in the pool.
    #[test]
    fn validate_stt_nonexistent_provider_errors() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "nonexistent"
"#;
        let llm = parse_llm(source);
        assert!(llm.validate_stt().is_err());
    }

    /// The referenced provider exists but has no `stt_model`: validation
    /// still returns `Ok`, while the STT provider lookup returns `None`.
    #[test]
    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
        let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "quality"
"#;
        let llm = parse_llm(source);
        assert!(llm.validate_stt().is_ok());
        assert!(
            llm.stt_provider_entry().is_none(),
            "stt_provider_entry must be None when provider has no stt_model"
        );
    }

    // ---------------------------------------------------------------------
    // Bandit router: warmup_queries
    // ---------------------------------------------------------------------

    /// An explicit `warmup_queries = 50` is read back as `Some(50)`.
    #[test]
    fn bandit_warmup_queries_explicit_value_is_deserialized() {
        let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 50
"#;
        let llm = parse_llm(source);
        let router = llm.router.expect("router section must be present");
        let bandit = router.bandit.expect("bandit section must be present");
        assert_eq!(
            bandit.warmup_queries,
            Some(50),
            "warmup_queries = 50 must deserialize to Some(50)"
        );
    }

    /// Despite this test's name, the document sets `warmup_queries = 0`, and
    /// the assertion requires `Some(0)` rather than `None`.
    #[test]
    fn bandit_warmup_queries_explicit_null_is_none() {
        let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 0
"#;
        let llm = parse_llm(source);
        let router = llm.router.expect("router section must be present");
        let bandit = router.bandit.expect("bandit section must be present");
        assert_eq!(
            bandit.warmup_queries,
            Some(0),
            "warmup_queries = 0 must deserialize to Some(0)"
        );
    }

    /// Leaving `warmup_queries` out of the bandit table yields `None`.
    #[test]
    fn bandit_warmup_queries_missing_field_defaults_to_none() {
        let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
alpha = 1.5
"#;
        let llm = parse_llm(source);
        let router = llm.router.expect("router section must be present");
        let bandit = router.bandit.expect("bandit section must be present");
        assert_eq!(
            bandit.warmup_queries, None,
            "omitted warmup_queries must default to None"
        );
    }

    // ---------------------------------------------------------------------
    // ProviderName
    // ---------------------------------------------------------------------

    /// `new` stores the given text and `as_str` returns it.
    #[test]
    fn provider_name_new_and_as_str() {
        let name = ProviderName::new("fast");
        assert_eq!(name.as_str(), "fast");
        assert!(!name.is_empty());
    }

    /// The default `ProviderName` is empty.
    #[test]
    fn provider_name_default_is_empty() {
        let name = ProviderName::default();
        assert!(name.is_empty());
        assert_eq!(name.as_str(), "");
    }

    /// A `ProviderName` compares directly against string slices.
    #[test]
    fn provider_name_partial_eq_str() {
        let name = ProviderName::new("fast");
        assert_eq!(name, "fast");
        assert_ne!(name, "slow");
    }

    /// A `ProviderName` serializes to a plain JSON string and parses back equal.
    #[test]
    fn provider_name_serde_roundtrip() {
        let original = ProviderName::new("my-provider");
        let json = serde_json::to_string(&original).expect("serialize");
        assert_eq!(json, "\"my-provider\"");

        let restored: ProviderName = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(restored, original);
    }

    /// The empty `ProviderName` serializes to `""` and parses back empty.
    #[test]
    fn provider_name_serde_empty_roundtrip() {
        let original = ProviderName::default();
        let json = serde_json::to_string(&original).expect("serialize");
        assert_eq!(json, "\"\"");

        let restored: ProviderName = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(restored, original);
        assert!(restored.is_empty());
    }

    // ---------------------------------------------------------------------
    // Gonka providers
    // ---------------------------------------------------------------------

    /// A named Gonka entry carrying the given node list.
    fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Gonka,
            name: Some("my-gonka".into()),
            gonka_nodes: nodes,
            ..Default::default()
        }
    }

    /// Three nodes with `http`/`https` URLs; the last one has no name.
    fn valid_gonka_nodes() -> Vec<GonkaNode> {
        fn node(url: &str, address: &str, name: Option<&str>) -> GonkaNode {
            GonkaNode {
                url: url.into(),
                address: address.into(),
                name: name.map(Into::into),
            }
        }

        vec![
            node(
                "https://node1.gonka.ai",
                "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6",
                Some("node1"),
            ),
            node(
                "https://node2.gonka.ai",
                "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum",
                Some("node2"),
            ),
            node(
                "http://node3.internal",
                "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg",
                None,
            ),
        ]
    }

    /// A Gonka entry with the valid node list validates.
    #[test]
    fn validate_gonka_valid() {
        let provider = gonka_entry_with_nodes(valid_gonka_nodes());
        assert!(provider.validate().is_ok());
    }

    /// A Gonka entry without nodes is rejected; the error names `gonka_nodes`.
    #[test]
    fn validate_gonka_empty_nodes_errors() {
        let provider = gonka_entry_with_nodes(vec![]);
        let err = provider.validate().unwrap_err();
        assert!(
            err.to_string().contains("gonka_nodes"),
            "error should mention gonka_nodes: {err}"
        );
    }

    /// A node with an empty URL is rejected; the error mentions `url`.
    #[test]
    fn validate_gonka_node_empty_url_errors() {
        let blank_url_node = GonkaNode {
            url: String::new(),
            address: "gonka1test".into(),
            name: None,
        };
        let provider = gonka_entry_with_nodes(vec![blank_url_node]);
        let err = provider.validate().unwrap_err();
        assert!(err.to_string().contains("url"), "{err}");
    }

    /// A node whose URL uses `ftp://` is rejected; the error mentions `http`.
    #[test]
    fn validate_gonka_node_invalid_scheme_errors() {
        let ftp_node = GonkaNode {
            url: "ftp://node.gonka.ai".into(),
            address: "gonka1test".into(),
            name: None,
        };
        let provider = gonka_entry_with_nodes(vec![ftp_node]);
        let err = provider.validate().unwrap_err();
        assert!(err.to_string().contains("http"), "{err}");
    }

    /// A Gonka entry without a name is rejected; the error mentions `gonka`.
    #[test]
    fn validate_gonka_without_name_errors() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Gonka,
            name: None,
            gonka_nodes: valid_gonka_nodes(),
            ..Default::default()
        };
        let err = provider.validate().unwrap_err();
        assert!(err.to_string().contains("gonka"), "{err}");
    }

    /// A Gonka provider with three nodes and a custom chain prefix is read
    /// back from TOML field by field.
    #[test]
    fn gonka_toml_round_trip() {
        let source = r#"
[llm]

[[llm.providers]]
type = "gonka"
name = "my-gonka"
gonka_chain_prefix = "custom-chain"

[[llm.providers.gonka_nodes]]
url = "https://node1.gonka.ai"
address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
name = "node1"

[[llm.providers.gonka_nodes]]
url = "https://node2.gonka.ai"
address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
name = "node2"

[[llm.providers.gonka_nodes]]
url = "https://node3.gonka.ai"
address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
"#;
        let llm = parse_llm(source);
        assert_eq!(llm.providers.len(), 1);

        let provider = &llm.providers[0];
        assert_eq!(provider.provider_type, ProviderKind::Gonka);
        assert_eq!(provider.name.as_deref(), Some("my-gonka"));

        let nodes = &provider.gonka_nodes;
        assert_eq!(nodes.len(), 3);
        assert_eq!(nodes[0].url, "https://node1.gonka.ai");
        assert_eq!(
            nodes[0].address,
            "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
        );
        assert_eq!(nodes[0].name.as_deref(), Some("node1"));
        assert_eq!(nodes[2].name, None);
        assert_eq!(provider.gonka_chain_prefix.as_deref(), Some("custom-chain"));
    }

    /// Without an explicit prefix, the effective chain prefix is `gonka`.
    #[test]
    fn gonka_default_chain_prefix() {
        let provider = gonka_entry_with_nodes(valid_gonka_nodes());
        assert_eq!(provider.effective_gonka_chain_prefix(), "gonka");
    }

    /// An explicit `gonka_chain_prefix` is returned as the effective prefix.
    #[test]
    fn gonka_explicit_chain_prefix() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Gonka,
            name: Some("my-gonka".into()),
            gonka_nodes: valid_gonka_nodes(),
            gonka_chain_prefix: Some("my-chain".into()),
            ..Default::default()
        };
        assert_eq!(provider.effective_gonka_chain_prefix(), "my-chain");
    }

    /// A Gonka entry without a model reports an empty effective model.
    #[test]
    fn effective_model_gonka_is_empty() {
        let provider = ProviderEntry {
            provider_type: ProviderKind::Gonka,
            model: None,
            ..Default::default()
        };
        assert_eq!(provider.effective_model(), "");
    }

    /// A config with an Ollama and a Claude entry parses into two providers
    /// of the expected kinds, in document order.
    #[test]
    fn existing_configs_still_parse() {
        let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"

[[llm.providers]]
type = "claude"
name = "claude"
model = "claude-sonnet-4-6"
"#;
        let llm = parse_llm(source);
        assert_eq!(llm.providers.len(), 2);
        assert_eq!(llm.providers[0].provider_type, ProviderKind::Ollama);
        assert_eq!(llm.providers[1].provider_type, ProviderKind::Claude);
    }

    // ---------------------------------------------------------------------
    // Cocoon providers
    // ---------------------------------------------------------------------

    /// Client URL shared by the cocoon tests that need an accepted URL.
    const COCOON_URL: &str = "http://localhost:10000";
    /// Model name shared by the cocoon tests that need a non-empty model.
    const COCOON_MODEL: &str = "Qwen/Qwen3-0.6B";

    /// A named Cocoon entry with the given optional client URL and model.
    fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Cocoon,
            name: Some("cocoon".into()),
            cocoon_client_url: url.map(str::to_owned),
            model: model.map(str::to_owned),
            ..Default::default()
        }
    }

    /// An `http://localhost` client URL is accepted.
    #[test]
    fn test_cocoon_url_validation_accepts_http() {
        let provider = cocoon_entry(Some(COCOON_URL), Some(COCOON_MODEL));
        assert!(provider.validate().is_ok());
    }

    /// An `https://localhost` client URL is accepted.
    #[test]
    fn test_cocoon_url_validation_accepts_https_localhost() {
        let provider = cocoon_entry(Some("https://localhost:10000"), Some(COCOON_MODEL));
        assert!(provider.validate().is_ok());
    }

    /// A client URL pointing at `192.168.1.10` is rejected; the error
    /// mentions `localhost`.
    #[test]
    fn test_cocoon_url_validation_rejects_non_localhost() {
        let provider = cocoon_entry(Some("http://192.168.1.10:10000"), Some(COCOON_MODEL));
        let err = provider.validate().unwrap_err();
        assert!(
            err.to_string().contains("localhost"),
            "error should mention localhost restriction: {err}"
        );
    }

    /// An `ftp://` client URL is rejected; the error mentions `ftp`.
    #[test]
    fn test_cocoon_url_validation_rejects_non_http_scheme() {
        let provider = cocoon_entry(Some("ftp://localhost"), Some(COCOON_MODEL));
        let err = provider.validate().unwrap_err();
        assert!(
            err.to_string().contains("ftp"),
            "error should mention the bad scheme: {err}"
        );
    }

    /// A client URL that is not a URL is rejected; the error echoes the value.
    #[test]
    fn test_cocoon_url_validation_rejects_invalid_url() {
        let provider = cocoon_entry(Some("not-a-url"), Some(COCOON_MODEL));
        let err = provider.validate().unwrap_err();
        assert!(
            err.to_string().contains("not-a-url"),
            "error should mention the bad value: {err}"
        );
    }

    /// Omitting the client URL is accepted.
    #[test]
    fn test_cocoon_url_none_passes() {
        let provider = cocoon_entry(None, Some(COCOON_MODEL));
        assert!(provider.validate().is_ok());
    }

    /// An empty model string is rejected; the error mentions `empty`.
    #[test]
    fn test_cocoon_model_empty_rejected() {
        let provider = cocoon_entry(Some(COCOON_URL), Some(""));
        let err = provider.validate().unwrap_err();
        assert!(
            err.to_string().contains("empty"),
            "error should mention 'empty': {err}"
        );
    }

    /// Omitting the model is accepted.
    #[test]
    fn test_cocoon_model_none_passes() {
        let provider = cocoon_entry(Some(COCOON_URL), None);
        assert!(provider.validate().is_ok());
    }

    /// A negative prompt price is rejected.
    #[test]
    fn validate_cocoon_pricing_negative_prompt_errors() {
        let mut provider = cocoon_entry(Some(COCOON_URL), Some(COCOON_MODEL));
        provider.cocoon_pricing = Some(CocoonPricing {
            prompt_cents_per_1k: -1.0,
            completion_cents_per_1k: 0.03,
        });
        assert!(provider.validate().is_err());
    }

    /// A negative completion price is rejected.
    #[test]
    fn validate_cocoon_pricing_negative_completion_errors() {
        let mut provider = cocoon_entry(Some(COCOON_URL), Some(COCOON_MODEL));
        provider.cocoon_pricing = Some(CocoonPricing {
            prompt_cents_per_1k: 0.01,
            completion_cents_per_1k: -0.5,
        });
        assert!(provider.validate().is_err());
    }

    /// Positive prompt and completion prices are accepted.
    #[test]
    fn validate_cocoon_pricing_valid_passes() {
        let mut provider = cocoon_entry(Some(COCOON_URL), Some(COCOON_MODEL));
        provider.cocoon_pricing = Some(CocoonPricing {
            prompt_cents_per_1k: 0.01,
            completion_cents_per_1k: 0.03,
        });
        assert!(provider.validate().is_ok());
    }
}