1use std::fmt;
5
6use serde::{Deserialize, Serialize};
7
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
15#[serde(tag = "mode", rename_all = "snake_case")]
16pub enum ThinkingConfig {
17 Extended {
19 budget_tokens: u32,
21 },
22 Adaptive {
24 #[serde(default, skip_serializing_if = "Option::is_none")]
26 effort: Option<ThinkingEffort>,
27 },
28}
29
30#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
32#[serde(rename_all = "lowercase")]
33pub enum ThinkingEffort {
34 Low,
36 #[default]
38 Medium,
39 High,
41}
42
43#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
49#[serde(rename_all = "snake_case")]
50pub enum CacheTtl {
51 #[default]
53 Ephemeral,
54 #[serde(rename = "1h")]
57 OneHour,
58}
59
60impl CacheTtl {
61 #[must_use]
64 pub fn requires_beta(self) -> bool {
65 match self {
66 Self::OneHour => true,
67 Self::Ephemeral => false,
68 }
69 }
70}
71
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
77#[serde(rename_all = "lowercase")]
78pub enum GeminiThinkingLevel {
79 Minimal,
81 Low,
83 Medium,
85 High,
87}
88
89#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
102#[serde(transparent)]
103pub struct ProviderName(String);
104
105impl ProviderName {
106 #[must_use]
120 pub fn new(name: impl Into<String>) -> Self {
121 Self(name.into())
122 }
123
124 #[must_use]
135 pub fn is_empty(&self) -> bool {
136 self.0.is_empty()
137 }
138
139 #[must_use]
150 pub fn as_str(&self) -> &str {
151 &self.0
152 }
153
154 #[must_use]
168 pub fn as_non_empty(&self) -> Option<&str> {
169 if self.0.is_empty() {
170 None
171 } else {
172 Some(&self.0)
173 }
174 }
175}
176
177impl fmt::Display for ProviderName {
178 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179 self.0.fmt(f)
180 }
181}
182
183impl AsRef<str> for ProviderName {
184 fn as_ref(&self) -> &str {
185 &self.0
186 }
187}
188
189impl std::ops::Deref for ProviderName {
190 type Target = str;
191
192 fn deref(&self) -> &str {
193 &self.0
194 }
195}
196
197impl PartialEq<str> for ProviderName {
198 fn eq(&self, other: &str) -> bool {
199 self.0 == other
200 }
201}
202
203impl PartialEq<&str> for ProviderName {
204 fn eq(&self, other: &&str) -> bool {
205 self.0 == *other
206 }
207}
208
/// Serde default for `LlmConfig::response_cache_ttl_secs` (3600).
fn default_response_cache_ttl_secs() -> u64 {
    3_600
}

/// Serde default for `LlmConfig::semantic_cache_threshold` (0.95).
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}

/// Serde default for `LlmConfig::semantic_cache_max_candidates` (10).
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}

/// Serde default for `LlmConfig::router_ema_alpha` (0.1).
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}

/// Serde default for `LlmConfig::router_reorder_interval` (10).
fn default_router_reorder_interval() -> u64 {
    10_u64
}

/// Default embedding model name (`"qwen3-embedding"`).
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}

/// Serde default for the candle `source` field (`"huggingface"`).
fn default_candle_source() -> String {
    String::from("huggingface")
}

/// Serde default for the candle `chat_template` field (`"chatml"`).
fn default_chat_template() -> String {
    String::from("chatml")
}

/// Serde default for the candle `device` field (`"cpu"`).
fn default_candle_device() -> String {
    String::from("cpu")
}

/// Serde default for `GenerationParams::temperature` (0.7).
fn default_temperature() -> f64 {
    0.7_f64
}

/// Serde default for `GenerationParams::max_tokens` (2048).
fn default_max_tokens() -> usize {
    2_048
}

/// Serde default for `GenerationParams::seed` (42).
fn default_seed() -> u64 {
    42_u64
}

/// Serde default for `GenerationParams::repeat_penalty` (1.1).
fn default_repeat_penalty() -> f32 {
    1.1_f32
}

/// Serde default for `GenerationParams::repeat_last_n` (64).
fn default_repeat_last_n() -> usize {
    64_usize
}

/// Serde default for `CascadeConfig::quality_threshold` (0.5).
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}

/// Serde default for `CascadeConfig::max_escalations` (2).
fn default_cascade_max_escalations() -> u8 {
    2_u8
}

/// Serde default for `CascadeConfig::window_size` (50).
fn default_cascade_window_size() -> usize {
    50_usize
}

/// Serde default for `ReputationConfig::decay_factor` (0.95).
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}

/// Serde default for `ReputationConfig::weight` (0.3).
fn default_reputation_weight() -> f64 {
    0.3_f64
}

/// Serde default for `ReputationConfig::min_observations` (5).
fn default_reputation_min_observations() -> u64 {
    5_u64
}
288
289#[must_use]
291pub fn default_stt_provider() -> String {
292 String::new()
293}
294
295#[must_use]
297pub fn default_stt_language() -> String {
298 "auto".into()
299}
300
301#[must_use]
303pub fn get_default_embedding_model() -> String {
304 default_embedding_model()
305}
306
307#[must_use]
309pub fn get_default_response_cache_ttl_secs() -> u64 {
310 default_response_cache_ttl_secs()
311}
312
313#[must_use]
315pub fn get_default_router_ema_alpha() -> f64 {
316 default_router_ema_alpha()
317}
318
319#[must_use]
321pub fn get_default_router_reorder_interval() -> u64 {
322 default_router_reorder_interval()
323}
324
325#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
338#[serde(rename_all = "lowercase")]
339pub enum ProviderKind {
340 Ollama,
342 Claude,
344 OpenAi,
346 Gemini,
348 Candle,
350 Compatible,
352 Gonka,
354}
355
356impl ProviderKind {
357 #[must_use]
368 pub fn as_str(self) -> &'static str {
369 match self {
370 Self::Ollama => "ollama",
371 Self::Claude => "claude",
372 Self::OpenAi => "openai",
373 Self::Gemini => "gemini",
374 Self::Candle => "candle",
375 Self::Compatible => "compatible",
376 Self::Gonka => "gonka",
377 }
378 }
379}
380
381impl std::fmt::Display for ProviderKind {
382 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
383 f.write_str(self.as_str())
384 }
385}
386
387#[derive(Debug, Deserialize, Serialize)]
411pub struct LlmConfig {
412 #[serde(default, skip_serializing_if = "Vec::is_empty")]
414 pub providers: Vec<ProviderEntry>,
415
416 #[serde(default, skip_serializing_if = "is_routing_none")]
418 pub routing: LlmRoutingStrategy,
419
420 #[serde(default = "default_embedding_model_opt")]
421 pub embedding_model: String,
422 #[serde(default, skip_serializing_if = "Option::is_none")]
423 pub candle: Option<CandleConfig>,
424 #[serde(default)]
425 pub stt: Option<SttConfig>,
426 #[serde(default)]
427 pub response_cache_enabled: bool,
428 #[serde(default = "default_response_cache_ttl_secs")]
429 pub response_cache_ttl_secs: u64,
430 #[serde(default)]
432 pub semantic_cache_enabled: bool,
433 #[serde(default = "default_semantic_cache_threshold")]
439 pub semantic_cache_threshold: f32,
440 #[serde(default = "default_semantic_cache_max_candidates")]
453 pub semantic_cache_max_candidates: u32,
454 #[serde(default)]
455 pub router_ema_enabled: bool,
456 #[serde(default = "default_router_ema_alpha")]
457 pub router_ema_alpha: f64,
458 #[serde(default = "default_router_reorder_interval")]
459 pub router_reorder_interval: u64,
460 #[serde(default, skip_serializing_if = "Option::is_none")]
462 pub router: Option<RouterConfig>,
463 #[serde(default, skip_serializing_if = "Option::is_none")]
466 pub instruction_file: Option<std::path::PathBuf>,
467 #[serde(default, skip_serializing_if = "Option::is_none")]
471 pub summary_model: Option<String>,
472 #[serde(default, skip_serializing_if = "Option::is_none")]
474 pub summary_provider: Option<ProviderEntry>,
475
476 #[serde(default, skip_serializing_if = "Option::is_none")]
478 pub complexity_routing: Option<ComplexityRoutingConfig>,
479
480 #[serde(default, skip_serializing_if = "Option::is_none")]
482 pub coe: Option<CoeConfig>,
483}
484
485fn default_embedding_model_opt() -> String {
486 default_embedding_model()
487}
488
489#[allow(clippy::trivially_copy_pass_by_ref)]
490fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
491 *s == LlmRoutingStrategy::None
492}
493
494impl LlmConfig {
495 #[must_use]
497 pub fn effective_provider(&self) -> ProviderKind {
498 self.providers
499 .first()
500 .map_or(ProviderKind::Ollama, |e| e.provider_type)
501 }
502
503 #[must_use]
505 pub fn effective_base_url(&self) -> &str {
506 self.providers
507 .first()
508 .and_then(|e| e.base_url.as_deref())
509 .unwrap_or("http://localhost:11434")
510 }
511
512 #[must_use]
518 pub fn effective_model(&self) -> &str {
519 self.providers
520 .iter()
521 .find(|e| !e.embed)
522 .and_then(|e| e.model.as_deref())
523 .unwrap_or("qwen3:8b")
524 }
525
526 #[must_use]
534 pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
535 let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
536 if name_hint.is_empty() {
537 self.providers.iter().find(|p| p.stt_model.is_some())
538 } else {
539 self.providers
540 .iter()
541 .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
542 }
543 }
544
545 pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
551 Ok(())
552 }
553
554 pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
560 use crate::error::ConfigError;
561
562 let Some(stt) = &self.stt else {
563 return Ok(());
564 };
565 if stt.provider.is_empty() {
566 return Ok(());
567 }
568 let found = self
569 .providers
570 .iter()
571 .find(|p| p.effective_name() == stt.provider);
572 match found {
573 None => {
574 return Err(ConfigError::Validation(format!(
575 "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
576 stt.provider
577 )));
578 }
579 Some(entry) if entry.stt_model.is_none() => {
580 tracing::warn!(
581 provider = stt.provider,
582 "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
583 );
584 }
585 _ => {}
586 }
587 Ok(())
588 }
589
590 pub fn warn_non_fast_tier_provider(
614 &self,
615 provider_name: &ProviderName,
616 feature_label: &str,
617 extra_allowlist: &[String],
618 ) {
619 if provider_name.is_empty() {
620 return;
621 }
622 let name = provider_name.as_str();
623 let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
624 tracing::warn!(
625 provider = name,
626 "{feature_label} provider '{name}' not found in [[llm.providers]]"
627 );
628 return;
629 };
630 let model = entry.model.as_deref().unwrap_or("");
631 if model.is_empty() {
632 return;
633 }
634 let lower = model.to_lowercase();
635 let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
636 let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
637 if !in_hints && !in_extra {
638 tracing::warn!(
639 provider = name,
640 actual = model,
641 "{feature_label} provider '{name}' uses model '{model}' \
642 which may not be fast-tier; prefer a fast model to bound distillation cost"
643 );
644 }
645 }
646}
647
/// Lowercase substrings that mark a model name as fast-tier.
///
/// `LlmConfig::warn_non_fast_tier_provider` lowercases the configured model and accepts
/// it when it contains any of these entries.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    // OpenAI
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    // Claude
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    // Tagged small models (`name:size`)
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
670
671#[derive(Debug, Clone, Deserialize, Serialize)]
684pub struct SttConfig {
685 #[serde(default = "default_stt_provider")]
688 pub provider: String,
689 #[serde(default = "default_stt_language")]
691 pub language: String,
692}
693
694#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
696#[serde(rename_all = "lowercase")]
697pub enum RouterStrategyConfig {
698 #[default]
700 Ema,
701 Thompson,
703 Cascade,
705 Bandit,
707}
708
709#[derive(Debug, Clone, Deserialize, Serialize)]
722pub struct AsiConfig {
723 #[serde(default)]
725 pub enabled: bool,
726
727 #[serde(default = "default_asi_window")]
729 pub window: usize,
730
731 #[serde(default = "default_asi_coherence_threshold")]
733 pub coherence_threshold: f32,
734
735 #[serde(default = "default_asi_penalty_weight")]
740 pub penalty_weight: f32,
741}
742
/// Serde default for `AsiConfig::window` (5).
fn default_asi_window() -> usize {
    5_usize
}

/// Serde default for `AsiConfig::coherence_threshold` (0.7).
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}

/// Serde default for `AsiConfig::penalty_weight` (0.3).
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
754
755impl Default for AsiConfig {
756 fn default() -> Self {
757 Self {
758 enabled: false,
759 window: default_asi_window(),
760 coherence_threshold: default_asi_coherence_threshold(),
761 penalty_weight: default_asi_penalty_weight(),
762 }
763 }
764}
765
766#[derive(Debug, Clone, Deserialize, Serialize)]
768pub struct RouterConfig {
769 #[serde(default)]
771 pub strategy: RouterStrategyConfig,
772 #[serde(default)]
780 pub thompson_state_path: Option<String>,
781 #[serde(default)]
783 pub cascade: Option<CascadeConfig>,
784 #[serde(default)]
786 pub reputation: Option<ReputationConfig>,
787 #[serde(default)]
789 pub bandit: Option<BanditConfig>,
790 #[serde(default)]
799 pub quality_gate: Option<f32>,
800 #[serde(default)]
802 pub asi: Option<AsiConfig>,
803 #[serde(default = "default_embed_concurrency")]
809 pub embed_concurrency: usize,
810}
811
/// Serde default for `RouterConfig::embed_concurrency` (4).
fn default_embed_concurrency() -> usize {
    4_usize
}
815
816#[derive(Debug, Clone, Deserialize, Serialize)]
823pub struct ReputationConfig {
824 #[serde(default)]
826 pub enabled: bool,
827 #[serde(default = "default_reputation_decay_factor")]
830 pub decay_factor: f64,
831 #[serde(default = "default_reputation_weight")]
838 pub weight: f64,
839 #[serde(default = "default_reputation_min_observations")]
841 pub min_observations: u64,
842 #[serde(default)]
844 pub state_path: Option<String>,
845}
846
847#[derive(Debug, Clone, Deserialize, Serialize)]
858pub struct CascadeConfig {
859 #[serde(default = "default_cascade_quality_threshold")]
862 pub quality_threshold: f64,
863
864 #[serde(default = "default_cascade_max_escalations")]
868 pub max_escalations: u8,
869
870 #[serde(default)]
874 pub classifier_mode: CascadeClassifierMode,
875
876 #[serde(default = "default_cascade_window_size")]
878 pub window_size: usize,
879
880 #[serde(default)]
884 pub max_cascade_tokens: Option<u32>,
885
886 #[serde(default, skip_serializing_if = "Option::is_none")]
891 pub cost_tiers: Option<Vec<String>>,
892}
893
894impl Default for CascadeConfig {
895 fn default() -> Self {
896 Self {
897 quality_threshold: default_cascade_quality_threshold(),
898 max_escalations: default_cascade_max_escalations(),
899 classifier_mode: CascadeClassifierMode::default(),
900 window_size: default_cascade_window_size(),
901 max_cascade_tokens: None,
902 cost_tiers: None,
903 }
904 }
905}
906
907#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
909#[serde(rename_all = "lowercase")]
910pub enum CascadeClassifierMode {
911 #[default]
914 Heuristic,
915 Judge,
918}
919
/// Serde default for `BanditConfig::alpha` (1.0).
fn default_bandit_alpha() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::dim` (32).
fn default_bandit_dim() -> usize {
    32_usize
}

/// Serde default for `BanditConfig::cost_weight` (0.1).
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}

/// Serde default for `BanditConfig::decay_factor` (1.0).
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::embedding_timeout_ms` (50).
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}

/// Serde default for `BanditConfig::cache_size` (512).
fn default_bandit_cache_size() -> usize {
    512_usize
}
943
944#[derive(Debug, Clone, Deserialize, Serialize)]
957pub struct BanditConfig {
958 #[serde(default = "default_bandit_alpha")]
961 pub alpha: f32,
962
963 #[serde(default = "default_bandit_dim")]
970 pub dim: usize,
971
972 #[serde(default = "default_bandit_cost_weight")]
975 pub cost_weight: f32,
976
977 #[serde(default = "default_bandit_decay_factor")]
980 pub decay_factor: f32,
981
982 #[serde(default)]
988 pub embedding_provider: ProviderName,
989
990 #[serde(default = "default_bandit_embedding_timeout_ms")]
993 pub embedding_timeout_ms: u64,
994
995 #[serde(default = "default_bandit_cache_size")]
997 pub cache_size: usize,
998
999 #[serde(default)]
1006 pub state_path: Option<String>,
1007
1008 #[serde(default = "default_bandit_memory_confidence_threshold")]
1014 pub memory_confidence_threshold: f32,
1015
1016 #[serde(default)]
1022 pub warmup_queries: Option<u64>,
1023}
1024
/// Serde default for `BanditConfig::memory_confidence_threshold` (0.9).
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
1028
1029impl Default for BanditConfig {
1030 fn default() -> Self {
1031 Self {
1032 alpha: default_bandit_alpha(),
1033 dim: default_bandit_dim(),
1034 cost_weight: default_bandit_cost_weight(),
1035 decay_factor: default_bandit_decay_factor(),
1036 embedding_provider: ProviderName::default(),
1037 embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
1038 cache_size: default_bandit_cache_size(),
1039 state_path: None,
1040 memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
1041 warmup_queries: None,
1042 }
1043 }
1044}
1045
1046#[derive(Debug, Deserialize, Serialize)]
1047pub struct CandleConfig {
1048 #[serde(default = "default_candle_source")]
1049 pub source: String,
1050 #[serde(default)]
1051 pub local_path: String,
1052 #[serde(default)]
1053 pub filename: Option<String>,
1054 #[serde(default = "default_chat_template")]
1055 pub chat_template: String,
1056 #[serde(default = "default_candle_device")]
1057 pub device: String,
1058 #[serde(default)]
1059 pub embedding_repo: Option<String>,
1060 #[serde(default)]
1064 pub hf_token: Option<String>,
1065 #[serde(default)]
1066 pub generation: GenerationParams,
1067 #[serde(default = "default_inference_timeout_secs")]
1076 pub inference_timeout_secs: u64,
1077}
1078
/// Serde default for the candle `inference_timeout_secs` field (120).
fn default_inference_timeout_secs() -> u64 {
    120_u64
}
1082
1083#[derive(Debug, Clone, Deserialize, Serialize)]
1087pub struct GenerationParams {
1088 #[serde(default = "default_temperature")]
1090 pub temperature: f64,
1091 #[serde(default)]
1094 pub top_p: Option<f64>,
1095 #[serde(default)]
1098 pub top_k: Option<usize>,
1099 #[serde(default = "default_max_tokens")]
1102 pub max_tokens: usize,
1103 #[serde(default = "default_seed")]
1105 pub seed: u64,
1106 #[serde(default = "default_repeat_penalty")]
1108 pub repeat_penalty: f32,
1109 #[serde(default = "default_repeat_last_n")]
1111 pub repeat_last_n: usize,
1112}
1113
/// Upper bound applied to `GenerationParams::max_tokens` by `capped_max_tokens`
/// (32 × 1024 = 32768).
pub const MAX_TOKENS_CAP: usize = 32 * 1024;
1116
1117impl GenerationParams {
1118 #[must_use]
1129 pub fn capped_max_tokens(&self) -> usize {
1130 self.max_tokens.min(MAX_TOKENS_CAP)
1131 }
1132}
1133
1134impl Default for GenerationParams {
1135 fn default() -> Self {
1136 Self {
1137 temperature: default_temperature(),
1138 top_p: None,
1139 top_k: None,
1140 max_tokens: default_max_tokens(),
1141 seed: default_seed(),
1142 repeat_penalty: default_repeat_penalty(),
1143 repeat_last_n: default_repeat_last_n(),
1144 }
1145 }
1146}
1147
1148#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
1152#[serde(rename_all = "lowercase")]
1153pub enum LlmRoutingStrategy {
1154 #[default]
1156 None,
1157 Ema,
1159 Thompson,
1161 Cascade,
1163 Triage,
1165 Bandit,
1167}
1168
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs` (5).
fn default_triage_timeout_secs() -> u64 {
    5_u64
}

/// Serde default for `ComplexityRoutingConfig::max_triage_tokens` (50).
fn default_max_triage_tokens() -> u32 {
    50_u32
}

/// Serde default helper for boolean fields that are on unless disabled.
fn default_true() -> bool {
    !false
}
1180
1181#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1183pub struct TierMapping {
1184 pub simple: Option<String>,
1185 pub medium: Option<String>,
1186 pub complex: Option<String>,
1187 pub expert: Option<String>,
1188}
1189
1190#[derive(Debug, Clone, Deserialize, Serialize)]
1211pub struct ComplexityRoutingConfig {
1212 #[serde(default)]
1214 pub triage_provider: Option<ProviderName>,
1215
1216 #[serde(default = "default_true")]
1218 pub bypass_single_provider: bool,
1219
1220 #[serde(default)]
1222 pub tiers: TierMapping,
1223
1224 #[serde(default = "default_max_triage_tokens")]
1226 pub max_triage_tokens: u32,
1227
1228 #[serde(default = "default_triage_timeout_secs")]
1231 pub triage_timeout_secs: u64,
1232
1233 #[serde(default)]
1236 pub fallback_strategy: Option<String>,
1237}
1238
1239impl Default for ComplexityRoutingConfig {
1240 fn default() -> Self {
1241 Self {
1242 triage_provider: None,
1243 bypass_single_provider: true,
1244 tiers: TierMapping::default(),
1245 max_triage_tokens: default_max_triage_tokens(),
1246 triage_timeout_secs: default_triage_timeout_secs(),
1247 fallback_strategy: None,
1248 }
1249 }
1250}
1251
1252#[derive(Debug, Clone, Deserialize, Serialize)]
1270#[serde(default)]
1271pub struct CoeConfig {
1272 pub enabled: bool,
1274 pub intra_threshold: f64,
1276 pub inter_threshold: f64,
1278 pub shadow_sample_rate: f64,
1280 pub secondary_provider: ProviderName,
1282 pub embed_provider: ProviderName,
1284}
1285
1286impl Default for CoeConfig {
1287 fn default() -> Self {
1288 Self {
1289 enabled: false,
1290 intra_threshold: 0.8,
1291 inter_threshold: 0.20,
1292 shadow_sample_rate: 0.1,
1293 secondary_provider: ProviderName::default(),
1294 embed_provider: ProviderName::default(),
1295 }
1296 }
1297}
1298
1299#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
1304pub struct GonkaNode {
1305 pub url: String,
1307 pub address: String,
1312 #[serde(default, skip_serializing_if = "Option::is_none")]
1314 pub name: Option<String>,
1315}
1316
1317#[derive(Debug, Clone, Deserialize, Serialize)]
1320pub struct CandleInlineConfig {
1321 #[serde(default = "default_candle_source")]
1322 pub source: String,
1323 #[serde(default)]
1324 pub local_path: String,
1325 #[serde(default)]
1326 pub filename: Option<String>,
1327 #[serde(default = "default_chat_template")]
1328 pub chat_template: String,
1329 #[serde(default = "default_candle_device")]
1330 pub device: String,
1331 #[serde(default)]
1332 pub embedding_repo: Option<String>,
1333 #[serde(default)]
1335 pub hf_token: Option<String>,
1336 #[serde(default)]
1337 pub generation: GenerationParams,
1338 #[serde(default = "default_inference_timeout_secs")]
1343 pub inference_timeout_secs: u64,
1344}
1345
1346impl Default for CandleInlineConfig {
1347 fn default() -> Self {
1348 Self {
1349 source: default_candle_source(),
1350 local_path: String::new(),
1351 filename: None,
1352 chat_template: default_chat_template(),
1353 device: default_candle_device(),
1354 embedding_repo: None,
1355 hf_token: None,
1356 generation: GenerationParams::default(),
1357 inference_timeout_secs: default_inference_timeout_secs(),
1358 }
1359 }
1360}
1361
1362#[derive(Debug, Clone, Deserialize, Serialize)]
1368#[allow(clippy::struct_excessive_bools)] pub struct ProviderEntry {
1370 #[serde(rename = "type")]
1372 pub provider_type: ProviderKind,
1373
1374 #[serde(default)]
1376 pub name: Option<String>,
1377
1378 #[serde(default)]
1380 pub model: Option<String>,
1381
1382 #[serde(default)]
1384 pub base_url: Option<String>,
1385
1386 #[serde(default)]
1388 pub max_tokens: Option<u32>,
1389
1390 #[serde(default)]
1392 pub embedding_model: Option<String>,
1393
1394 #[serde(default)]
1397 pub stt_model: Option<String>,
1398
1399 #[serde(default)]
1401 pub embed: bool,
1402
1403 #[serde(default)]
1405 pub default: bool,
1406
1407 #[serde(default)]
1409 pub thinking: Option<ThinkingConfig>,
1410 #[serde(default)]
1411 pub server_compaction: bool,
1412 #[serde(default)]
1413 pub enable_extended_context: bool,
1414 #[serde(default)]
1417 pub prompt_cache_ttl: Option<CacheTtl>,
1418
1419 #[serde(default)]
1421 pub reasoning_effort: Option<String>,
1422
1423 #[serde(default)]
1425 pub thinking_level: Option<GeminiThinkingLevel>,
1426 #[serde(default)]
1427 pub thinking_budget: Option<i32>,
1428 #[serde(default)]
1429 pub include_thoughts: Option<bool>,
1430
1431 #[serde(default)]
1433 pub api_key: Option<String>,
1434
1435 #[serde(default)]
1437 pub candle: Option<CandleInlineConfig>,
1438
1439 #[serde(default)]
1441 pub vision_model: Option<String>,
1442
1443 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1446 pub gonka_nodes: Vec<GonkaNode>,
1447 #[serde(default, skip_serializing_if = "Option::is_none")]
1449 pub gonka_chain_prefix: Option<String>,
1450
1451 #[serde(default)]
1453 pub instruction_file: Option<std::path::PathBuf>,
1454
1455 #[serde(default, skip_serializing_if = "Option::is_none")]
1473 pub max_concurrent: Option<u32>,
1474}
1475
1476impl Default for ProviderEntry {
1477 fn default() -> Self {
1478 Self {
1479 provider_type: ProviderKind::Ollama,
1480 name: None,
1481 model: None,
1482 base_url: None,
1483 max_tokens: None,
1484 embedding_model: None,
1485 stt_model: None,
1486 embed: false,
1487 default: false,
1488 thinking: None,
1489 server_compaction: false,
1490 enable_extended_context: false,
1491 prompt_cache_ttl: None,
1492 reasoning_effort: None,
1493 thinking_level: None,
1494 thinking_budget: None,
1495 include_thoughts: None,
1496 api_key: None,
1497 candle: None,
1498 vision_model: None,
1499 gonka_nodes: Vec::new(),
1500 gonka_chain_prefix: None,
1501 instruction_file: None,
1502 max_concurrent: None,
1503 }
1504 }
1505}
1506
1507impl ProviderEntry {
1508 #[must_use]
1510 pub fn effective_name(&self) -> String {
1511 self.name
1512 .clone()
1513 .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1514 }
1515
1516 #[must_use]
1521 pub fn effective_model(&self) -> String {
1522 if let Some(ref m) = self.model {
1523 return m.clone();
1524 }
1525 match self.provider_type {
1526 ProviderKind::Ollama => "qwen3:8b".to_owned(),
1527 ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1528 ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1529 ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1530 ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1533 }
1534 }
1535
1536 pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1543 use crate::error::ConfigError;
1544
1545 if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1547 return Err(ConfigError::Validation(
1548 "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1549 ));
1550 }
1551
1552 if self.provider_type == ProviderKind::Gonka {
1554 if self.name.is_none() {
1555 return Err(ConfigError::Validation(
1556 "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1557 ));
1558 }
1559 self.validate_gonka_nodes()?;
1560 }
1561
1562 self.warn_irrelevant_fields();
1564
1565 if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1568 tracing::warn!(
1569 provider = self.effective_name(),
1570 "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1571 Whisper STT API — use OpenAI, compatible, or candle instead"
1572 );
1573 }
1574
1575 Ok(())
1576 }
1577
1578 #[must_use]
1580 pub fn effective_gonka_chain_prefix(&self) -> &str {
1581 self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1582 }
1583
1584 fn warn_irrelevant_fields(&self) {
1585 let name = self.effective_name();
1586 match self.provider_type {
1587 ProviderKind::Ollama => {
1588 if self.thinking.is_some() {
1589 tracing::warn!(
1590 provider = name,
1591 "field `thinking` is only used by Claude providers"
1592 );
1593 }
1594 if self.reasoning_effort.is_some() {
1595 tracing::warn!(
1596 provider = name,
1597 "field `reasoning_effort` is only used by OpenAI providers"
1598 );
1599 }
1600 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1601 tracing::warn!(
1602 provider = name,
1603 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1604 );
1605 }
1606 }
1607 ProviderKind::Claude => {
1608 if self.reasoning_effort.is_some() {
1609 tracing::warn!(
1610 provider = name,
1611 "field `reasoning_effort` is only used by OpenAI providers"
1612 );
1613 }
1614 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1615 tracing::warn!(
1616 provider = name,
1617 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1618 );
1619 }
1620 }
1621 ProviderKind::OpenAi => {
1622 if self.thinking.is_some() {
1623 tracing::warn!(
1624 provider = name,
1625 "field `thinking` is only used by Claude providers"
1626 );
1627 }
1628 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1629 tracing::warn!(
1630 provider = name,
1631 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1632 );
1633 }
1634 }
1635 ProviderKind::Gemini => {
1636 if self.thinking.is_some() {
1637 tracing::warn!(
1638 provider = name,
1639 "field `thinking` is only used by Claude providers"
1640 );
1641 }
1642 if self.reasoning_effort.is_some() {
1643 tracing::warn!(
1644 provider = name,
1645 "field `reasoning_effort` is only used by OpenAI providers"
1646 );
1647 }
1648 }
1649 ProviderKind::Gonka => {
1650 if self.thinking.is_some() {
1651 tracing::warn!(
1652 provider = name,
1653 "field `thinking` is only used by Claude providers"
1654 );
1655 }
1656 if self.reasoning_effort.is_some() {
1657 tracing::warn!(
1658 provider = name,
1659 "field `reasoning_effort` is only used by OpenAI providers"
1660 );
1661 }
1662 if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1663 tracing::warn!(
1664 provider = name,
1665 "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1666 );
1667 }
1668 }
1669 ProviderKind::Compatible | ProviderKind::Candle => {}
1670 }
1671 }
1672
1673 fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1674 use crate::error::ConfigError;
1675 if self.gonka_nodes.is_empty() {
1676 return Err(ConfigError::Validation(format!(
1677 "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1678 self.effective_name()
1679 )));
1680 }
1681 for (i, node) in self.gonka_nodes.iter().enumerate() {
1682 if node.url.is_empty() {
1683 return Err(ConfigError::Validation(format!(
1684 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1685 self.effective_name()
1686 )));
1687 }
1688 if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1689 return Err(ConfigError::Validation(format!(
1690 "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1691 self.effective_name()
1692 )));
1693 }
1694 }
1695 Ok(())
1696 }
1697}
1698
1699pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1709 use crate::error::ConfigError;
1710 use std::collections::HashSet;
1711
1712 if entries.is_empty() {
1713 return Err(ConfigError::Validation(
1714 "at least one LLM provider must be configured in [[llm.providers]]".into(),
1715 ));
1716 }
1717
1718 let default_count = entries.iter().filter(|e| e.default).count();
1719 if default_count > 1 {
1720 return Err(ConfigError::Validation(
1721 "only one [[llm.providers]] entry can be marked `default = true`".into(),
1722 ));
1723 }
1724
1725 let mut seen_names: HashSet<String> = HashSet::new();
1726 for entry in entries {
1727 let name = entry.effective_name();
1728 if !seen_names.insert(name.clone()) {
1729 return Err(ConfigError::Validation(format!(
1730 "duplicate provider name \"{name}\" in [[llm.providers]]"
1731 )));
1732 }
1733 entry.validate()?;
1734 }
1735
1736 Ok(())
1737}
1738
1739#[cfg(test)]
1740mod tests {
1741 use super::*;
1742
1743 fn ollama_entry() -> ProviderEntry {
1744 ProviderEntry {
1745 provider_type: ProviderKind::Ollama,
1746 name: Some("ollama".into()),
1747 model: Some("qwen3:8b".into()),
1748 ..Default::default()
1749 }
1750 }
1751
1752 fn claude_entry() -> ProviderEntry {
1753 ProviderEntry {
1754 provider_type: ProviderKind::Claude,
1755 name: Some("claude".into()),
1756 model: Some("claude-sonnet-4-6".into()),
1757 max_tokens: Some(8192),
1758 ..Default::default()
1759 }
1760 }
1761
1762 #[test]
1765 fn validate_ollama_valid() {
1766 assert!(ollama_entry().validate().is_ok());
1767 }
1768
1769 #[test]
1770 fn validate_claude_valid() {
1771 assert!(claude_entry().validate().is_ok());
1772 }
1773
1774 #[test]
1775 fn validate_compatible_without_name_errors() {
1776 let entry = ProviderEntry {
1777 provider_type: ProviderKind::Compatible,
1778 name: None,
1779 ..Default::default()
1780 };
1781 let err = entry.validate().unwrap_err();
1782 assert!(
1783 err.to_string().contains("compatible"),
1784 "error should mention compatible: {err}"
1785 );
1786 }
1787
1788 #[test]
1789 fn validate_compatible_with_name_ok() {
1790 let entry = ProviderEntry {
1791 provider_type: ProviderKind::Compatible,
1792 name: Some("my-proxy".into()),
1793 base_url: Some("http://localhost:8080".into()),
1794 model: Some("gpt-4o".into()),
1795 max_tokens: Some(4096),
1796 ..Default::default()
1797 };
1798 assert!(entry.validate().is_ok());
1799 }
1800
1801 #[test]
1802 fn validate_openai_valid() {
1803 let entry = ProviderEntry {
1804 provider_type: ProviderKind::OpenAi,
1805 name: Some("openai".into()),
1806 model: Some("gpt-4o".into()),
1807 max_tokens: Some(4096),
1808 ..Default::default()
1809 };
1810 assert!(entry.validate().is_ok());
1811 }
1812
1813 #[test]
1814 fn validate_gemini_valid() {
1815 let entry = ProviderEntry {
1816 provider_type: ProviderKind::Gemini,
1817 name: Some("gemini".into()),
1818 model: Some("gemini-2.0-flash".into()),
1819 ..Default::default()
1820 };
1821 assert!(entry.validate().is_ok());
1822 }
1823
/// An empty provider pool is rejected with an error mentioning "at least one".
#[test]
fn validate_pool_empty_errors() {
    let no_entries: [ProviderEntry; 0] = [];
    let err = validate_pool(&no_entries).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("at least one"), "{err}");
}
1831
/// A pool holding only the Ollama fixture is accepted.
#[test]
fn validate_pool_single_entry_ok() {
    let pool = [ollama_entry()];
    let outcome = validate_pool(&pool);
    assert!(outcome.is_ok());
}
1836
/// Two entries built from the same fixture share a name, so the pool is
/// rejected with an error mentioning "duplicate".
#[test]
fn validate_pool_duplicate_names_errors() {
    let pool = [ollama_entry(), ollama_entry()];
    let err = validate_pool(&pool).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("duplicate"), "{err}");
}
1844
/// Marking two entries as `default` is rejected with an error mentioning
/// "default".
#[test]
fn validate_pool_multiple_defaults_errors() {
    let first = ProviderEntry {
        default: true,
        ..ollama_entry()
    };
    let second = ProviderEntry {
        default: true,
        ..claude_entry()
    };
    let pool = [first, second];
    let err = validate_pool(&pool).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("default"), "{err}");
}
1854
/// A pool with the Ollama and Claude fixtures is accepted.
#[test]
fn validate_pool_two_different_providers_ok() {
    let pool = [ollama_entry(), claude_entry()];
    let outcome = validate_pool(&pool);
    assert!(outcome.is_ok());
}
1859
/// A pool containing a nameless `Compatible` entry is rejected.
#[test]
fn validate_pool_propagates_entry_error() {
    let invalid = ProviderEntry {
        provider_type: ProviderKind::Compatible,
        name: None,
        ..Default::default()
    };
    let pool = [invalid];
    let outcome = validate_pool(&pool);
    assert!(outcome.is_err());
}
1869
/// An explicitly configured model is returned unchanged by `effective_model`.
#[test]
fn effective_model_returns_explicit_when_set() {
    let configured = ProviderEntry {
        provider_type: ProviderKind::Claude,
        model: Some("claude-sonnet-4-6".into()),
        ..Default::default()
    };
    let resolved = configured.effective_model();
    assert_eq!(resolved, "claude-sonnet-4-6");
}
1881
/// With no model set, an Ollama entry resolves to "qwen3:8b".
#[test]
fn effective_model_ollama_default_when_none() {
    let unset = ProviderEntry {
        provider_type: ProviderKind::Ollama,
        model: None,
        ..Default::default()
    };
    let resolved = unset.effective_model();
    assert_eq!(resolved, "qwen3:8b");
}
1891
/// With no model set, a Claude entry resolves to "claude-haiku-4-5-20251001".
#[test]
fn effective_model_claude_default_when_none() {
    let unset = ProviderEntry {
        provider_type: ProviderKind::Claude,
        model: None,
        ..Default::default()
    };
    let resolved = unset.effective_model();
    assert_eq!(resolved, "claude-haiku-4-5-20251001");
}
1901
/// With no model set, an OpenAI entry resolves to "gpt-4o-mini".
#[test]
fn effective_model_openai_default_when_none() {
    let unset = ProviderEntry {
        provider_type: ProviderKind::OpenAi,
        model: None,
        ..Default::default()
    };
    let resolved = unset.effective_model();
    assert_eq!(resolved, "gpt-4o-mini");
}
1911
/// With no model set, a Gemini entry resolves to "gemini-2.0-flash".
#[test]
fn effective_model_gemini_default_when_none() {
    let unset = ProviderEntry {
        provider_type: ProviderKind::Gemini,
        model: None,
        ..Default::default()
    };
    let resolved = unset.effective_model();
    assert_eq!(resolved, "gemini-2.0-flash");
}
1921
1922 fn parse_llm(toml: &str) -> LlmConfig {
1926 #[derive(serde::Deserialize)]
1927 struct Wrapper {
1928 llm: LlmConfig,
1929 }
1930 toml::from_str::<Wrapper>(toml).unwrap().llm
1931 }
1932
/// A config using the `[[llm.providers]]` layout passes the legacy-format check.
#[test]
fn check_legacy_format_new_format_ok() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#;
    let parsed = parse_llm(source);
    assert!(parsed.check_legacy_format().is_ok());
}
1946
/// A bare `[llm]` table (no providers, no other keys) passes the
/// legacy-format check.
#[test]
fn check_legacy_format_empty_providers_no_legacy_ok() {
    let parsed = parse_llm("[llm]\n");
    let outcome = parsed.check_legacy_format();
    assert!(outcome.is_ok());
}
1953
/// With no providers configured, `effective_provider` reports Ollama.
#[test]
fn effective_provider_falls_back_to_ollama_when_no_providers() {
    let parsed = parse_llm("[llm]\n");
    let kind = parsed.effective_provider();
    assert_eq!(kind, ProviderKind::Ollama);
}
1961
/// A single Claude provider entry makes `effective_provider` report Claude.
#[test]
fn effective_provider_reads_from_providers_first() {
    let source = r#"
[llm]

[[llm.providers]]
type = "claude"
model = "claude-sonnet-4-6"
"#;
    let parsed = parse_llm(source);
    let kind = parsed.effective_provider();
    assert_eq!(kind, ProviderKind::Claude);
}
1975
/// The config-level `effective_model` returns the model of the configured
/// provider entry.
#[test]
fn effective_model_reads_from_providers_first() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#;
    let parsed = parse_llm(source);
    let model = parsed.effective_model();
    assert_eq!(model, "qwen3:8b");
}
1989
/// When the first provider is flagged `embed = true`, `effective_model`
/// reports the model of the following provider instead.
#[test]
fn effective_model_skips_embed_only_provider() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "gemma4:26b"
embed = true

[[llm.providers]]
type = "openai"
model = "gpt-4o-mini"
"#;
    let parsed = parse_llm(source);
    let model = parsed.effective_model();
    assert_eq!(model, "gpt-4o-mini");
}
2008
/// With no providers configured, the base URL resolves to
/// "http://localhost:11434".
#[test]
fn effective_base_url_default_when_absent() {
    let parsed = parse_llm("[llm]\n");
    let url = parsed.effective_base_url();
    assert_eq!(url, "http://localhost:11434");
}
2014
/// A `base_url` set on the provider entry is what `effective_base_url` returns.
#[test]
fn effective_base_url_from_providers_entry() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
base_url = "http://myhost:11434"
"#;
    let parsed = parse_llm(source);
    let url = parsed.effective_base_url();
    assert_eq!(url, "http://myhost:11434");
}
2028
/// Pins the `Default` values of `ComplexityRoutingConfig`: bypass enabled,
/// 5 s triage timeout, 50 triage tokens, no triage provider, no `simple` tier.
#[test]
fn complexity_routing_defaults() {
    let defaults = ComplexityRoutingConfig::default();
    assert!(
        defaults.bypass_single_provider,
        "bypass_single_provider must default to true"
    );
    assert_eq!(defaults.triage_timeout_secs, 5);
    assert_eq!(defaults.max_triage_tokens, 50);
    assert!(defaults.triage_provider.is_none());
    let tiers = &defaults.tiers;
    assert!(tiers.simple.is_none());
}
2043
/// A fully populated `[llm.complexity_routing]` section (including all four
/// tiers) deserializes with every value preserved.
#[test]
fn complexity_routing_toml_round_trip() {
    let source = r#"
[llm]
routing = "triage"

[llm.complexity_routing]
triage_provider = "fast"
bypass_single_provider = false
triage_timeout_secs = 10
max_triage_tokens = 100

[llm.complexity_routing.tiers]
simple = "fast"
medium = "medium"
complex = "large"
expert = "opus"
"#;
    let parsed = parse_llm(source);
    assert!(matches!(parsed.routing, LlmRoutingStrategy::Triage));

    let routing = parsed
        .complexity_routing
        .expect("complexity_routing must be present");
    assert_eq!(routing.triage_provider.as_deref(), Some("fast"));
    assert!(!routing.bypass_single_provider);
    assert_eq!(routing.triage_timeout_secs, 10);
    assert_eq!(routing.max_triage_tokens, 100);

    let tiers = &routing.tiers;
    assert_eq!(tiers.simple.as_deref(), Some("fast"));
    assert_eq!(tiers.medium.as_deref(), Some("medium"));
    assert_eq!(tiers.complex.as_deref(), Some("large"));
    assert_eq!(tiers.expert.as_deref(), Some("opus"));
}
2077
/// Only two tiers are configured: the omitted tiers stay `None`, and the
/// unspecified routing fields keep the values asserted below (bypass on,
/// 5 s timeout).
#[test]
fn complexity_routing_partial_tiers_toml() {
    let source = r#"
[llm]
routing = "triage"

[llm.complexity_routing.tiers]
simple = "haiku"
complex = "sonnet"
"#;
    let parsed = parse_llm(source);
    let routing = parsed
        .complexity_routing
        .expect("complexity_routing must be present");

    let tiers = &routing.tiers;
    assert_eq!(tiers.simple.as_deref(), Some("haiku"));
    assert!(tiers.medium.is_none());
    assert_eq!(tiers.complex.as_deref(), Some("sonnet"));
    assert!(tiers.expert.is_none());

    assert!(routing.bypass_single_provider);
    assert_eq!(routing.triage_timeout_secs, 5);
}
2102
/// `routing = "triage"` deserializes to `LlmRoutingStrategy::Triage`.
#[test]
fn routing_strategy_triage_deserialized() {
    let source = r#"
[llm]
routing = "triage"
"#;
    let parsed = parse_llm(source);
    let is_triage = matches!(parsed.routing, LlmRoutingStrategy::Triage);
    assert!(is_triage);
}
2113
/// `[llm.stt] provider` names an entry that carries an `stt_model`, so that
/// entry is returned.
#[test]
fn stt_provider_entry_by_name_match() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
stt_model = "gpt-4o-mini-transcribe"

[llm.stt]
provider = "quality"
"#;
    let parsed = parse_llm(source);
    let found = parsed
        .stt_provider_entry()
        .expect("should find stt provider");
    assert_eq!(found.effective_name(), "quality");
    assert_eq!(found.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
}
2136
/// With `provider = ""` in `[llm.stt]`, the entry that has an `stt_model` is
/// still returned.
#[test]
fn stt_provider_entry_auto_detect_when_provider_empty() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = ""
"#;
    let parsed = parse_llm(source);
    let found = parsed.stt_provider_entry().expect("should auto-detect");
    assert_eq!(found.effective_name(), "openai-stt");
}
2155
/// With no `[llm.stt]` section at all, the entry that has an `stt_model` is
/// still returned.
#[test]
fn stt_provider_entry_auto_detect_no_stt_section() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"
"#;
    let parsed = parse_llm(source);
    let found = parsed.stt_provider_entry().expect("should auto-detect");
    assert_eq!(found.effective_name(), "openai-stt");
}
2172
/// No provider declares an `stt_model`, so no STT entry is found.
#[test]
fn stt_provider_entry_none_when_no_stt_model() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
"#;
    let parsed = parse_llm(source);
    let found = parsed.stt_provider_entry();
    assert!(found.is_none());
}
2187
/// `[llm.stt] provider` names "quality", which has no `stt_model`; a different
/// entry ("openai-stt") does. The lookup must return `None` rather than pick
/// the other entry.
#[test]
fn stt_provider_entry_name_mismatch_falls_back_to_none() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#;
    let parsed = parse_llm(source);
    let found = parsed.stt_provider_entry();
    assert!(found.is_none());
}
2212
/// An `[llm.stt]` section with only `provider` and `language` deserializes
/// with both values preserved.
#[test]
fn stt_config_deserializes_new_slim_format() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
language = "en"
"#;
    let parsed = parse_llm(source);
    let section = parsed.stt.as_ref().expect("stt section present");
    assert_eq!(section.provider, "quality");
    assert_eq!(section.language, "en");
}
2233
/// `default_stt_provider` yields the empty string.
#[test]
fn stt_config_default_provider_is_empty() {
    let provider = default_stt_provider();
    assert_eq!(provider, "");
}
2239
/// A bare `[llm]` table with no STT section passes STT validation.
#[test]
fn validate_stt_missing_provider_ok() {
    let parsed = parse_llm("[llm]\n");
    let outcome = parsed.validate_stt();
    assert!(outcome.is_ok());
}
2245
/// `[llm.stt] provider` refers to an existing entry with an `stt_model`;
/// validation succeeds.
#[test]
fn validate_stt_valid_reference() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#;
    let parsed = parse_llm(source);
    let outcome = parsed.validate_stt();
    assert!(outcome.is_ok());
}
2263
/// `[llm.stt] provider` names an entry that is not in the provider list;
/// validation fails.
#[test]
fn validate_stt_nonexistent_provider_errors() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "nonexistent"
"#;
    let parsed = parse_llm(source);
    let outcome = parsed.validate_stt();
    assert!(outcome.is_err());
}
2281
/// The referenced provider exists but has no `stt_model`: validation still
/// succeeds, while the STT entry lookup yields `None`.
// NOTE(review): the test name implies a warning is emitted; that is not
// observable from these assertions.
#[test]
fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
    let source = r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "quality"
"#;
    let parsed = parse_llm(source);
    assert!(parsed.validate_stt().is_ok());

    let found = parsed.stt_provider_entry();
    assert!(
        found.is_none(),
        "stt_provider_entry must be None when provider has no stt_model"
    );
}
2306
/// An explicit `warmup_queries = 50` deserializes to `Some(50)`.
#[test]
fn bandit_warmup_queries_explicit_value_is_deserialized() {
    let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 50
"#;
    let parsed = parse_llm(source);
    let router = parsed.router.expect("router section must be present");
    let bandit = router.bandit.expect("bandit section must be present");
    assert_eq!(
        bandit.warmup_queries,
        Some(50),
        "warmup_queries = 50 must deserialize to Some(50)"
    );
}
2333
/// An explicit `warmup_queries = 0` deserializes to `Some(0)`.
// NOTE(review): despite the "explicit_null_is_none" name, the fixture uses 0
// and expects `Some(0)` — presumably because TOML has no null literal.
#[test]
fn bandit_warmup_queries_explicit_null_is_none() {
    let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 0
"#;
    let parsed = parse_llm(source);
    let router = parsed.router.expect("router section must be present");
    let bandit = router.bandit.expect("bandit section must be present");
    assert_eq!(
        bandit.warmup_queries,
        Some(0),
        "warmup_queries = 0 must deserialize to Some(0)"
    );
}
2361
/// A bandit section that omits `warmup_queries` deserializes the field as
/// `None`.
#[test]
fn bandit_warmup_queries_missing_field_defaults_to_none() {
    let source = r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
alpha = 1.5
"#;
    let parsed = parse_llm(source);
    let router = parsed.router.expect("router section must be present");
    let bandit = router.bandit.expect("bandit section must be present");
    assert_eq!(
        bandit.warmup_queries, None,
        "omitted warmup_queries must default to None"
    );
}
2386
/// `ProviderName::new` stores the given text, readable back via `as_str`.
#[test]
fn provider_name_new_and_as_str() {
    let provider = ProviderName::new("fast");
    assert!(!provider.is_empty());
    assert_eq!(provider.as_str(), "fast");
}
2393
/// The `Default` provider name is the empty string.
#[test]
fn provider_name_default_is_empty() {
    let blank = ProviderName::default();
    assert_eq!(blank.as_str(), "");
    assert!(blank.is_empty());
}
2400
/// A `&ProviderName` coerces to `&str` through its `Deref` impl.
#[test]
fn provider_name_deref_to_str() {
    let provider = ProviderName::new("quality");
    // The type annotation is what forces the deref coercion under test.
    let borrowed: &str = &provider;
    assert_eq!(borrowed, "quality");
}
2407
/// `ProviderName` compares directly against string slices, for both equality
/// and inequality.
#[test]
fn provider_name_partial_eq_str() {
    let provider = ProviderName::new("fast");
    assert_ne!(provider, "slow");
    assert_eq!(provider, "fast");
}
2414
/// A provider name serializes to a plain JSON string and deserializes back to
/// an equal value.
#[test]
fn provider_name_serde_roundtrip() {
    let original = ProviderName::new("my-provider");
    let encoded = serde_json::to_string(&original).expect("serialize");
    assert_eq!(encoded, "\"my-provider\"");

    let decoded: ProviderName = serde_json::from_str(&encoded).expect("deserialize");
    assert_eq!(decoded, original);
}
2423
/// The empty provider name serializes to `""` and round-trips back to an
/// empty, equal value.
#[test]
fn provider_name_serde_empty_roundtrip() {
    let original = ProviderName::default();
    let encoded = serde_json::to_string(&original).expect("serialize");
    assert_eq!(encoded, "\"\"");

    let decoded: ProviderName = serde_json::from_str(&encoded).expect("deserialize");
    assert_eq!(decoded, original);
    assert!(decoded.is_empty());
}
2433
2434 fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2437 ProviderEntry {
2438 provider_type: ProviderKind::Gonka,
2439 name: Some("my-gonka".into()),
2440 gonka_nodes: nodes,
2441 ..Default::default()
2442 }
2443 }
2444
2445 fn valid_gonka_nodes() -> Vec<GonkaNode> {
2446 vec![
2447 GonkaNode {
2448 url: "https://node1.gonka.ai".into(),
2449 address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2450 name: Some("node1".into()),
2451 },
2452 GonkaNode {
2453 url: "https://node2.gonka.ai".into(),
2454 address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2455 name: Some("node2".into()),
2456 },
2457 GonkaNode {
2458 url: "http://node3.internal".into(),
2459 address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2460 name: None,
2461 },
2462 ]
2463 }
2464
/// A named Gonka entry with the valid node fixture passes validation.
#[test]
fn validate_gonka_valid() {
    let nodes = valid_gonka_nodes();
    let entry = gonka_entry_with_nodes(nodes);
    let outcome = entry.validate();
    assert!(outcome.is_ok());
}
2470
/// A Gonka entry with no nodes is rejected, and the error text mentions
/// "gonka_nodes".
#[test]
fn validate_gonka_empty_nodes_errors() {
    let nodeless = gonka_entry_with_nodes(Vec::new());
    let err = nodeless.validate().unwrap_err();
    let mentions_field = err.to_string().contains("gonka_nodes");
    assert!(mentions_field, "error should mention gonka_nodes: {err}");
}
2480
/// A node with an empty `url` is rejected with an error mentioning "url".
#[test]
fn validate_gonka_node_empty_url_errors() {
    let blank_url_node = GonkaNode {
        url: String::new(),
        address: "gonka1test".into(),
        name: None,
    };
    let entry = gonka_entry_with_nodes(vec![blank_url_node]);
    let err = entry.validate().unwrap_err();
    let message = err.to_string();
    assert!(message.contains("url"), "{err}");
}
2491
/// A node whose URL uses the `ftp` scheme is rejected with an error
/// mentioning "http".
#[test]
fn validate_gonka_node_invalid_scheme_errors() {
    let ftp_node = GonkaNode {
        url: "ftp://node.gonka.ai".into(),
        address: "gonka1test".into(),
        name: None,
    };
    let entry = gonka_entry_with_nodes(vec![ftp_node]);
    let err = entry.validate().unwrap_err();
    let message = err.to_string();
    assert!(message.contains("http"), "{err}");
}
2502
/// A Gonka entry without a `name` is rejected with an error mentioning
/// "gonka", even when its nodes are valid.
#[test]
fn validate_gonka_without_name_errors() {
    let nameless = ProviderEntry {
        provider_type: ProviderKind::Gonka,
        name: None,
        gonka_nodes: valid_gonka_nodes(),
        ..Default::default()
    };
    let err = nameless.validate().unwrap_err();
    let message = err.to_string();
    assert!(message.contains("gonka"), "{err}");
}
2514
/// A Gonka provider with three nested `gonka_nodes` tables and a custom chain
/// prefix deserializes with its kind, name, nodes and prefix preserved.
#[test]
fn gonka_toml_round_trip() {
    let source = r#"
[llm]

[[llm.providers]]
type = "gonka"
name = "my-gonka"
gonka_chain_prefix = "custom-chain"

[[llm.providers.gonka_nodes]]
url = "https://node1.gonka.ai"
address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
name = "node1"

[[llm.providers.gonka_nodes]]
url = "https://node2.gonka.ai"
address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
name = "node2"

[[llm.providers.gonka_nodes]]
url = "https://node3.gonka.ai"
address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
"#;
    let parsed = parse_llm(source);
    assert_eq!(parsed.providers.len(), 1);

    let provider = &parsed.providers[0];
    assert_eq!(provider.provider_type, ProviderKind::Gonka);
    assert_eq!(provider.name.as_deref(), Some("my-gonka"));

    let nodes = &provider.gonka_nodes;
    assert_eq!(nodes.len(), 3);
    let first = &nodes[0];
    assert_eq!(first.url, "https://node1.gonka.ai");
    assert_eq!(
        first.address,
        "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
    );
    assert_eq!(first.name.as_deref(), Some("node1"));
    // The third node table omits `name`.
    assert_eq!(nodes[2].name, None);

    assert_eq!(provider.gonka_chain_prefix.as_deref(), Some("custom-chain"));
}
2555
/// Without an explicit prefix, the effective chain prefix is "gonka".
#[test]
fn gonka_default_chain_prefix() {
    let nodes = valid_gonka_nodes();
    let entry = gonka_entry_with_nodes(nodes);
    let prefix = entry.effective_gonka_chain_prefix();
    assert_eq!(prefix, "gonka");
}
2561
/// An explicitly configured chain prefix is returned as the effective prefix.
#[test]
fn gonka_explicit_chain_prefix() {
    let customised = ProviderEntry {
        provider_type: ProviderKind::Gonka,
        name: Some("my-gonka".into()),
        gonka_nodes: valid_gonka_nodes(),
        gonka_chain_prefix: Some("my-chain".into()),
        ..Default::default()
    };
    let prefix = customised.effective_gonka_chain_prefix();
    assert_eq!(prefix, "my-chain");
}
2573
/// With no model set, a Gonka entry resolves to the empty string.
#[test]
fn effective_model_gonka_is_empty() {
    let unset = ProviderEntry {
        provider_type: ProviderKind::Gonka,
        model: None,
        ..Default::default()
    };
    let resolved = unset.effective_model();
    assert_eq!(resolved, "");
}
2583
/// A config with plain Ollama and Claude providers (no Gonka fields) still
/// deserializes into two entries of the expected kinds, in order.
#[test]
fn existing_configs_still_parse() {
    let source = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"

[[llm.providers]]
type = "claude"
name = "claude"
model = "claude-sonnet-4-6"
"#;
    let parsed = parse_llm(source);
    let providers = &parsed.providers;
    assert_eq!(providers.len(), 2);
    assert_eq!(providers[0].provider_type, ProviderKind::Ollama);
    assert_eq!(providers[1].provider_type, ProviderKind::Claude);
}
2603}