Skip to main content

zeph_config/
providers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6// ── LLM provider config types (moved from zeph-llm) ─────────────────────────
7
#[non_exhaustive]
/// Extended or adaptive thinking mode for Claude.
///
/// Serializes as an internally tagged object with `mode` as the tag, using
/// `snake_case` variant names:
/// `{ "mode": "extended", "budget_tokens": 10000 }` or `{ "mode": "adaptive" }`.
///
/// The adaptive form may additionally carry an `effort` key
/// (e.g. `{ "mode": "adaptive", "effort": "high" }`); the key is omitted from the
/// serialized output when no effort is set and may be left out on input.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum ThinkingConfig {
    /// Extended thinking with an explicit token budget.
    Extended {
        /// Maximum thinking tokens to allocate.
        budget_tokens: u32,
    },
    /// Adaptive thinking that selects effort automatically.
    Adaptive {
        /// Explicit effort hint when provided; model-chosen when `None`.
        ///
        /// Defaults to `None` when the key is missing and is skipped on serialization
        /// when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        effort: Option<ThinkingEffort>,
    },
}
28
/// Effort level for adaptive thinking.
///
/// Serialized as a lowercase string: `"low"`, `"medium"`, or `"high"`.
/// [`Default`] yields [`ThinkingEffort::Medium`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ThinkingEffort {
    /// Minimal thinking; fastest responses.
    Low,
    /// Balanced thinking depth. This is the default.
    #[default]
    Medium,
    /// Maximum thinking depth; slowest responses.
    High,
}
42
#[non_exhaustive]
/// Prompt-cache TTL variant for the Anthropic API.
///
/// When used as a TOML config value the accepted strings are `"ephemeral"` and `"1h"`.
/// On the wire (Anthropic API), `OneHour` serializes as `"1h"` inside the `cache_control.ttl`
/// field.
///
/// [`Default`] yields [`CacheTtl::Ephemeral`].
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum CacheTtl {
    /// Default ephemeral TTL (~5 minutes). No beta header required.
    #[default]
    Ephemeral,
    /// Extended 1-hour TTL. Requires the `extended-cache-ttl-2025-04-25` beta header.
    /// Cache writes cost approximately 2× more than `Ephemeral`.
    ///
    /// Explicitly renamed to `"1h"` because the `snake_case` rule would otherwise
    /// produce `"one_hour"`.
    #[serde(rename = "1h")]
    OneHour,
}
60
61impl CacheTtl {
62    /// Returns `true` when this TTL variant requires the `extended-cache-ttl-2025-04-25` beta
63    /// header to be sent with each request.
64    #[must_use]
65    pub fn requires_beta(self) -> bool {
66        match self {
67            Self::OneHour => true,
68            Self::Ephemeral => false,
69        }
70    }
71}
72
/// Thinking level for Gemini models that support extended reasoning.
///
/// Maps to `generationConfig.thinkingConfig.thinkingLevel` in the Gemini API.
/// Valid for Gemini 3+ models. For Gemini 2.5, use `thinking_budget` instead.
///
/// Serialized as a lowercase string: `"minimal"`, `"low"`, `"medium"`, or `"high"`.
/// The type has no [`Default`] implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum GeminiThinkingLevel {
    /// Minimal reasoning pass.
    Minimal,
    /// Low reasoning depth.
    Low,
    /// Medium reasoning depth.
    Medium,
    /// Full reasoning depth.
    High,
}
90
91pub use zeph_common::ProviderName;
92
/// Default response-cache TTL: one hour, expressed in seconds.
fn default_response_cache_ttl_secs() -> u64 {
    60 * 60
}

/// Default cosine-similarity threshold for semantic cache hits.
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}

/// Default number of cached entries examined per semantic lookup.
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}

/// Default EMA alpha for the router latency estimator.
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}

/// Default router reorder interval.
fn default_router_reorder_interval() -> u64 {
    10_u64
}

/// Default embedding model name.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}

/// Default Candle model source identifier.
fn default_candle_source() -> String {
    String::from("huggingface")
}

/// Default chat template identifier.
fn default_chat_template() -> String {
    String::from("chatml")
}

/// Default Candle device identifier.
fn default_candle_device() -> String {
    String::from("cpu")
}

/// Default `temperature` value.
fn default_temperature() -> f64 {
    0.7_f64
}

/// Default `max_tokens` value (2 Ki tokens).
fn default_max_tokens() -> usize {
    2 * 1024
}

/// Default `seed` value.
fn default_seed() -> u64 {
    42_u64
}

/// Default `repeat_penalty` value.
fn default_repeat_penalty() -> f32 {
    1.1_f32
}

/// Default `repeat_last_n` value.
fn default_repeat_last_n() -> usize {
    64_usize
}

/// Default minimum quality score for cascade routing to accept a response.
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}

/// Default cap on quality-based cascade escalations per request.
fn default_cascade_max_escalations() -> u8 {
    2_u8
}

/// Default cascade window size.
fn default_cascade_window_size() -> usize {
    50_usize
}

/// Default cascade judge timeout: five seconds, expressed in milliseconds.
fn default_cascade_judge_timeout_ms() -> u64 {
    5 * 1_000
}

/// Default reputation decay factor.
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}

/// Default weight of reputation in the routing score blend.
fn default_reputation_weight() -> f64 {
    0.3_f64
}

/// Default number of observations required before reputation influences routing.
fn default_reputation_min_observations() -> u64 {
    5_u64
}
176
/// Default STT provider name: the empty string, which selects auto-detection.
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}

/// Default STT transcription language hint: `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
188
/// Returns the default embedding model name used by `[llm] embedding_model`.
///
/// Crate-visible accessor that delegates to the private `default_embedding_model`
/// so the value is defined in exactly one place.
#[must_use]
pub(crate) fn get_default_embedding_model() -> String {
    default_embedding_model()
}

/// Returns the default response cache TTL in seconds.
///
/// Crate-visible accessor that delegates to the private
/// `default_response_cache_ttl_secs`.
#[must_use]
pub(crate) fn get_default_response_cache_ttl_secs() -> u64 {
    default_response_cache_ttl_secs()
}

/// Returns the default EMA alpha for the router latency estimator.
///
/// Crate-visible accessor that delegates to the private `default_router_ema_alpha`.
#[must_use]
pub(crate) fn get_default_router_ema_alpha() -> f64 {
    default_router_ema_alpha()
}

/// Returns the default router reorder interval (turns between provider re-ranking).
///
/// Crate-visible accessor that delegates to the private
/// `default_router_reorder_interval`.
#[must_use]
pub(crate) fn get_default_router_reorder_interval() -> u64 {
    default_router_reorder_interval()
}
212
/// LLM provider backend selector.
///
/// Used in `[[llm.providers]]` entries as the `type` field.
///
/// Variants are (de)serialized as all-lowercase strings with no separators, so
/// `OpenAi` is written `"openai"` — the same identifiers that
/// [`ProviderKind::as_str`] returns.
///
/// # Example (TOML)
///
/// ```toml
/// [[llm.providers]]
/// type = "openai"
/// model = "gpt-4o"
/// name = "quality"
/// ```
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
    /// Local Ollama server (default base URL: `http://localhost:11434`).
    Ollama,
    /// Anthropic Claude API.
    Claude,
    /// `OpenAI` API.
    OpenAi,
    /// Google Gemini API.
    Gemini,
    /// Local Candle inference (CPU/GPU, no external server required).
    Candle,
    /// OpenAI-compatible third-party API (e.g. Groq, Together AI, LM Studio).
    Compatible,
    /// Native Gonka blockchain provider.
    Gonka,
    /// Cocoon confidential compute network via localhost sidecar.
    Cocoon,
}
246
247impl ProviderKind {
248    /// Return the lowercase string identifier for this provider kind.
249    ///
250    /// # Examples
251    ///
252    /// ```
253    /// use zeph_config::ProviderKind;
254    ///
255    /// assert_eq!(ProviderKind::Claude.as_str(), "claude");
256    /// assert_eq!(ProviderKind::OpenAi.as_str(), "openai");
257    /// ```
258    #[must_use]
259    pub fn as_str(self) -> &'static str {
260        match self {
261            Self::Ollama => "ollama",
262            Self::Claude => "claude",
263            Self::OpenAi => "openai",
264            Self::Gemini => "gemini",
265            Self::Candle => "candle",
266            Self::Compatible => "compatible",
267            Self::Gonka => "gonka",
268            Self::Cocoon => "cocoon",
269        }
270    }
271}
272
273impl std::fmt::Display for ProviderKind {
274    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275        f.write_str(self.as_str())
276    }
277}
278
/// Default cap for an accumulated tool-use JSON buffer: 4 MiB.
fn default_max_tool_json_bytes() -> usize {
    4 << 20
}

/// Default cap for an accumulated thinking block: 1 MiB.
fn default_max_thinking_bytes() -> usize {
    1 << 20
}

/// Default cap for an accumulated compaction summary: 32 KiB.
fn default_max_compaction_bytes() -> usize {
    32 << 10
}
290
291fn stream_limits_is_default(v: &StreamLimits) -> bool {
292    v.max_tool_json_bytes == default_max_tool_json_bytes()
293        && v.max_thinking_bytes == default_max_thinking_bytes()
294        && v.max_compaction_bytes == default_max_compaction_bytes()
295}
296
/// Per-buffer byte caps for Claude SSE streaming.
///
/// Controls the maximum number of bytes accumulated in each streaming buffer before
/// excess data is discarded with a warning. All caps default to values that match the
/// pre-existing hardcoded constants, so omitting `[llm.stream_limits]` in the config
/// preserves identical behavior.
///
/// Each field has its own serde default, so a section that sets only some of the
/// keys takes the default for every key it leaves out.
///
/// # Example (TOML)
///
/// ```toml
/// [llm.stream_limits]
/// max_tool_json_bytes  = 8388608   # 8 MiB  — raise for unusually large tool results
/// max_thinking_bytes   = 2097152   # 2 MiB  — raise for deep extended-thinking runs
/// max_compaction_bytes = 65536     # 64 KiB — raise for verbose compaction summaries
/// ```
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct StreamLimits {
    /// Maximum bytes for an accumulated tool-use JSON buffer. Default: 4 MiB.
    #[serde(default = "default_max_tool_json_bytes")]
    pub max_tool_json_bytes: usize,

    /// Maximum bytes for an accumulated thinking block. Default: 1 MiB.
    #[serde(default = "default_max_thinking_bytes")]
    pub max_thinking_bytes: usize,

    /// Maximum bytes for an accumulated server-side compaction summary. Default: 32 KiB.
    #[serde(default = "default_max_compaction_bytes")]
    pub max_compaction_bytes: usize,
}
326
327impl Default for StreamLimits {
328    fn default() -> Self {
329        Self {
330            max_tool_json_bytes: default_max_tool_json_bytes(),
331            max_thinking_bytes: default_max_thinking_bytes(),
332            max_compaction_bytes: default_max_compaction_bytes(),
333        }
334    }
335}
336
/// LLM configuration, nested under `[llm]` in TOML.
///
/// Declares the provider pool and controls routing, embedding, caching, and STT.
/// All providers are declared in `[[llm.providers]]`; subsystems reference them by
/// the `name` field using a `*_provider` config key.
///
/// Every field carries a serde default, so any key (or the whole section) may be
/// omitted.
///
/// # Example (TOML)
///
/// ```toml
/// [[llm.providers]]
/// name = "fast"
/// type = "openai"
/// model = "gpt-4o-mini"
///
/// [[llm.providers]]
/// name = "quality"
/// type = "claude"
/// model = "claude-opus-4-5"
///
/// [llm]
/// routing = "none"
/// embedding_model = "qwen3-embedding"
/// ```
#[derive(Debug, Deserialize, Serialize)]
pub struct LlmConfig {
    /// Provider pool. First entry is default unless one is marked `default = true`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub providers: Vec<ProviderEntry>,

    /// Routing strategy for multi-provider configs.
    #[serde(default, skip_serializing_if = "is_routing_none")]
    pub routing: LlmRoutingStrategy,

    /// Global embedding model name, used as the last-resort fallback by
    /// `effective_embedding_model`. Default: `"qwen3-embedding"`.
    #[serde(default = "default_embedding_model_opt")]
    pub embedding_model: String,
    /// Optional Candle configuration (`[llm.candle]`). Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candle: Option<CandleConfig>,
    /// Optional speech-to-text configuration (`[llm.stt]`).
    #[serde(default)]
    pub stt: Option<SttConfig>,
    /// Enable response caching. Default: false.
    #[serde(default)]
    pub response_cache_enabled: bool,
    /// Response cache TTL in seconds. Default: 3600.
    #[serde(default = "default_response_cache_ttl_secs")]
    pub response_cache_ttl_secs: u64,
    /// Enable semantic similarity-based response caching. Requires embedding support.
    #[serde(default)]
    pub semantic_cache_enabled: bool,
    /// Cosine similarity threshold for semantic cache hits (0.0–1.0).
    ///
    /// Only the highest-scoring candidate above this threshold is returned.
    /// Lower values produce more cache hits but risk returning less relevant responses.
    /// Recommended range: 0.92–0.98; default: 0.95.
    #[serde(default = "default_semantic_cache_threshold")]
    pub semantic_cache_threshold: f32,
    /// Maximum cached entries to examine per semantic lookup (SQL `LIMIT` clause in
    /// `ResponseCache::get_semantic()`). Controls the recall-vs-performance tradeoff:
    ///
    /// - **Higher values** (e.g. 50): scan more entries, better chance of finding a
    ///   semantically similar cached response, but slower queries.
    /// - **Lower values** (e.g. 5): faster queries, but may miss relevant cached entries
    ///   when the cache is large.
    /// - **Default (10)**: balanced middle ground for typical workloads.
    ///
    /// Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running
    /// sessions with many cached responses); reduce to 5 for low-latency interactive use.
    /// Env override: `ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES`.
    #[serde(default = "default_semantic_cache_max_candidates")]
    pub semantic_cache_max_candidates: u32,
    /// Enable the router's EMA latency tracking. Default: false.
    #[serde(default)]
    pub router_ema_enabled: bool,
    /// EMA alpha for the router latency estimator. Default: 0.1.
    #[serde(default = "default_router_ema_alpha")]
    pub router_ema_alpha: f64,
    /// Router reorder interval (turns between provider re-ranking). Default: 10.
    #[serde(default = "default_router_reorder_interval")]
    pub router_reorder_interval: u64,
    /// Routing configuration for Thompson/Cascade strategies.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub router: Option<RouterConfig>,
    /// Provider-specific instruction file to inject into the system prompt.
    /// Merged with `agent.instruction_files` at startup.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instruction_file: Option<std::path::PathBuf>,
    /// Shorthand model spec for tool-pair summarization and context compaction.
    /// Format: `ollama/<model>`, `claude[/<model>]`, `openai[/<model>]`, `compatible/<name>`, `candle`.
    /// Ignored when `[llm.summary_provider]` is set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_model: Option<String>,
    /// Structured provider config for summarization. Takes precedence over `summary_model`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_provider: Option<ProviderEntry>,

    /// Complexity triage routing configuration. Required when `routing = "triage"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub complexity_routing: Option<ComplexityRoutingConfig>,

    /// Collaborative Entropy (`CoE`) configuration. `None` = `CoE` disabled.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coe: Option<CoeConfig>,

    /// SSE streaming buffer size limits.
    ///
    /// Controls the maximum bytes accumulated in per-block SSE buffers before excess
    /// data is silently discarded. All fields have sane defaults; omitting the section
    /// keeps pre-existing behavior.
    ///
    /// The section is skipped on serialization when every cap equals its default.
    // NOTE(review): the `StreamLimits` type docs say excess data is discarded "with a
    // warning", while this field says "silently" — confirm against the streaming code
    // and align the two descriptions.
    #[serde(default, skip_serializing_if = "stream_limits_is_default")]
    pub stream_limits: StreamLimits,
}
442
/// Serde default for `LlmConfig::embedding_model`.
///
/// Delegates to `default_embedding_model` so the model name is defined in one place.
fn default_embedding_model_opt() -> String {
    default_embedding_model()
}
446
impl Default for LlmConfig {
    /// Builds the default config by deserializing an empty TOML document, so each
    /// field takes exactly the value serde would supply for a missing key.
    ///
    /// # Panics
    ///
    /// Panics if an empty document fails to deserialize, which would mean a field
    /// was added without a serde default.
    fn default() -> Self {
        // Every field of `LlmConfig` has a `#[serde(default …)]` attribute, so parsing
        // an empty document is expected to succeed.
        toml::from_str("").expect("empty TOML produces valid LlmConfig defaults")
    }
}
452
453#[allow(clippy::trivially_copy_pass_by_ref)]
454fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
455    *s == LlmRoutingStrategy::None
456}
457
458impl LlmConfig {
459    /// Effective provider kind for the primary (first/default) provider in the pool.
460    #[must_use]
461    pub fn effective_provider(&self) -> ProviderKind {
462        self.providers
463            .first()
464            .map_or(ProviderKind::Ollama, |e| e.provider_type)
465    }
466
467    /// Effective base URL for the primary provider.
468    #[must_use]
469    pub fn effective_base_url(&self) -> &str {
470        self.providers
471            .first()
472            .and_then(|e| e.base_url.as_deref())
473            .unwrap_or("http://localhost:11434")
474    }
475
476    /// Effective model for the primary chat-capable provider.
477    ///
478    /// Skips embed-only entries (those with `embed = true`) and returns the model of the
479    /// first provider that can handle chat requests. Falls back to `"qwen3:8b"` when no
480    /// chat-capable provider is configured.
481    #[must_use]
482    pub fn effective_model(&self) -> &str {
483        self.providers
484            .iter()
485            .find(|e| !e.embed)
486            .and_then(|e| e.model.as_deref())
487            .unwrap_or("qwen3:8b")
488    }
489
490    /// Find the provider entry designated for STT.
491    ///
492    /// Resolution priority:
493    /// 1. `[llm.stt].provider` matches `[[llm.providers]].name` and the entry has `stt_model`
494    /// 2. `[llm.stt].provider` is empty — fall through to auto-detect
495    /// 3. First provider with `stt_model` set (auto-detect fallback)
496    /// 4. `None` — STT disabled
497    #[must_use]
498    pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
499        let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
500        if name_hint.is_empty() {
501            self.providers.iter().find(|p| p.stt_model.is_some())
502        } else {
503            self.providers
504                .iter()
505                .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
506        }
507    }
508
509    /// Returns the name of the effective embedding model.
510    ///
511    /// Resolution order:
512    /// 1. `embedding_model` from the `[[llm.providers]]` entry marked `embed = true`
513    /// 2. `embedding_model` from the first entry in `[[llm.providers]]`
514    /// 3. `[llm] embedding_model` global fallback (defaults to `"nomic-embed-text"`)
515    ///
516    /// # Examples
517    ///
518    /// ```
519    /// use zeph_config::providers::LlmConfig;
520    ///
521    /// let cfg = LlmConfig::default();
522    /// assert!(!cfg.effective_embedding_model().is_empty());
523    /// ```
524    #[must_use]
525    pub fn effective_embedding_model(&self) -> String {
526        if let Some(m) = self
527            .providers
528            .iter()
529            .find(|e| e.embed)
530            .and_then(|e| e.embedding_model.as_ref())
531        {
532            return m.clone();
533        }
534        if let Some(m) = self
535            .providers
536            .first()
537            .and_then(|e| e.embedding_model.as_ref())
538        {
539            return m.clone();
540        }
541        self.embedding_model.clone()
542    }
543
544    /// Returns the name of the stable skill embedding model.
545    ///
546    /// Prefers the `[[llm.providers]]` entry with `embed = true`, using its
547    /// `embedding_model` field first and `model` field as a secondary fallback.
548    /// Falls back to [`Self::effective_embedding_model`] when no dedicated embed
549    /// entry exists. Using the actual provider model name prevents false-positive
550    /// collection rebuilds in `zeph_memory::embedding_registry`.
551    ///
552    /// # Examples
553    ///
554    /// ```
555    /// use zeph_config::providers::LlmConfig;
556    ///
557    /// let cfg = LlmConfig::default();
558    /// assert!(!cfg.stable_skill_embedding_model().is_empty());
559    /// ```
560    #[must_use]
561    pub fn stable_skill_embedding_model(&self) -> String {
562        let embed_entry = self
563            .providers
564            .iter()
565            .find(|e| e.embed)
566            .or_else(|| self.providers.iter().find(|e| e.embedding_model.is_some()));
567
568        if let Some(entry) = embed_entry {
569            if let Some(em) = entry.embedding_model.as_ref().filter(|s| !s.is_empty()) {
570                return em.clone();
571            }
572            if let Some(m) = entry.model.as_ref().filter(|s| !s.is_empty()) {
573                return m.clone();
574            }
575        }
576
577        self.effective_embedding_model()
578    }
579
580    /// Validate that the config uses the new `[[llm.providers]]` format.
581    ///
582    /// # Errors
583    ///
584    /// Returns `ConfigError::Validation` when no providers are configured.
585    pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
586        Ok(())
587    }
588
589    /// Validate STT config cross-references.
590    ///
591    /// # Errors
592    ///
593    /// Returns `ConfigError::Validation` when the referenced STT provider does not exist.
594    pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
595        use crate::error::ConfigError;
596
597        let Some(stt) = &self.stt else {
598            return Ok(());
599        };
600        if stt.provider.is_empty() {
601            return Ok(());
602        }
603        let found = self
604            .providers
605            .iter()
606            .find(|p| p.effective_name() == stt.provider);
607        match found {
608            None => {
609                return Err(ConfigError::Validation(format!(
610                    "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
611                    stt.provider
612                )));
613            }
614            Some(entry) if entry.stt_model.is_none() => {
615                tracing::warn!(
616                    provider = stt.provider,
617                    "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
618                );
619            }
620            _ => {}
621        }
622        Ok(())
623    }
624
625    /// Resolve `provider_name` to its model string and emit a startup warning when the
626    /// model does not look like a fast-tier model.
627    ///
628    /// **Soft check — never returns an error.** Misconfiguration produces a single
629    /// `tracing::warn!` at startup so operators can fix configs without being blocked.
630    ///
631    /// Rules:
632    /// - Empty `provider_name` → silently OK (caller will use the primary provider).
633    /// - Provider not found in pool → warns `"<label> provider '<name>' not found"`.
634    /// - Model resolved but not in `FAST_TIER_MODEL_HINTS` and not in `extra_allowlist` →
635    ///   warns `"<label> provider '<name>' uses '<model>' which may not be fast-tier"`.
636    /// - Model matches a hint or allowlist entry → silently OK.
637    ///
638    /// # Examples
639    ///
640    /// ```no_run
641    /// use zeph_config::providers::{LlmConfig, ProviderName};
642    ///
643    /// // LlmConfig is constructed via config file; here we illustrate the call shape.
644    /// # let cfg: LlmConfig = unimplemented!();
645    /// // empty provider name is silently ok
646    /// cfg.warn_non_fast_tier_provider(&ProviderName::default(), "memcot.distill_provider", &[]);
647    /// ```
648    pub fn warn_non_fast_tier_provider(
649        &self,
650        provider_name: &ProviderName,
651        feature_label: &str,
652        extra_allowlist: &[String],
653    ) {
654        if provider_name.is_empty() {
655            return;
656        }
657        let name = provider_name.as_str();
658        let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
659            tracing::warn!(
660                provider = name,
661                "{feature_label} provider '{name}' not found in [[llm.providers]]"
662            );
663            return;
664        };
665        let model = entry.model.as_deref().unwrap_or("");
666        if model.is_empty() {
667            return;
668        }
669        let lower = model.to_lowercase();
670        let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
671        let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
672        if !in_hints && !in_extra {
673            tracing::warn!(
674                provider = name,
675                actual = model,
676                "{feature_label} provider '{name}' uses model '{model}' \
677                 which may not be fast-tier; prefer a fast model to bound distillation cost"
678            );
679        }
680    }
681}
682
/// Lowercased substrings that identify commonly accepted fast-tier models.
///
/// Used by [`LlmConfig::warn_non_fast_tier_provider`] for a soft startup check: a
/// model counts as fast-tier when its lowercased name contains any of these entries.
/// Updating this list is non-breaking; a fast model that is missing from it only
/// triggers a spurious startup warning and never an error.
///
/// Entries must stay lowercase, because they are compared against a lowercased
/// model name.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
705
/// Speech-to-text configuration, nested under `[llm.stt]` in TOML.
///
/// When set, Zeph uses the referenced provider for voice transcription.
/// The provider must have an `stt_model` field set in its `[[llm.providers]]` entry.
///
/// Both keys are optional: `provider` defaults to the empty string and `language`
/// defaults to `"auto"`.
///
/// # Example (TOML)
///
/// ```toml
/// [llm.stt]
/// provider = "fast"
/// language = "en"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SttConfig {
    /// Provider name from `[[llm.providers]]`. Empty string means auto-detect first provider
    /// with `stt_model` set.
    #[serde(default = "default_stt_provider")]
    pub provider: String,
    /// Language hint for transcription (e.g. `"en"`, `"auto"`). Default: `"auto"`.
    #[serde(default = "default_stt_language")]
    pub language: String,
}
728
/// Routing strategy selection for multi-provider routing.
///
/// Serialized as a lowercase string: `"ema"`, `"thompson"`, `"cascade"`, or
/// `"bandit"`. [`Default`] yields [`RouterStrategyConfig::Ema`].
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RouterStrategyConfig {
    /// Exponential moving average latency-aware ordering.
    #[default]
    Ema,
    /// Thompson Sampling with Beta distributions (persistence-backed).
    Thompson,
    /// Cascade routing: try cheapest provider first, escalate on degenerate output.
    Cascade,
    /// PILOT: `LinUCB` contextual bandit with online learning and cost-aware reward.
    Bandit,
}
744
/// Agent Stability Index (ASI) configuration.
///
/// Tracks per-provider response coherence via a sliding window of response embeddings.
/// When coherence drops below `coherence_threshold`, the provider's routing prior is
/// penalized by `penalty_weight`. Disabled by default; session-only (no persistence).
///
/// Every key is optional; omitted keys take the defaults listed on each field.
///
/// # Known Limitation
///
/// ASI embeddings are computed in a background `tokio::spawn` task after the response is
/// returned to the caller. Under high request rates, the coherence score used for routing
/// may lag 1–2 responses behind due to this fire-and-forget design. With the default
/// `window = 5`, this lag is tolerable — coherence is a slow-moving signal.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AsiConfig {
    /// Enable ASI coherence tracking. Default: false.
    #[serde(default)]
    pub enabled: bool,

    /// Sliding window size for response embeddings per provider. Default: 5.
    #[serde(default = "default_asi_window")]
    pub window: usize,

    /// Coherence score [0.0, 1.0] below which the provider is penalized. Default: 0.7.
    #[serde(default = "default_asi_coherence_threshold")]
    pub coherence_threshold: f32,

    /// Penalty weight applied to Thompson beta / EMA score on low coherence. Default: 0.3.
    ///
    /// For Thompson, this shifts the beta prior: `beta += penalty_weight * (threshold - coherence)`.
    /// For EMA, the score is multiplied by `max(0.5, coherence / threshold)`.
    #[serde(default = "default_asi_penalty_weight")]
    pub penalty_weight: f32,
}
778
/// Default ASI sliding window size.
fn default_asi_window() -> usize {
    5_usize
}

/// Default ASI coherence threshold.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}

/// Default ASI penalty weight.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
790
791impl Default for AsiConfig {
792    fn default() -> Self {
793        Self {
794            enabled: false,
795            window: default_asi_window(),
796            coherence_threshold: default_asi_coherence_threshold(),
797            penalty_weight: default_asi_penalty_weight(),
798        }
799    }
800}
801
/// Routing configuration for multi-provider setups.
///
/// Stored in the `router` field of `LlmConfig`, i.e. the `[llm.router]` TOML table.
/// Every key is optional.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RouterConfig {
    /// Routing strategy: `"ema"` (default), `"thompson"`, `"cascade"`, or `"bandit"`.
    #[serde(default)]
    pub strategy: RouterStrategyConfig,
    /// Path for persisting Thompson Sampling state. Defaults to `~/.zeph/router_thompson_state.json`.
    ///
    /// # Security
    ///
    /// This path is user-controlled. The application writes and reads a JSON file at
    /// this location. Ensure the path is within a directory that is not world-writable
    /// (e.g., avoid `/tmp`). The file is created with mode `0o600` on Unix.
    // NOTE(review): the documented default lives under `~/.zeph/`, while
    // `ReputationConfig::state_path` documents `~/.config/zeph/` — the code that
    // resolves the default is not in this file; confirm which directory is correct.
    #[serde(default)]
    pub thompson_state_path: Option<String>,
    /// Cascade routing configuration. Only used when `strategy = "cascade"`.
    #[serde(default)]
    pub cascade: Option<CascadeConfig>,
    /// Bayesian reputation scoring configuration (RAPS). Disabled by default.
    #[serde(default)]
    pub reputation: Option<ReputationConfig>,
    /// PILOT bandit routing configuration. Only used when `strategy = "bandit"`.
    #[serde(default)]
    pub bandit: Option<BanditConfig>,
    /// Embedding-based quality gate threshold for Thompson/EMA routing. Default: disabled.
    ///
    /// When set, after provider selection, the cosine similarity between the query embedding
    /// and the response embedding is computed. If below this threshold, the next provider in
    /// the ordered list is tried. On exhaustion, the best response seen is returned.
    ///
    /// Only applies to Thompson and EMA strategies. Cascade uses its own quality classifier.
    /// Fail-open: embedding errors disable the gate for that request.
    #[serde(default)]
    pub quality_gate: Option<f32>,
    /// Agent Stability Index configuration. Disabled by default.
    #[serde(default)]
    pub asi: Option<AsiConfig>,
    /// Maximum number of concurrent `embed_batch` calls through the router.
    ///
    /// Limits simultaneous embedding HTTP requests to prevent provider rate-limiting
    /// and memory pressure during indexing or high-frequency recall. Default: 4.
    /// Set to 0 to disable the semaphore (unlimited concurrency).
    #[serde(default = "default_embed_concurrency")]
    pub embed_concurrency: usize,
}
847
/// Default cap on concurrent `embed_batch` calls through the router.
fn default_embed_concurrency() -> usize {
    4_usize
}
851
/// Configuration for Bayesian reputation scoring (RAPS — Reputation-Adjusted Provider Selection).
///
/// When enabled, quality outcomes from tool execution shift the routing scores over time,
/// giving an advantage to providers that consistently produce valid tool arguments.
///
/// Stored in the `reputation` field of `RouterConfig`. Every key is optional; omitted
/// keys take the defaults listed on each field.
///
/// Default: disabled. Set `enabled = true` to activate.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ReputationConfig {
    /// Enable reputation scoring. Default: false.
    #[serde(default)]
    pub enabled: bool,
    /// Session-level decay factor applied on each load. Range: (0.0, 1.0]. Default: 0.95.
    /// Lower values make reputation forget faster; 1.0 = no decay.
    #[serde(default = "default_reputation_decay_factor")]
    pub decay_factor: f64,
    /// Weight of reputation in routing score blend. Range: [0.0, 1.0]. Default: 0.3.
    ///
    /// **Warning**: values above 0.5 can aggressively suppress low-reputation providers.
    /// At `weight = 1.0` with `rep_factor = 0.0` (all failures), the routing score
    /// drops to zero — the provider becomes unreachable for that session. Stick to
    /// the default (0.3) unless you intentionally want strong reputation gating.
    #[serde(default = "default_reputation_weight")]
    pub weight: f64,
    /// Minimum quality observations before reputation influences routing. Default: 5.
    #[serde(default = "default_reputation_min_observations")]
    pub min_observations: u64,
    /// Path for persisting reputation state. Defaults to `~/.config/zeph/router_reputation_state.json`.
    // NOTE(review): `RouterConfig::thompson_state_path` documents a default under
    // `~/.zeph/` rather than `~/.config/zeph/` — the code that resolves the default is
    // not in this file; confirm which directory is correct.
    #[serde(default)]
    pub state_path: Option<String>,
}
882
/// Configuration for cascade routing (`strategy = "cascade"`).
///
/// Cascade routing tries providers in chain order (cheapest first), escalating to
/// the next provider when the response is classified as degenerate (empty, repetitive,
/// incoherent). Chain order determines cost order: first provider = cheapest, unless
/// `cost_tiers` supplies an explicit ordering.
///
/// # Limitations
///
/// The heuristic classifier detects degenerate outputs only, not semantic failures.
/// Use `classifier_mode = "judge"` for semantic quality gating (adds LLM call cost).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Minimum quality score in `[0.0, 1.0]` to accept a response without escalating.
    /// Responses scoring below this threshold trigger escalation.
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Maximum number of quality-based escalations per request.
    /// Network/API errors do not count against this budget.
    /// Default: 2 (allows up to 3 providers: cheap → mid → expensive).
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// Quality classifier mode: `"heuristic"` (default) or `"judge"`.
    /// Heuristic is zero-cost but detects only degenerate outputs.
    /// Judge requires a configured `summary_model` and adds one LLM call per evaluation.
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Rolling quality history window size per provider. Default: 50.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Maximum cumulative input+output tokens across all escalation levels.
    /// When exceeded, returns the best-seen response instead of escalating further.
    /// `None` (the default when omitted) disables the budget (unbounded escalation cost).
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Explicit cost ordering of provider names (cheapest first).
    /// When set, cascade routing sorts providers by their position in this list before
    /// trying them. Providers not in the list are appended after listed ones in their
    /// original chain order. When unset, chain order is used (default behavior).
    /// Omitted from serialized output while `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,

    /// Hard timeout for the judge LLM call (milliseconds).
    /// If the judge does not respond within this budget, the call is treated as a failure
    /// and heuristic scoring is used instead. Default: 5000 (5 s).
    #[serde(default = "default_cascade_judge_timeout_ms")]
    pub judge_timeout_ms: u64,
}
935
936impl Default for CascadeConfig {
937    fn default() -> Self {
938        Self {
939            quality_threshold: default_cascade_quality_threshold(),
940            max_escalations: default_cascade_max_escalations(),
941            classifier_mode: CascadeClassifierMode::default(),
942            window_size: default_cascade_window_size(),
943            max_cascade_tokens: None,
944            cost_tiers: None,
945            judge_timeout_ms: default_cascade_judge_timeout_ms(),
946        }
947    }
948}
949
/// Quality classifier mode for cascade routing.
///
/// Serialized in lowercase (`"heuristic"` / `"judge"`); `Heuristic` is the default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// Zero-cost heuristic: detects degenerate outputs (empty, repetitive, incoherent).
    /// Does not detect semantic failures (hallucinations, wrong answers).
    #[default]
    Heuristic,
    /// LLM-based judge: more accurate but adds latency. Falls back to heuristic on failure.
    /// Requires `summary_model` to be configured.
    Judge,
}
963
/// Serde fallback for `BanditConfig::alpha`: `1.0`.
fn default_bandit_alpha() -> f32 {
    const DEFAULT_ALPHA: f32 = 1.0;
    DEFAULT_ALPHA
}
967
/// Serde fallback for `BanditConfig::dim`: `32` feature components.
fn default_bandit_dim() -> usize {
    const DEFAULT_DIM: usize = 32;
    DEFAULT_DIM
}
971
/// Serde fallback for `BanditConfig::cost_weight`: `0.1`.
fn default_bandit_cost_weight() -> f32 {
    const DEFAULT_COST_WEIGHT: f32 = 0.1;
    DEFAULT_COST_WEIGHT
}
975
/// Serde fallback for `BanditConfig::decay_factor`: `1.0` (no decay).
fn default_bandit_decay_factor() -> f32 {
    const NO_DECAY: f32 = 1.0;
    NO_DECAY
}
979
/// Serde fallback for `BanditConfig::embedding_timeout_ms`: `50` milliseconds.
fn default_bandit_embedding_timeout_ms() -> u64 {
    const DEFAULT_TIMEOUT_MS: u64 = 50;
    DEFAULT_TIMEOUT_MS
}
983
/// Serde fallback for `BanditConfig::cache_size`: `512` cached embeddings.
fn default_bandit_cache_size() -> usize {
    const DEFAULT_CACHE_SIZE: usize = 512;
    DEFAULT_CACHE_SIZE
}
987
/// Configuration for PILOT bandit routing (`strategy = "bandit"`).
///
/// PILOT (Provider Intelligence via Learned Online Tuning) uses a `LinUCB` contextual
/// bandit to learn which provider performs best for a given query context. The feature
/// vector is derived from the query embedding (first `dim` components, L2-normalised).
///
/// **Cold start**: the bandit falls back to Thompson sampling for the first
/// `10 * num_providers` queries (configurable via `warmup_queries`). After warmup,
/// `LinUCB` takes over.
///
/// **Embedding**: an `embedding_provider` must be set for feature vectors. If the embed
/// call exceeds `embedding_timeout_ms` or fails, the bandit falls back to Thompson/uniform.
/// Use a local provider (Ollama, Candle) to avoid network latency on the hot path.
///
/// Every key is optional in the config file; omitted keys take the defaults listed
/// on each field.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct BanditConfig {
    /// `LinUCB` exploration parameter. Default: 1.0.
    /// Higher values increase exploration; lower values favour exploitation.
    #[serde(default = "default_bandit_alpha")]
    pub alpha: f32,

    /// Feature vector dimension (first `dim` components of the embedding).
    ///
    /// This is simple truncation, not PCA. The first raw embedding dimensions do not
    /// necessarily capture the most variance. For `OpenAI` `text-embedding-3-*` models,
    /// consider using the `dimensions` API parameter (Matryoshka embeddings) instead.
    /// Default: 32.
    #[serde(default = "default_bandit_dim")]
    pub dim: usize,

    /// Cost penalty weight in the reward signal: `reward = quality - cost_weight * cost_fraction`.
    /// Default: 0.1. Increase to penalise expensive providers more aggressively.
    #[serde(default = "default_bandit_cost_weight")]
    pub cost_weight: f32,

    /// Session-level decay applied to arm state on startup: `A = I + decay*(A-I)`, `b = decay*b`.
    /// Values < 1.0 cause re-exploration after provider quality changes. Default: 1.0 (no decay).
    #[serde(default = "default_bandit_decay_factor")]
    pub decay_factor: f32,

    /// Provider name from `[[llm.providers]]` used for query embeddings.
    ///
    /// SLM recommended: prefer a fast local model (e.g. Ollama `nomic-embed-text`,
    /// Candle, or `text-embedding-3-small`) — this is called on every bandit request.
    /// Empty string disables `LinUCB` (bandit always falls back to Thompson/uniform).
    /// Falls back to `ProviderName::default()` when omitted.
    #[serde(default)]
    pub embedding_provider: ProviderName,

    /// Hard timeout for the embedding call in milliseconds. Default: 50.
    /// If exceeded, the request falls back to Thompson/uniform selection.
    #[serde(default = "default_bandit_embedding_timeout_ms")]
    pub embedding_timeout_ms: u64,

    /// Maximum cached embeddings (keyed by query text hash). Default: 512.
    #[serde(default = "default_bandit_cache_size")]
    pub cache_size: usize,

    /// Path for persisting bandit state. Defaults to `~/.config/zeph/router_bandit_state.json`.
    ///
    /// Stored as `None` when omitted; the default path above is applied by the consumer,
    /// not by this struct.
    ///
    /// # Security
    ///
    /// This path is user-controlled. The file is created with mode `0o600` on Unix.
    /// Do not place it in world-writable directories.
    #[serde(default)]
    pub state_path: Option<String>,

    /// MAR (Memory-Augmented Routing) confidence threshold.
    ///
    /// When the top-1 semantic recall score for the current query is >= this value,
    /// the bandit biases toward cheaper providers (the answer is likely in memory).
    /// Set to 1.0 to disable MAR. Default: 0.9.
    #[serde(default = "default_bandit_memory_confidence_threshold")]
    pub memory_confidence_threshold: f32,

    /// Minimum number of queries before `LinUCB` takes over from Thompson warmup.
    ///
    /// When unset or `0`, defaults to `10 × number of providers` (computed at startup).
    /// Set explicitly to control how long the bandit explores uniformly before
    /// switching to context-aware routing. Setting `0` preserves the computed default.
    #[serde(default)]
    pub warmup_queries: Option<u64>,
}
1068
/// Serde fallback for `BanditConfig::memory_confidence_threshold`: `0.9`.
fn default_bandit_memory_confidence_threshold() -> f32 {
    const DEFAULT_THRESHOLD: f32 = 0.9;
    DEFAULT_THRESHOLD
}
1072
1073impl Default for BanditConfig {
1074    fn default() -> Self {
1075        Self {
1076            alpha: default_bandit_alpha(),
1077            dim: default_bandit_dim(),
1078            cost_weight: default_bandit_cost_weight(),
1079            decay_factor: default_bandit_decay_factor(),
1080            embedding_provider: ProviderName::default(),
1081            embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
1082            cache_size: default_bandit_cache_size(),
1083            state_path: None,
1084            memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
1085            warmup_queries: None,
1086        }
1087    }
1088}
1089
/// Settings for Candle local inference.
///
/// Carries the same fields as `CandleInlineConfig`; sampling options live in
/// [`GenerationParams`].
///
/// NOTE(review): unlike the other config structs in this file, this type does not
/// derive `Clone`, and no `Default` impl is visible in this part of the file —
/// confirm that is intentional.
#[derive(Debug, Deserialize, Serialize)]
pub struct CandleConfig {
    /// Model source selector. Falls back to `default_candle_source()` when omitted
    /// (the concrete value is defined outside this section).
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local model path; an empty string when omitted.
    #[serde(default)]
    pub local_path: String,
    /// Optional model file name; `None` when omitted.
    #[serde(default)]
    pub filename: Option<String>,
    /// Chat template identifier. Falls back to `default_chat_template()` when omitted.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Compute device selector. Falls back to `default_candle_device()` when omitted.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional embedding model repository; `None` when omitted.
    /// NOTE(review): presumably a `HuggingFace` repo id — confirm against the loader.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
    ///
    /// Must be the **token value** — resolved by the caller before constructing this config.
    ///
    /// NOTE(review): the struct derives `Debug` and `Serialize`, so a populated token is
    /// included in `{:?}` output and in serialized config — confirm neither is ever logged.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Sampling / generation parameters; [`GenerationParams::default`] when omitted.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Maximum seconds to wait for each half of a single inference request.
    ///
    /// The timeout is applied **twice** per `chat()` call: once for the channel send
    /// (waiting for a free slot) and once for the oneshot reply (waiting for the worker
    /// to finish). The effective maximum wall-clock wait per request is therefore
    /// `2 × inference_timeout_secs`. CPU inference can be slow; 120s is a conservative
    /// default for large models, giving up to 240s total before an error is returned.
    /// Values of 0 are silently promoted to 1 at bootstrap.
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1122
/// Serde fallback for `inference_timeout_secs` on the Candle configs: `120` seconds.
fn default_inference_timeout_secs() -> u64 {
    const DEFAULT_TIMEOUT_SECS: u64 = 120;
    DEFAULT_TIMEOUT_SECS
}
1126
/// Sampling / generation parameters for Candle local inference.
///
/// Used inside `[llm.candle.generation]` or a `[[llm.providers]]` Candle entry.
/// Every key is optional; omitted keys take the defaults listed on each field.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GenerationParams {
    /// Sampling temperature. Higher values produce more creative outputs. Default: `0.7`.
    #[serde(default = "default_temperature")]
    pub temperature: f64,
    /// Nucleus sampling threshold. When set, tokens with cumulative probability above
    /// this value are excluded. Default: `None` (disabled).
    #[serde(default)]
    pub top_p: Option<f64>,
    /// Top-k sampling. When set, only the top-k most probable tokens are considered.
    /// Default: `None` (disabled).
    #[serde(default)]
    pub top_k: Option<usize>,
    /// Maximum number of tokens to generate per response. Capped at [`MAX_TOKENS_CAP`].
    /// Default: `2048`.
    ///
    /// The stored value is not clamped; read it through
    /// [`GenerationParams::capped_max_tokens`] to get the capped figure.
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// Random seed for reproducible outputs. Default: `42`.
    #[serde(default = "default_seed")]
    pub seed: u64,
    /// Repetition penalty applied during sampling. Default: `1.1`.
    #[serde(default = "default_repeat_penalty")]
    pub repeat_penalty: f32,
    /// Number of last tokens to consider for the repetition penalty window. Default: `64`.
    #[serde(default = "default_repeat_last_n")]
    pub repeat_last_n: usize,
}
1157
/// Ceiling applied to `GenerationParams::max_tokens` so generation can never run unbounded.
pub const MAX_TOKENS_CAP: usize = 32_768;
1160
1161impl GenerationParams {
1162    /// Returns `max_tokens` clamped to [`MAX_TOKENS_CAP`].
1163    ///
1164    /// # Examples
1165    ///
1166    /// ```
1167    /// use zeph_config::GenerationParams;
1168    ///
1169    /// let params = GenerationParams::default();
1170    /// assert!(params.capped_max_tokens() <= 32768);
1171    /// ```
1172    #[must_use]
1173    pub fn capped_max_tokens(&self) -> usize {
1174        self.max_tokens.min(MAX_TOKENS_CAP)
1175    }
1176}
1177
1178impl Default for GenerationParams {
1179    fn default() -> Self {
1180        Self {
1181            temperature: default_temperature(),
1182            top_p: None,
1183            top_k: None,
1184            max_tokens: default_max_tokens(),
1185            seed: default_seed(),
1186            repeat_penalty: default_repeat_penalty(),
1187            repeat_last_n: default_repeat_last_n(),
1188        }
1189    }
1190}
1191
1192// ─── Unified config types ─────────────────────────────────────────────────────
1193
/// Routing strategy for the `[[llm.providers]]` pool.
///
/// Serialized in lowercase: `"none"`, `"ema"`, `"thompson"`, `"cascade"`, `"triage"`,
/// `"bandit"`. `None` is the default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LlmRoutingStrategy {
    /// Single provider or first-in-pool (default).
    #[default]
    None,
    /// Exponential moving average latency-aware ordering.
    Ema,
    /// Thompson Sampling with Beta distributions.
    Thompson,
    /// Cascade: try cheapest provider first, escalate on degenerate output.
    Cascade,
    /// Complexity triage routing: pre-classify each request, delegate to appropriate tier.
    Triage,
    /// PILOT: `LinUCB` contextual bandit with online learning and budget-aware reward.
    Bandit,
}
1213
/// Serde fallback for `ComplexityRoutingConfig::triage_timeout_secs`: `5` seconds.
fn default_triage_timeout_secs() -> u64 {
    const DEFAULT_TRIAGE_TIMEOUT_SECS: u64 = 5;
    DEFAULT_TRIAGE_TIMEOUT_SECS
}
1217
/// Serde fallback for `ComplexityRoutingConfig::max_triage_tokens`: `50` tokens.
fn default_max_triage_tokens() -> u32 {
    const DEFAULT_MAX_TRIAGE_TOKENS: u32 = 50;
    DEFAULT_MAX_TRIAGE_TOKENS
}
1221
/// Serde fallback for boolean flags that are enabled unless explicitly turned off.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
1225
/// `skip_serializing_if` predicate: reports whether the flag is set.
///
/// Takes `&bool` rather than `bool` because serde hands the field to a
/// `skip_serializing_if` function by reference; hence the lint exemption below.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_true(v: &bool) -> bool {
    let &flag = v;
    flag
}
1230
/// Tier-to-provider name mapping for complexity routing.
///
/// Each field names the provider that serves one complexity tier. A tier left out of
/// the config deserializes to `None`; `TierMapping::default()` has every tier unset.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct TierMapping {
    /// Provider name for the `simple` tier.
    pub simple: Option<String>,
    /// Provider name for the `medium` tier.
    pub medium: Option<String>,
    /// Provider name for the `complex` tier.
    pub complex: Option<String>,
    /// Provider name for the `expert` tier.
    pub expert: Option<String>,
}
1239
/// Configuration for complexity-based triage routing (`routing = "triage"`).
///
/// When `[llm] routing = "triage"` is set, a cheap triage model classifies each request
/// and routes it to the appropriate tier provider. Requires at least one tier mapping.
///
/// Every key is optional in the config file; omitted keys take the defaults listed
/// on each field.
///
/// # Example
///
/// ```toml
/// [llm]
/// routing = "triage"
///
/// [llm.complexity_routing]
/// triage_provider = "local-fast"
///
/// [llm.complexity_routing.tiers]
/// simple = "local-fast"
/// medium = "haiku"
/// complex = "sonnet"
/// expert = "opus"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ComplexityRoutingConfig {
    /// Provider name from `[[llm.providers]]` used for triage classification.
    /// `None` when omitted.
    #[serde(default)]
    pub triage_provider: Option<ProviderName>,

    /// Skip triage when all tiers map to the same provider. Default: `true`.
    #[serde(default = "default_true")]
    pub bypass_single_provider: bool,

    /// Tier-to-provider name mapping. All tiers are unset when omitted.
    #[serde(default)]
    pub tiers: TierMapping,

    /// Max output tokens for the triage classification call. Default: 50.
    #[serde(default = "default_max_triage_tokens")]
    pub max_triage_tokens: u32,

    /// Timeout in seconds for the triage classification call. Default: 5.
    /// On timeout, falls back to the default (first) tier provider.
    #[serde(default = "default_triage_timeout_secs")]
    pub triage_timeout_secs: u64,

    /// Optional fallback strategy when triage misclassifies.
    /// Only `"cascade"` is currently supported (Phase 4).
    ///
    /// NOTE(review): this is a free-form string; the type itself does not reject other
    /// values — confirm where unsupported strategies are handled.
    #[serde(default)]
    pub fallback_strategy: Option<String>,
}
1288
1289impl Default for ComplexityRoutingConfig {
1290    fn default() -> Self {
1291        Self {
1292            triage_provider: None,
1293            bypass_single_provider: true,
1294            tiers: TierMapping::default(),
1295            max_triage_tokens: default_max_triage_tokens(),
1296            triage_timeout_secs: default_triage_timeout_secs(),
1297            fallback_strategy: None,
1298        }
1299    }
1300}
1301
/// Configuration for the Collaborative Entropy (`CoE`) subsystem (`[llm.coe]` TOML section).
///
/// `CoE` detects uncertain responses from the primary provider and escalates to a
/// secondary provider when either the intra-entropy or inter-divergence signal crosses
/// its threshold. Only active for `RouterStrategy::Ema` and `RouterStrategy::Thompson`.
///
/// The container-level `#[serde(default)]` means any key missing from the section is
/// filled in from this type's `Default` impl.
///
/// # Example
///
/// ```toml
/// [llm.coe]
/// enabled = true
/// intra_threshold = 0.8
/// inter_threshold = 0.20
/// shadow_sample_rate = 0.1
/// secondary_provider = "quality"
/// embedding_provider = ""
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CoeConfig {
    /// Enable `CoE`. When `false`, the struct is ignored. Default: `false`.
    pub enabled: bool,
    /// Mean negative log-prob threshold; responses above this trigger intra escalation.
    /// Default: `0.8`.
    pub intra_threshold: f64,
    /// Divergence threshold in `[0.0, 1.0]`. Default: `0.20`.
    pub inter_threshold: f64,
    /// Baseline rate at which secondary is called even when intra is low. Default: `0.1`.
    pub shadow_sample_rate: f64,
    /// Provider name from `[[llm.providers]]` used as the escalation target.
    pub secondary_provider: ProviderName,
    /// Provider name for inter-divergence embeddings. Empty → inherit bandit's embedding provider.
    pub embedding_provider: ProviderName,
}
1335
1336impl Default for CoeConfig {
1337    fn default() -> Self {
1338        Self {
1339            enabled: false,
1340            intra_threshold: 0.8,
1341            inter_threshold: 0.20,
1342            shadow_sample_rate: 0.1,
1343            secondary_provider: ProviderName::default(),
1344            embedding_provider: ProviderName::default(),
1345        }
1346    }
1347}
1348
/// A single Gonka network node endpoint.
///
/// Used in `[[llm.providers]]` entries with `type = "gonka"` to declare
/// the node pool for blockchain inference routing.
///
/// `url` and `address` are required keys; `name` is optional.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct GonkaNode {
    /// HTTP(S) URL of the Gonka node (e.g. `"https://node1.gonka.ai"`).
    pub url: String,
    /// On-chain bech32 address of this node (e.g. `"gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"`).
    ///
    /// Required for signature construction: every signed request binds to the target node's
    /// on-chain address, making signatures non-replayable across different nodes.
    pub address: String,
    /// Optional human-readable label for `zeph gonka doctor` output.
    /// Omitted from serialized output while `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
}
1366
/// Inline candle config for use inside `ProviderEntry`.
/// Re-uses the generation params from `CandleConfig`.
///
/// Every key is optional; omitted keys take the same values as
/// `CandleInlineConfig::default()`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandleInlineConfig {
    /// Model source selector. Falls back to `default_candle_source()` when omitted
    /// (the concrete value is defined outside this section).
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local model path; an empty string when omitted.
    #[serde(default)]
    pub local_path: String,
    /// Optional model file name; `None` when omitted.
    #[serde(default)]
    pub filename: Option<String>,
    /// Chat template identifier. Falls back to `default_chat_template()` when omitted.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Compute device selector. Falls back to `default_candle_device()` when omitted.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional embedding model repository; `None` when omitted.
    /// NOTE(review): presumably a `HuggingFace` repo id — confirm against the loader.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
    ///
    /// NOTE(review): the struct derives `Debug` and `Serialize`, so a populated token is
    /// included in `{:?}` output and in serialized config — confirm neither is ever logged.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Sampling / generation parameters; [`GenerationParams::default`] when omitted.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Maximum seconds to wait for each half of a single inference request.
    ///
    /// The budget applies separately to the send and the receive step, so the effective
    /// wall-clock limit per request is `2 × inference_timeout_secs`.
    /// CPU inference can be slow; 120s is a conservative default. Floored at 1s.
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1395
1396impl Default for CandleInlineConfig {
1397    fn default() -> Self {
1398        Self {
1399            source: default_candle_source(),
1400            local_path: String::new(),
1401            filename: None,
1402            chat_template: default_chat_template(),
1403            device: default_candle_device(),
1404            embedding_repo: None,
1405            hf_token: None,
1406            generation: GenerationParams::default(),
1407            inference_timeout_secs: default_inference_timeout_secs(),
1408        }
1409    }
1410}
1411
/// Per-1K-token pricing for a Cocoon provider, in cents.
///
/// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
/// When this struct is present in a provider entry, its values are registered with
/// `CostTracker` at startup so that token costs are tracked accurately.
///
/// Reasoning tokens (when the model uses chain-of-thought) are folded into
/// `completion_tokens` by the Cocoon sidecar and counted at the completion price.
///
/// Either price may be omitted, in which case it deserializes to `0.0`.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct CocoonPricing {
    /// Prompt (input) token price in cents per 1K tokens. Default: `0.0`.
    #[serde(default)]
    pub prompt_cents_per_1k: f64,
    /// Completion (output) token price in cents per 1K tokens. Default: `0.0`.
    /// Reasoning tokens are counted here since the sidecar folds them into completion tokens.
    #[serde(default)]
    pub completion_cents_per_1k: f64,
}
1430
/// Unified provider entry: one struct replaces `CloudLlmConfig`, `OpenAiConfig`,
/// `GeminiConfig`, `OllamaConfig`, `CompatibleConfig`, and `OrchestratorProviderConfig`.
///
/// Provider-specific fields use `#[serde(default)]` and are ignored by backends
/// that do not use them (flat-union pattern).
///
/// Only `type` is a required key; every other field falls back to a default when omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct ProviderEntry {
    /// Required: provider backend type. Spelled `type` in the config file.
    #[serde(rename = "type")]
    pub provider_type: ProviderKind,

    /// Optional name for multi-provider configs. Auto-generated from type if absent.
    ///
    /// `validate` rejects `compatible`, `gonka` and `cocoon` entries that leave this unset.
    #[serde(default)]
    pub name: Option<String>,

    /// Model identifier. Required for most types.
    ///
    /// When omitted, `effective_model` substitutes a per-type default; that default is
    /// the empty string for `compatible`, `candle` and `gonka`.
    #[serde(default)]
    pub model: Option<String>,

    /// API base URL. Each type has its own default.
    #[serde(default)]
    pub base_url: Option<String>,

    /// Max output tokens.
    #[serde(default)]
    pub max_tokens: Option<u32>,

    /// Embedding model. When set, this provider supports `embed()` calls.
    #[serde(default)]
    pub embedding_model: Option<String>,

    /// STT model. When set, this provider supports speech-to-text via the Whisper API or
    /// Candle-local inference.
    #[serde(default)]
    pub stt_model: Option<String>,

    /// Mark this entry as the embedding provider (handles `embed()` calls).
    #[serde(default)]
    pub embed: bool,

    /// Mark this entry as the default chat provider (overrides position-based default).
    #[serde(default)]
    pub default: bool,

    // --- Claude-specific ---
    /// Thinking mode configuration (see [`ThinkingConfig`]); `None` when not configured.
    #[serde(default)]
    pub thinking: Option<ThinkingConfig>,
    /// Default: `false`.
    /// NOTE(review): presumably enables server-side context compaction — confirm against
    /// the Claude backend.
    #[serde(default)]
    pub server_compaction: bool,
    /// Default: `false`.
    /// NOTE(review): presumably opts in to an extended context window — confirm against
    /// the Claude backend.
    #[serde(default)]
    pub enable_extended_context: bool,
    /// Prompt cache TTL variant. `None` keeps the default ~5-minute ephemeral TTL.
    /// Set to `"1h"` to enable the extended 1-hour TTL (beta, ~2× write cost).
    #[serde(default)]
    pub prompt_cache_ttl: Option<CacheTtl>,

    // --- OpenAI-specific ---
    /// Reasoning-effort setting passed as a free-form string; this type does not
    /// constrain the accepted values.
    #[serde(default)]
    pub reasoning_effort: Option<String>,

    // --- Gemini-specific ---
    /// Thinking level; `None` when not configured.
    #[serde(default)]
    pub thinking_level: Option<GeminiThinkingLevel>,
    /// NOTE(review): presumably a thinking-token budget; the signed type suggests
    /// negative sentinel values may be meaningful — confirm against the Gemini backend.
    #[serde(default)]
    pub thinking_budget: Option<i32>,
    /// NOTE(review): presumably asks the backend to return thought content — confirm.
    #[serde(default)]
    pub include_thoughts: Option<bool>,

    // --- Compatible-specific: optional inline api_key ---
    /// Optional inline API key.
    ///
    /// NOTE(review): the struct derives `Debug` and `Serialize` and this field has no
    /// `skip_serializing` attribute, so a populated key is included in `{:?}` output and
    /// in serialized config — confirm neither is ever logged or written somewhere public.
    #[serde(default)]
    pub api_key: Option<String>,

    // --- Candle-specific ---
    /// Inline Candle settings (see [`CandleInlineConfig`]); `None` when not configured.
    #[serde(default)]
    pub candle: Option<CandleInlineConfig>,

    // --- Vision ---
    /// NOTE(review): presumably the model used for image inputs — confirm with callers.
    #[serde(default)]
    pub vision_model: Option<String>,

    // --- Gonka-specific ---
    /// Gonka network node pool. Required (non-empty) when `type = "gonka"`.
    /// Omitted from serialized output while empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub gonka_nodes: Vec<GonkaNode>,
    /// bech32 chain prefix for address encoding. Defaults to `"gonka"` when omitted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gonka_chain_prefix: Option<String>,

    // --- Cocoon-specific ---
    /// Cocoon sidecar HTTP URL. Defaults to `"http://localhost:10000"` when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_client_url: Option<String>,
    /// Sentinel field for access hash. Leave empty in config; actual value
    /// is resolved from the age vault as `ZEPH_COCOON_ACCESS_HASH`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_access_hash: Option<String>,
    /// Whether to perform a health check against `/stats` at provider construction time.
    /// Defaults to `true`, and is omitted from serialized output while `true`.
    #[serde(default = "default_true", skip_serializing_if = "is_true")]
    pub cocoon_health_check: bool,
    /// Manual per-1K-token pricing for this Cocoon provider.
    ///
    /// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
    /// When this section is present, the values are registered with `CostTracker` at startup
    /// so that token costs are tracked accurately.
    ///
    /// Example TOML:
    /// ```toml
    /// [llm.providers.cocoon_pricing]
    /// prompt_cents_per_1k = 0.01
    /// completion_cents_per_1k = 0.03
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_pricing: Option<CocoonPricing>,

    /// Provider-specific instruction file.
    #[serde(default)]
    pub instruction_file: Option<std::path::PathBuf>,

    /// Maximum concurrent LLM calls from orchestrated sub-agents to this provider.
    ///
    /// When set, `DagScheduler` acquires a semaphore permit before dispatching a
    /// sub-agent that targets this provider. Dispatch is deferred (using the existing
    /// `deferral_backoff` mechanism) when the semaphore is saturated.
    ///
    /// `None` (default) = unlimited — no admission control applied.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [[llm.providers]]
    /// name = "quality"
    /// type = "openai"
    /// model = "gpt-5"
    /// max_concurrent = 3
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_concurrent: Option<u32>,
}
1570
1571impl Default for ProviderEntry {
1572    fn default() -> Self {
1573        Self {
1574            provider_type: ProviderKind::Ollama,
1575            name: None,
1576            model: None,
1577            base_url: None,
1578            max_tokens: None,
1579            embedding_model: None,
1580            stt_model: None,
1581            embed: false,
1582            default: false,
1583            thinking: None,
1584            server_compaction: false,
1585            enable_extended_context: false,
1586            prompt_cache_ttl: None,
1587            reasoning_effort: None,
1588            thinking_level: None,
1589            thinking_budget: None,
1590            include_thoughts: None,
1591            api_key: None,
1592            candle: None,
1593            vision_model: None,
1594            gonka_nodes: Vec::new(),
1595            gonka_chain_prefix: None,
1596            cocoon_client_url: None,
1597            cocoon_access_hash: None,
1598            cocoon_health_check: true,
1599            cocoon_pricing: None,
1600            instruction_file: None,
1601            max_concurrent: None,
1602        }
1603    }
1604}
1605
1606impl ProviderEntry {
1607    /// Resolve the effective name: explicit `name` field or type string.
1608    #[must_use]
1609    pub fn effective_name(&self) -> String {
1610        self.name
1611            .clone()
1612            .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1613    }
1614
1615    /// Resolve the effective model: explicit `model` field or the provider-type default.
1616    ///
1617    /// Defaults mirror those used in `build_provider_from_entry` so that `runtime.model_name`
1618    /// always reflects the actual model being used rather than the provider type string.
1619    #[must_use]
1620    pub fn effective_model(&self) -> String {
1621        if let Some(ref m) = self.model {
1622            return m.clone();
1623        }
1624        match self.provider_type {
1625            ProviderKind::Ollama => "qwen3:8b".to_owned(),
1626            ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1627            ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1628            ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1629            // Compatible/Candle return empty because the model is resolved elsewhere.
1630            // Gonka returns empty because it is a blockchain provider, not an LLM — there is no model concept.
1631            ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1632            ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1633        }
1634    }
1635
1636    /// Validate this entry for cross-field consistency.
1637    ///
1638    /// # Errors
1639    ///
1640    /// Returns `ConfigError` when a fatal invariant is violated (e.g. compatible provider
1641    /// without a name).
1642    pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1643        use crate::error::ConfigError;
1644
1645        // B2: compatible provider MUST have name set.
1646        if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1647            return Err(ConfigError::Validation(
1648                "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1649            ));
1650        }
1651
1652        // B3: gonka provider MUST have name and valid gonka_nodes.
1653        if self.provider_type == ProviderKind::Gonka {
1654            if self.name.is_none() {
1655                return Err(ConfigError::Validation(
1656                    "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1657                ));
1658            }
1659            self.validate_gonka_nodes()?;
1660        }
1661
1662        // B4: cocoon provider MUST have a name.
1663        if self.provider_type == ProviderKind::Cocoon
1664            && self.name.as_ref().is_none_or(String::is_empty)
1665        {
1666            return Err(ConfigError::Validation(
1667                "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1668            ));
1669        }
1670
1671        // B5: cocoon URL must be valid http/https; cocoon model must not be empty.
1672        if self.provider_type == ProviderKind::Cocoon {
1673            let name = self.effective_name();
1674            if let Some(ref url_str) = self.cocoon_client_url {
1675                match url::Url::parse(url_str) {
1676                    Err(_) => {
1677                        return Err(ConfigError::Validation(format!(
1678                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1679                             '{url_str}' is not a valid URL; expected format: \
1680                             http://localhost:10000"
1681                        )));
1682                    }
1683                    Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1684                        return Err(ConfigError::Validation(format!(
1685                            "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1686                             localhost or 127.0.0.1, got '{}'",
1687                            u.host_str().unwrap_or("<none>")
1688                        )));
1689                    }
1690                    Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1691                        return Err(ConfigError::Validation(format!(
1692                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1693                             scheme must be http or https, got '{}'",
1694                            u.scheme()
1695                        )));
1696                    }
1697                    _ => {}
1698                }
1699            }
1700            if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1701                return Err(ConfigError::Validation(format!(
1702                    "[[llm.providers]] entry '{name}': model must not be empty \
1703                     for cocoon provider"
1704                )));
1705            }
1706            if let Some(ref p) = self.cocoon_pricing {
1707                if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1708                    return Err(ConfigError::Validation(format!(
1709                        "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1710                         must be a finite non-negative number"
1711                    )));
1712                }
1713                if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1714                    return Err(ConfigError::Validation(format!(
1715                        "[[llm.providers]] entry '{name}': \
1716                         cocoon_pricing.completion_cents_per_1k \
1717                         must be a finite non-negative number"
1718                    )));
1719                }
1720            }
1721        }
1722
1723        // B1: warn on irrelevant fields.
1724        self.warn_irrelevant_fields();
1725
1726        // W6: Candle STT-only provider (stt_model set, no model) is valid — no warning needed.
1727        // Warn if Ollama has stt_model set (Ollama does not support Whisper API).
1728        if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1729            tracing::warn!(
1730                provider = self.effective_name(),
1731                "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1732                 Whisper STT API — use OpenAI, compatible, or candle instead"
1733            );
1734        }
1735
1736        Ok(())
1737    }
1738
1739    /// Resolve the effective Gonka chain prefix: explicit value or `"gonka"` default.
1740    #[must_use]
1741    pub fn effective_gonka_chain_prefix(&self) -> &str {
1742        self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1743    }
1744
1745    fn warn_irrelevant_fields(&self) {
1746        let name = self.effective_name();
1747        match self.provider_type {
1748            ProviderKind::Ollama => {
1749                if self.thinking.is_some() {
1750                    tracing::warn!(
1751                        provider = name,
1752                        "field `thinking` is only used by Claude providers"
1753                    );
1754                }
1755                if self.reasoning_effort.is_some() {
1756                    tracing::warn!(
1757                        provider = name,
1758                        "field `reasoning_effort` is only used by OpenAI providers"
1759                    );
1760                }
1761                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1762                    tracing::warn!(
1763                        provider = name,
1764                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1765                    );
1766                }
1767            }
1768            ProviderKind::Claude => {
1769                if self.reasoning_effort.is_some() {
1770                    tracing::warn!(
1771                        provider = name,
1772                        "field `reasoning_effort` is only used by OpenAI providers"
1773                    );
1774                }
1775                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1776                    tracing::warn!(
1777                        provider = name,
1778                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1779                    );
1780                }
1781            }
1782            ProviderKind::OpenAi => {
1783                if self.thinking.is_some() {
1784                    tracing::warn!(
1785                        provider = name,
1786                        "field `thinking` is only used by Claude providers"
1787                    );
1788                }
1789                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1790                    tracing::warn!(
1791                        provider = name,
1792                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1793                    );
1794                }
1795            }
1796            ProviderKind::Gemini => {
1797                if self.thinking.is_some() {
1798                    tracing::warn!(
1799                        provider = name,
1800                        "field `thinking` is only used by Claude providers"
1801                    );
1802                }
1803                if self.reasoning_effort.is_some() {
1804                    tracing::warn!(
1805                        provider = name,
1806                        "field `reasoning_effort` is only used by OpenAI providers"
1807                    );
1808                }
1809            }
1810            ProviderKind::Gonka => {
1811                if self.thinking.is_some() {
1812                    tracing::warn!(
1813                        provider = name,
1814                        "field `thinking` is only used by Claude providers"
1815                    );
1816                }
1817                if self.reasoning_effort.is_some() {
1818                    tracing::warn!(
1819                        provider = name,
1820                        "field `reasoning_effort` is only used by OpenAI providers"
1821                    );
1822                }
1823                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1824                    tracing::warn!(
1825                        provider = name,
1826                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1827                    );
1828                }
1829            }
1830            ProviderKind::Compatible | ProviderKind::Candle => {}
1831            ProviderKind::Cocoon => {
1832                if self.base_url.is_some() {
1833                    tracing::warn!(
1834                        provider = name,
1835                        "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1836                    );
1837                }
1838            }
1839        }
1840    }
1841
1842    fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1843        use crate::error::ConfigError;
1844        if self.gonka_nodes.is_empty() {
1845            return Err(ConfigError::Validation(format!(
1846                "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1847                self.effective_name()
1848            )));
1849        }
1850        for (i, node) in self.gonka_nodes.iter().enumerate() {
1851            if node.url.is_empty() {
1852                return Err(ConfigError::Validation(format!(
1853                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1854                    self.effective_name()
1855                )));
1856            }
1857            if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1858                return Err(ConfigError::Validation(format!(
1859                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1860                    self.effective_name()
1861                )));
1862            }
1863        }
1864        Ok(())
1865    }
1866}
1867
/// Per-session LLM generation override parameters persisted across restarts (#4654).
///
/// Phase 1 captures `reasoning_effort` only. Serialized to JSON and stored in the
/// `channel_preferences` table under `pref_key = "provider_overrides"`.
///
/// Deserialization is tolerant in both directions: `#[serde(default)]` fills any field that
/// is missing from the blob with its `Default` value, and because `deny_unknown_fields` is
/// not set, extra fields written by a newer binary are ignored, so the known params
/// still apply. (Issue #4654 originally specified `deny_unknown_fields`; this was intentionally
/// relaxed for forward compatibility — see PR and CHANGELOG.)
///
/// # Examples
///
/// ```
/// use zeph_config::ProviderOverrides;
///
/// let overrides = ProviderOverrides {
///     reasoning_effort: Some("high".to_owned()),
/// };
/// assert!(!overrides.is_empty());
///
/// let empty = ProviderOverrides::default();
/// assert!(empty.is_empty());
/// ```
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct ProviderOverrides {
    /// `OpenAI` reasoning effort: `"low"`, `"medium"`, or `"high"`. `None` = provider default.
    ///
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<String>,
}
1898
1899impl ProviderOverrides {
1900    /// Returns `true` when no override is set.
1901    ///
1902    /// Used by the persistence layer to skip writing an empty blob.
1903    ///
1904    /// # Examples
1905    ///
1906    /// ```
1907    /// use zeph_config::ProviderOverrides;
1908    ///
1909    /// assert!(ProviderOverrides::default().is_empty());
1910    /// assert!(!ProviderOverrides { reasoning_effort: Some("low".into()) }.is_empty());
1911    /// ```
1912    #[must_use]
1913    pub fn is_empty(&self) -> bool {
1914        self.reasoning_effort.is_none()
1915    }
1916}
1917
1918/// Validate a pool of `ProviderEntry` items.
1919///
1920/// # Errors
1921///
1922/// Returns `ConfigError` for fatal validation failures:
1923/// - Empty pool
1924/// - Duplicate names
1925/// - Multiple entries marked `default = true`
1926/// - Individual entry validation errors
1927pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1928    use crate::error::ConfigError;
1929    use std::collections::HashSet;
1930
1931    if entries.is_empty() {
1932        return Err(ConfigError::Validation(
1933            "at least one LLM provider must be configured in [[llm.providers]]".into(),
1934        ));
1935    }
1936
1937    let default_count = entries.iter().filter(|e| e.default).count();
1938    if default_count > 1 {
1939        return Err(ConfigError::Validation(
1940            "only one [[llm.providers]] entry can be marked `default = true`".into(),
1941        ));
1942    }
1943
1944    let mut seen_names: HashSet<String> = HashSet::new();
1945    for entry in entries {
1946        let name = entry.effective_name();
1947        if !seen_names.insert(name.clone()) {
1948            return Err(ConfigError::Validation(format!(
1949                "duplicate provider name \"{name}\" in [[llm.providers]]"
1950            )));
1951        }
1952        entry.validate()?;
1953    }
1954
1955    Ok(())
1956}
1957
1958#[cfg(test)]
1959mod tests {
1960    use super::*;
1961
1962    fn ollama_entry() -> ProviderEntry {
1963        ProviderEntry {
1964            provider_type: ProviderKind::Ollama,
1965            name: Some("ollama".into()),
1966            model: Some("qwen3:8b".into()),
1967            ..Default::default()
1968        }
1969    }
1970
1971    fn claude_entry() -> ProviderEntry {
1972        ProviderEntry {
1973            provider_type: ProviderKind::Claude,
1974            name: Some("claude".into()),
1975            model: Some("claude-sonnet-4-6".into()),
1976            max_tokens: Some(8192),
1977            ..Default::default()
1978        }
1979    }
1980
1981    // ─── ProviderEntry::validate ─────────────────────────────────────────────
1982
1983    #[test]
1984    fn validate_ollama_valid() {
1985        assert!(ollama_entry().validate().is_ok());
1986    }
1987
1988    #[test]
1989    fn validate_claude_valid() {
1990        assert!(claude_entry().validate().is_ok());
1991    }
1992
1993    #[test]
1994    fn validate_compatible_without_name_errors() {
1995        let entry = ProviderEntry {
1996            provider_type: ProviderKind::Compatible,
1997            name: None,
1998            ..Default::default()
1999        };
2000        let err = entry.validate().unwrap_err();
2001        assert!(
2002            err.to_string().contains("compatible"),
2003            "error should mention compatible: {err}"
2004        );
2005    }
2006
2007    #[test]
2008    fn validate_compatible_with_name_ok() {
2009        let entry = ProviderEntry {
2010            provider_type: ProviderKind::Compatible,
2011            name: Some("my-proxy".into()),
2012            base_url: Some("http://localhost:8080".into()),
2013            model: Some("gpt-4o".into()),
2014            max_tokens: Some(4096),
2015            ..Default::default()
2016        };
2017        assert!(entry.validate().is_ok());
2018    }
2019
2020    #[test]
2021    fn validate_openai_valid() {
2022        let entry = ProviderEntry {
2023            provider_type: ProviderKind::OpenAi,
2024            name: Some("openai".into()),
2025            model: Some("gpt-4o".into()),
2026            max_tokens: Some(4096),
2027            ..Default::default()
2028        };
2029        assert!(entry.validate().is_ok());
2030    }
2031
2032    #[test]
2033    fn validate_gemini_valid() {
2034        let entry = ProviderEntry {
2035            provider_type: ProviderKind::Gemini,
2036            name: Some("gemini".into()),
2037            model: Some("gemini-2.0-flash".into()),
2038            ..Default::default()
2039        };
2040        assert!(entry.validate().is_ok());
2041    }
2042
2043    // ─── validate_pool ───────────────────────────────────────────────────────
2044
2045    #[test]
2046    fn validate_pool_empty_errors() {
2047        let err = validate_pool(&[]).unwrap_err();
2048        assert!(err.to_string().contains("at least one"), "{err}");
2049    }
2050
2051    #[test]
2052    fn validate_pool_single_entry_ok() {
2053        assert!(validate_pool(&[ollama_entry()]).is_ok());
2054    }
2055
2056    #[test]
2057    fn validate_pool_duplicate_names_errors() {
2058        let a = ollama_entry();
2059        let b = ollama_entry(); // same effective name "ollama"
2060        let err = validate_pool(&[a, b]).unwrap_err();
2061        assert!(err.to_string().contains("duplicate"), "{err}");
2062    }
2063
2064    #[test]
2065    fn validate_pool_multiple_defaults_errors() {
2066        let mut a = ollama_entry();
2067        let mut b = claude_entry();
2068        a.default = true;
2069        b.default = true;
2070        let err = validate_pool(&[a, b]).unwrap_err();
2071        assert!(err.to_string().contains("default"), "{err}");
2072    }
2073
2074    #[test]
2075    fn validate_pool_two_different_providers_ok() {
2076        assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
2077    }
2078
2079    #[test]
2080    fn validate_pool_propagates_entry_error() {
2081        let bad = ProviderEntry {
2082            provider_type: ProviderKind::Compatible,
2083            name: None, // invalid: compatible without name
2084            ..Default::default()
2085        };
2086        assert!(validate_pool(&[bad]).is_err());
2087    }
2088
2089    // ─── ProviderEntry::effective_model ──────────────────────────────────────
2090
2091    #[test]
2092    fn effective_model_returns_explicit_when_set() {
2093        let entry = ProviderEntry {
2094            provider_type: ProviderKind::Claude,
2095            model: Some("claude-sonnet-4-6".into()),
2096            ..Default::default()
2097        };
2098        assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
2099    }
2100
2101    #[test]
2102    fn effective_model_ollama_default_when_none() {
2103        let entry = ProviderEntry {
2104            provider_type: ProviderKind::Ollama,
2105            model: None,
2106            ..Default::default()
2107        };
2108        assert_eq!(entry.effective_model(), "qwen3:8b");
2109    }
2110
2111    #[test]
2112    fn effective_model_claude_default_when_none() {
2113        let entry = ProviderEntry {
2114            provider_type: ProviderKind::Claude,
2115            model: None,
2116            ..Default::default()
2117        };
2118        assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
2119    }
2120
2121    #[test]
2122    fn effective_model_openai_default_when_none() {
2123        let entry = ProviderEntry {
2124            provider_type: ProviderKind::OpenAi,
2125            model: None,
2126            ..Default::default()
2127        };
2128        assert_eq!(entry.effective_model(), "gpt-4o-mini");
2129    }
2130
2131    #[test]
2132    fn effective_model_gemini_default_when_none() {
2133        let entry = ProviderEntry {
2134            provider_type: ProviderKind::Gemini,
2135            model: None,
2136            ..Default::default()
2137        };
2138        assert_eq!(entry.effective_model(), "gemini-2.0-flash");
2139    }
2140
2141    // ─── LlmConfig::check_legacy_format ──────────────────────────────────────
2142
2143    // Parse a complete TOML snippet that includes the [llm] header.
2144    fn parse_llm(toml: &str) -> LlmConfig {
2145        #[derive(serde::Deserialize)]
2146        struct Wrapper {
2147            llm: LlmConfig,
2148        }
2149        toml::from_str::<Wrapper>(toml).unwrap().llm
2150    }
2151
2152    #[test]
2153    fn check_legacy_format_new_format_ok() {
2154        let cfg = parse_llm(
2155            r#"
2156[llm]
2157
2158[[llm.providers]]
2159type = "ollama"
2160model = "qwen3:8b"
2161"#,
2162        );
2163        assert!(cfg.check_legacy_format().is_ok());
2164    }
2165
2166    #[test]
2167    fn check_legacy_format_empty_providers_no_legacy_ok() {
2168        // No providers, no legacy fields — passes (empty [llm] is acceptable here)
2169        let cfg = parse_llm("[llm]\n");
2170        assert!(cfg.check_legacy_format().is_ok());
2171    }
2172
2173    // ─── LlmConfig::effective_* helpers ──────────────────────────────────────
2174
2175    #[test]
2176    fn effective_provider_falls_back_to_ollama_when_no_providers() {
2177        let cfg = parse_llm("[llm]\n");
2178        assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
2179    }
2180
2181    #[test]
2182    fn effective_provider_reads_from_providers_first() {
2183        let cfg = parse_llm(
2184            r#"
2185[llm]
2186
2187[[llm.providers]]
2188type = "claude"
2189model = "claude-sonnet-4-6"
2190"#,
2191        );
2192        assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
2193    }
2194
2195    #[test]
2196    fn effective_model_reads_from_providers_first() {
2197        let cfg = parse_llm(
2198            r#"
2199[llm]
2200
2201[[llm.providers]]
2202type = "ollama"
2203model = "qwen3:8b"
2204"#,
2205        );
2206        assert_eq!(cfg.effective_model(), "qwen3:8b");
2207    }
2208
2209    #[test]
2210    fn effective_model_skips_embed_only_provider() {
2211        let cfg = parse_llm(
2212            r#"
2213[llm]
2214
2215[[llm.providers]]
2216type = "ollama"
2217model = "gemma4:26b"
2218embed = true
2219
2220[[llm.providers]]
2221type = "openai"
2222model = "gpt-4o-mini"
2223"#,
2224        );
2225        assert_eq!(cfg.effective_model(), "gpt-4o-mini");
2226    }
2227
2228    #[test]
2229    fn effective_base_url_default_when_absent() {
2230        let cfg = parse_llm("[llm]\n");
2231        assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
2232    }
2233
2234    #[test]
2235    fn effective_base_url_from_providers_entry() {
2236        let cfg = parse_llm(
2237            r#"
2238[llm]
2239
2240[[llm.providers]]
2241type = "ollama"
2242base_url = "http://myhost:11434"
2243"#,
2244        );
2245        assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
2246    }
2247
2248    // ─── ComplexityRoutingConfig / LlmRoutingStrategy::Triage TOML parsing ──
2249
2250    #[test]
2251    fn complexity_routing_defaults() {
2252        let cr = ComplexityRoutingConfig::default();
2253        assert!(
2254            cr.bypass_single_provider,
2255            "bypass_single_provider must default to true"
2256        );
2257        assert_eq!(cr.triage_timeout_secs, 5);
2258        assert_eq!(cr.max_triage_tokens, 50);
2259        assert!(cr.triage_provider.is_none());
2260        assert!(cr.tiers.simple.is_none());
2261    }
2262
2263    #[test]
2264    fn complexity_routing_toml_round_trip() {
2265        let cfg = parse_llm(
2266            r#"
2267[llm]
2268routing = "triage"
2269
2270[llm.complexity_routing]
2271triage_provider = "fast"
2272bypass_single_provider = false
2273triage_timeout_secs = 10
2274max_triage_tokens = 100
2275
2276[llm.complexity_routing.tiers]
2277simple = "fast"
2278medium = "medium"
2279complex = "large"
2280expert = "opus"
2281"#,
2282        );
2283        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2284        let cr = cfg
2285            .complexity_routing
2286            .expect("complexity_routing must be present");
2287        assert_eq!(
2288            cr.triage_provider.as_ref().map(ProviderName::as_str),
2289            Some("fast")
2290        );
2291        assert!(!cr.bypass_single_provider);
2292        assert_eq!(cr.triage_timeout_secs, 10);
2293        assert_eq!(cr.max_triage_tokens, 100);
2294        assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
2295        assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
2296        assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
2297        assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
2298    }
2299
2300    #[test]
2301    fn complexity_routing_partial_tiers_toml() {
2302        // Only simple + complex configured; medium and expert are None.
2303        let cfg = parse_llm(
2304            r#"
2305[llm]
2306routing = "triage"
2307
2308[llm.complexity_routing.tiers]
2309simple = "haiku"
2310complex = "sonnet"
2311"#,
2312        );
2313        let cr = cfg
2314            .complexity_routing
2315            .expect("complexity_routing must be present");
2316        assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
2317        assert!(cr.tiers.medium.is_none());
2318        assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
2319        assert!(cr.tiers.expert.is_none());
2320        // Defaults still applied.
2321        assert!(cr.bypass_single_provider);
2322        assert_eq!(cr.triage_timeout_secs, 5);
2323    }
2324
2325    #[test]
2326    fn routing_strategy_triage_deserialized() {
2327        let cfg = parse_llm(
2328            r#"
2329[llm]
2330routing = "triage"
2331"#,
2332        );
2333        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2334    }
2335
2336    // ─── stt_provider_entry ───────────────────────────────────────────────────
2337
2338    #[test]
2339    fn stt_provider_entry_by_name_match() {
2340        let cfg = parse_llm(
2341            r#"
2342[llm]
2343
2344[[llm.providers]]
2345type = "openai"
2346name = "quality"
2347model = "gpt-5.4"
2348stt_model = "gpt-4o-mini-transcribe"
2349
2350[llm.stt]
2351provider = "quality"
2352"#,
2353        );
2354        let entry = cfg.stt_provider_entry().expect("should find stt provider");
2355        assert_eq!(entry.effective_name(), "quality");
2356        assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
2357    }
2358
2359    #[test]
2360    fn stt_provider_entry_auto_detect_when_provider_empty() {
2361        let cfg = parse_llm(
2362            r#"
2363[llm]
2364
2365[[llm.providers]]
2366type = "openai"
2367name = "openai-stt"
2368stt_model = "whisper-1"
2369
2370[llm.stt]
2371provider = ""
2372"#,
2373        );
2374        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2375        assert_eq!(entry.effective_name(), "openai-stt");
2376    }
2377
2378    #[test]
2379    fn stt_provider_entry_auto_detect_no_stt_section() {
2380        let cfg = parse_llm(
2381            r#"
2382[llm]
2383
2384[[llm.providers]]
2385type = "openai"
2386name = "openai-stt"
2387stt_model = "whisper-1"
2388"#,
2389        );
2390        // No [llm.stt] section — should still find first provider with stt_model.
2391        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2392        assert_eq!(entry.effective_name(), "openai-stt");
2393    }
2394
2395    #[test]
2396    fn stt_provider_entry_none_when_no_stt_model() {
2397        let cfg = parse_llm(
2398            r#"
2399[llm]
2400
2401[[llm.providers]]
2402type = "openai"
2403name = "quality"
2404model = "gpt-5.4"
2405"#,
2406        );
2407        assert!(cfg.stt_provider_entry().is_none());
2408    }
2409
2410    #[test]
2411    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
2412        // Named provider exists but has no stt_model; another unnamed has stt_model.
2413        let cfg = parse_llm(
2414            r#"
2415[llm]
2416
2417[[llm.providers]]
2418type = "openai"
2419name = "quality"
2420model = "gpt-5.4"
2421
2422[[llm.providers]]
2423type = "openai"
2424name = "openai-stt"
2425stt_model = "whisper-1"
2426
2427[llm.stt]
2428provider = "quality"
2429"#,
2430        );
2431        // "quality" has no stt_model — returns None for name-based lookup.
2432        assert!(cfg.stt_provider_entry().is_none());
2433    }
2434
2435    #[test]
2436    fn stt_config_deserializes_new_slim_format() {
2437        let cfg = parse_llm(
2438            r#"
2439[llm]
2440
2441[[llm.providers]]
2442type = "openai"
2443name = "quality"
2444stt_model = "whisper-1"
2445
2446[llm.stt]
2447provider = "quality"
2448language = "en"
2449"#,
2450        );
2451        let stt = cfg.stt.as_ref().expect("stt section present");
2452        assert_eq!(stt.provider, "quality");
2453        assert_eq!(stt.language, "en");
2454    }
2455
2456    #[test]
2457    fn stt_config_default_provider_is_empty() {
2458        // Verify that W4 fix: default_stt_provider() returns "" not "whisper".
2459        assert_eq!(default_stt_provider(), "");
2460    }
2461
2462    #[test]
2463    fn validate_stt_missing_provider_ok() {
2464        let cfg = parse_llm("[llm]\n");
2465        assert!(cfg.validate_stt().is_ok());
2466    }
2467
2468    #[test]
2469    fn validate_stt_valid_reference() {
2470        let cfg = parse_llm(
2471            r#"
2472[llm]
2473
2474[[llm.providers]]
2475type = "openai"
2476name = "quality"
2477stt_model = "whisper-1"
2478
2479[llm.stt]
2480provider = "quality"
2481"#,
2482        );
2483        assert!(cfg.validate_stt().is_ok());
2484    }
2485
2486    #[test]
2487    fn validate_stt_nonexistent_provider_errors() {
2488        let cfg = parse_llm(
2489            r#"
2490[llm]
2491
2492[[llm.providers]]
2493type = "openai"
2494name = "quality"
2495model = "gpt-5.4"
2496
2497[llm.stt]
2498provider = "nonexistent"
2499"#,
2500        );
2501        assert!(cfg.validate_stt().is_err());
2502    }
2503
2504    #[test]
2505    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
2506        // MEDIUM: provider is found but has no stt_model — should return Ok (warn path, not error).
2507        let cfg = parse_llm(
2508            r#"
2509[llm]
2510
2511[[llm.providers]]
2512type = "openai"
2513name = "quality"
2514model = "gpt-5.4"
2515
2516[llm.stt]
2517provider = "quality"
2518"#,
2519        );
2520        // validate_stt must succeed (only a tracing::warn is emitted — not an error).
2521        assert!(cfg.validate_stt().is_ok());
2522        // stt_provider_entry must return None because no stt_model is set.
2523        assert!(
2524            cfg.stt_provider_entry().is_none(),
2525            "stt_provider_entry must be None when provider has no stt_model"
2526        );
2527    }
2528
2529    // ─── BanditConfig::warmup_queries deserialization ─────────────────────────
2530
2531    #[test]
2532    fn bandit_warmup_queries_explicit_value_is_deserialized() {
2533        let cfg = parse_llm(
2534            r#"
2535[llm]
2536
2537[llm.router]
2538strategy = "bandit"
2539
2540[llm.router.bandit]
2541warmup_queries = 50
2542"#,
2543        );
2544        let bandit = cfg
2545            .router
2546            .expect("router section must be present")
2547            .bandit
2548            .expect("bandit section must be present");
2549        assert_eq!(
2550            bandit.warmup_queries,
2551            Some(50),
2552            "warmup_queries = 50 must deserialize to Some(50)"
2553        );
2554    }
2555
2556    #[test]
2557    fn bandit_warmup_queries_explicit_null_is_none() {
2558        // Explicitly writing the field as absent: field simply not present is
2559        // equivalent due to #[serde(default)]. Test that an explicit 0 is Some(0).
2560        let cfg = parse_llm(
2561            r#"
2562[llm]
2563
2564[llm.router]
2565strategy = "bandit"
2566
2567[llm.router.bandit]
2568warmup_queries = 0
2569"#,
2570        );
2571        let bandit = cfg
2572            .router
2573            .expect("router section must be present")
2574            .bandit
2575            .expect("bandit section must be present");
2576        // 0 is a valid explicit value — it means "preserve computed default".
2577        assert_eq!(
2578            bandit.warmup_queries,
2579            Some(0),
2580            "warmup_queries = 0 must deserialize to Some(0)"
2581        );
2582    }
2583
2584    #[test]
2585    fn bandit_warmup_queries_missing_field_defaults_to_none() {
2586        // When warmup_queries is omitted entirely, #[serde(default)] must produce None.
2587        let cfg = parse_llm(
2588            r#"
2589[llm]
2590
2591[llm.router]
2592strategy = "bandit"
2593
2594[llm.router.bandit]
2595alpha = 1.5
2596"#,
2597        );
2598        let bandit = cfg
2599            .router
2600            .expect("router section must be present")
2601            .bandit
2602            .expect("bandit section must be present");
2603        assert_eq!(
2604            bandit.warmup_queries, None,
2605            "omitted warmup_queries must default to None"
2606        );
2607    }
2608
2609    #[test]
2610    fn provider_name_new_and_as_str() {
2611        let n = ProviderName::new("fast");
2612        assert_eq!(n.as_str(), "fast");
2613        assert!(!n.is_empty());
2614    }
2615
2616    #[test]
2617    fn provider_name_default_is_empty() {
2618        let n = ProviderName::default();
2619        assert!(n.is_empty());
2620        assert_eq!(n.as_str(), "");
2621    }
2622
2623    #[test]
2624    fn provider_name_partial_eq_str() {
2625        let n = ProviderName::new("fast");
2626        assert_eq!(n, "fast");
2627        assert_ne!(n, "slow");
2628    }
2629
2630    #[test]
2631    fn provider_name_serde_roundtrip() {
2632        let n = ProviderName::new("my-provider");
2633        let json = serde_json::to_string(&n).expect("serialize");
2634        assert_eq!(json, "\"my-provider\"");
2635        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2636        assert_eq!(back, n);
2637    }
2638
2639    #[test]
2640    fn provider_name_serde_empty_roundtrip() {
2641        let n = ProviderName::default();
2642        let json = serde_json::to_string(&n).expect("serialize");
2643        assert_eq!(json, "\"\"");
2644        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2645        assert_eq!(back, n);
2646        assert!(back.is_empty());
2647    }
2648
2649    // ─── GonkaNode / ProviderKind::Gonka ─────────────────────────────────────
2650
2651    fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2652        ProviderEntry {
2653            provider_type: ProviderKind::Gonka,
2654            name: Some("my-gonka".into()),
2655            gonka_nodes: nodes,
2656            ..Default::default()
2657        }
2658    }
2659
2660    fn valid_gonka_nodes() -> Vec<GonkaNode> {
2661        vec![
2662            GonkaNode {
2663                url: "https://node1.gonka.ai".into(),
2664                address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2665                name: Some("node1".into()),
2666            },
2667            GonkaNode {
2668                url: "https://node2.gonka.ai".into(),
2669                address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2670                name: Some("node2".into()),
2671            },
2672            GonkaNode {
2673                url: "http://node3.internal".into(),
2674                address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2675                name: None,
2676            },
2677        ]
2678    }
2679
2680    #[test]
2681    fn validate_gonka_valid() {
2682        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2683        assert!(entry.validate().is_ok());
2684    }
2685
2686    #[test]
2687    fn validate_gonka_empty_nodes_errors() {
2688        let entry = gonka_entry_with_nodes(vec![]);
2689        let err = entry.validate().unwrap_err();
2690        assert!(
2691            err.to_string().contains("gonka_nodes"),
2692            "error should mention gonka_nodes: {err}"
2693        );
2694    }
2695
2696    #[test]
2697    fn validate_gonka_node_empty_url_errors() {
2698        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2699            url: String::new(),
2700            address: "gonka1test".into(),
2701            name: None,
2702        }]);
2703        let err = entry.validate().unwrap_err();
2704        assert!(err.to_string().contains("url"), "{err}");
2705    }
2706
2707    #[test]
2708    fn validate_gonka_node_invalid_scheme_errors() {
2709        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2710            url: "ftp://node.gonka.ai".into(),
2711            address: "gonka1test".into(),
2712            name: None,
2713        }]);
2714        let err = entry.validate().unwrap_err();
2715        assert!(err.to_string().contains("http"), "{err}");
2716    }
2717
2718    #[test]
2719    fn validate_gonka_without_name_errors() {
2720        let entry = ProviderEntry {
2721            provider_type: ProviderKind::Gonka,
2722            name: None,
2723            gonka_nodes: valid_gonka_nodes(),
2724            ..Default::default()
2725        };
2726        let err = entry.validate().unwrap_err();
2727        assert!(err.to_string().contains("gonka"), "{err}");
2728    }
2729
2730    #[test]
2731    fn gonka_toml_round_trip() {
2732        let toml = r#"
2733[llm]
2734
2735[[llm.providers]]
2736type = "gonka"
2737name = "my-gonka"
2738gonka_chain_prefix = "custom-chain"
2739
2740[[llm.providers.gonka_nodes]]
2741url = "https://node1.gonka.ai"
2742address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2743name = "node1"
2744
2745[[llm.providers.gonka_nodes]]
2746url = "https://node2.gonka.ai"
2747address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
2748name = "node2"
2749
2750[[llm.providers.gonka_nodes]]
2751url = "https://node3.gonka.ai"
2752address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
2753"#;
2754        let cfg = parse_llm(toml);
2755        assert_eq!(cfg.providers.len(), 1);
2756        let entry = &cfg.providers[0];
2757        assert_eq!(entry.provider_type, ProviderKind::Gonka);
2758        assert_eq!(entry.name.as_deref(), Some("my-gonka"));
2759        let nodes = &entry.gonka_nodes;
2760        assert_eq!(nodes.len(), 3);
2761        assert_eq!(nodes[0].url, "https://node1.gonka.ai");
2762        assert_eq!(
2763            nodes[0].address,
2764            "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2765        );
2766        assert_eq!(nodes[0].name.as_deref(), Some("node1"));
2767        assert_eq!(nodes[2].name, None);
2768        assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
2769    }
2770
2771    #[test]
2772    fn gonka_default_chain_prefix() {
2773        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2774        assert_eq!(entry.effective_gonka_chain_prefix(), "gonka");
2775    }
2776
2777    #[test]
2778    fn gonka_explicit_chain_prefix() {
2779        let entry = ProviderEntry {
2780            provider_type: ProviderKind::Gonka,
2781            name: Some("my-gonka".into()),
2782            gonka_nodes: valid_gonka_nodes(),
2783            gonka_chain_prefix: Some("my-chain".into()),
2784            ..Default::default()
2785        };
2786        assert_eq!(entry.effective_gonka_chain_prefix(), "my-chain");
2787    }
2788
2789    #[test]
2790    fn effective_model_gonka_is_empty() {
2791        let entry = ProviderEntry {
2792            provider_type: ProviderKind::Gonka,
2793            model: None,
2794            ..Default::default()
2795        };
2796        assert_eq!(entry.effective_model(), "");
2797    }
2798
2799    #[test]
2800    fn existing_configs_still_parse() {
2801        let toml = r#"
2802[llm]
2803
2804[[llm.providers]]
2805type = "ollama"
2806model = "qwen3:8b"
2807
2808[[llm.providers]]
2809type = "claude"
2810name = "claude"
2811model = "claude-sonnet-4-6"
2812"#;
2813        let cfg = parse_llm(toml);
2814        assert_eq!(cfg.providers.len(), 2);
2815        assert_eq!(cfg.providers[0].provider_type, ProviderKind::Ollama);
2816        assert_eq!(cfg.providers[1].provider_type, ProviderKind::Claude);
2817    }
2818
2819    // ── ProviderEntry::validate — Cocoon URL and model validation ─────────────
2820
2821    fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
2822        ProviderEntry {
2823            provider_type: ProviderKind::Cocoon,
2824            name: Some("cocoon".into()),
2825            cocoon_client_url: url.map(str::to_owned),
2826            model: model.map(str::to_owned),
2827            ..Default::default()
2828        }
2829    }
2830
2831    #[test]
2832    fn test_cocoon_url_validation_accepts_http() {
2833        assert!(
2834            cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2835                .validate()
2836                .is_ok()
2837        );
2838    }
2839
2840    #[test]
2841    fn test_cocoon_url_validation_accepts_https_localhost() {
2842        assert!(
2843            cocoon_entry(Some("https://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2844                .validate()
2845                .is_ok()
2846        );
2847    }
2848
2849    #[test]
2850    fn test_cocoon_url_validation_rejects_non_localhost() {
2851        let err = cocoon_entry(Some("http://192.168.1.10:10000"), Some("Qwen/Qwen3-0.6B"))
2852            .validate()
2853            .unwrap_err();
2854        assert!(
2855            err.to_string().contains("localhost"),
2856            "error should mention localhost restriction: {err}"
2857        );
2858    }
2859
2860    #[test]
2861    fn test_cocoon_url_validation_rejects_non_http_scheme() {
2862        let err = cocoon_entry(Some("ftp://localhost"), Some("Qwen/Qwen3-0.6B"))
2863            .validate()
2864            .unwrap_err();
2865        assert!(
2866            err.to_string().contains("ftp"),
2867            "error should mention the bad scheme: {err}"
2868        );
2869    }
2870
2871    #[test]
2872    fn test_cocoon_url_validation_rejects_invalid_url() {
2873        let err = cocoon_entry(Some("not-a-url"), Some("Qwen/Qwen3-0.6B"))
2874            .validate()
2875            .unwrap_err();
2876        assert!(
2877            err.to_string().contains("not-a-url"),
2878            "error should mention the bad value: {err}"
2879        );
2880    }
2881
2882    #[test]
2883    fn test_cocoon_url_none_passes() {
2884        assert!(
2885            cocoon_entry(None, Some("Qwen/Qwen3-0.6B"))
2886                .validate()
2887                .is_ok()
2888        );
2889    }
2890
2891    #[test]
2892    fn test_cocoon_model_empty_rejected() {
2893        let err = cocoon_entry(Some("http://localhost:10000"), Some(""))
2894            .validate()
2895            .unwrap_err();
2896        assert!(
2897            err.to_string().contains("empty"),
2898            "error should mention 'empty': {err}"
2899        );
2900    }
2901
2902    #[test]
2903    fn test_cocoon_model_none_passes() {
2904        assert!(
2905            cocoon_entry(Some("http://localhost:10000"), None)
2906                .validate()
2907                .is_ok()
2908        );
2909    }
2910
2911    #[test]
2912    fn validate_cocoon_pricing_negative_prompt_errors() {
2913        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2914        e.cocoon_pricing = Some(CocoonPricing {
2915            prompt_cents_per_1k: -1.0,
2916            completion_cents_per_1k: 0.03,
2917        });
2918        assert!(e.validate().is_err());
2919    }
2920
2921    #[test]
2922    fn validate_cocoon_pricing_negative_completion_errors() {
2923        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2924        e.cocoon_pricing = Some(CocoonPricing {
2925            prompt_cents_per_1k: 0.01,
2926            completion_cents_per_1k: -0.5,
2927        });
2928        assert!(e.validate().is_err());
2929    }
2930
2931    #[test]
2932    fn validate_cocoon_pricing_valid_passes() {
2933        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2934        e.cocoon_pricing = Some(CocoonPricing {
2935            prompt_cents_per_1k: 0.01,
2936            completion_cents_per_1k: 0.03,
2937        });
2938        assert!(e.validate().is_ok());
2939    }
2940}