zeph_config/
providers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use serde::{Deserialize, Serialize};
7
8// ── LLM provider config types (moved from zeph-llm) ─────────────────────────
9
10/// Extended or adaptive thinking mode for Claude.
11///
12/// Serializes with `mode` as tag:
13/// `{ "mode": "extended", "budget_tokens": 10000 }` or `{ "mode": "adaptive" }`.
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
15#[serde(tag = "mode", rename_all = "snake_case")]
16pub enum ThinkingConfig {
17    /// Extended thinking with an explicit token budget.
18    Extended {
19        /// Maximum thinking tokens to allocate.
20        budget_tokens: u32,
21    },
22    /// Adaptive thinking that selects effort automatically.
23    Adaptive {
24        /// Explicit effort hint when provided; model-chosen when `None`.
25        #[serde(default, skip_serializing_if = "Option::is_none")]
26        effort: Option<ThinkingEffort>,
27    },
28}
29
30/// Effort level for adaptive thinking.
31#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
32#[serde(rename_all = "lowercase")]
33pub enum ThinkingEffort {
34    /// Minimal thinking; fastest responses.
35    Low,
36    /// Balanced thinking depth. This is the default.
37    #[default]
38    Medium,
39    /// Maximum thinking depth; slowest responses.
40    High,
41}
42
43/// Prompt-cache TTL variant for the Anthropic API.
44///
45/// When used as a TOML config value the accepted strings are `"ephemeral"` and `"1h"`.
46/// On the wire (Anthropic API), `OneHour` serializes as `"1h"` inside the `cache_control.ttl`
47/// field.
48#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
49#[serde(rename_all = "snake_case")]
50pub enum CacheTtl {
51    /// Default ephemeral TTL (~5 minutes). No beta header required.
52    #[default]
53    Ephemeral,
54    /// Extended 1-hour TTL. Requires the `extended-cache-ttl-2025-04-25` beta header.
55    /// Cache writes cost approximately 2× more than `Ephemeral`.
56    #[serde(rename = "1h")]
57    OneHour,
58}
59
60impl CacheTtl {
61    /// Returns `true` when this TTL variant requires the `extended-cache-ttl-2025-04-25` beta
62    /// header to be sent with each request.
63    #[must_use]
64    pub fn requires_beta(self) -> bool {
65        match self {
66            Self::OneHour => true,
67            Self::Ephemeral => false,
68        }
69    }
70}
71
72/// Thinking level for Gemini models that support extended reasoning.
73///
74/// Maps to `generationConfig.thinkingConfig.thinkingLevel` in the Gemini API.
75/// Valid for Gemini 3+ models. For Gemini 2.5, use `thinking_budget` instead.
76#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
77#[serde(rename_all = "lowercase")]
78pub enum GeminiThinkingLevel {
79    /// Minimal reasoning pass.
80    Minimal,
81    /// Low reasoning depth.
82    Low,
83    /// Medium reasoning depth.
84    Medium,
85    /// Full reasoning depth.
86    High,
87}
88
89/// Newtype wrapper for a provider name referencing an entry in `[[llm.providers]]`.
90///
91/// Using a dedicated type instead of bare `String` makes provider cross-references
92/// explicit in the type system and enables validation at config load time.
93///
94/// # Note
95///
96/// `zeph-common` now defines a canonical `ProviderName(Arc<str>)` newtype. This
97/// config-local type uses `String` and exists for backward compat within `zeph-config`.
98///
99/// TODO(critic): migrate to `zeph_common::ProviderName` once `zeph-config` → `zeph-common`
100/// dependency inversion (A-1) lands.
101#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
102#[serde(transparent)]
103pub struct ProviderName(String);
104
105impl ProviderName {
106    /// Create a new `ProviderName` from any string-like value.
107    ///
108    /// An empty string is a sentinel meaning "use the primary provider" and is the
109    /// default value. Check [`is_empty`](Self::is_empty) before using in routing.
110    ///
111    /// # Examples
112    ///
113    /// ```
114    /// use zeph_config::providers::ProviderName;
115    ///
116    /// let name = ProviderName::new("fast");
117    /// assert_eq!(name.as_str(), "fast");
118    /// ```
119    #[must_use]
120    pub fn new(name: impl Into<String>) -> Self {
121        Self(name.into())
122    }
123
124    /// Return `true` when this is the empty sentinel (use primary provider).
125    ///
126    /// # Examples
127    ///
128    /// ```
129    /// use zeph_config::providers::ProviderName;
130    ///
131    /// assert!(ProviderName::default().is_empty());
132    /// assert!(!ProviderName::new("fast").is_empty());
133    /// ```
134    #[must_use]
135    pub fn is_empty(&self) -> bool {
136        self.0.is_empty()
137    }
138
139    /// Return the inner string slice.
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use zeph_config::providers::ProviderName;
145    ///
146    /// let name = ProviderName::new("quality");
147    /// assert_eq!(name.as_str(), "quality");
148    /// ```
149    #[must_use]
150    pub fn as_str(&self) -> &str {
151        &self.0
152    }
153
154    /// Return `Some(&str)` when non-empty, `None` for the empty sentinel.
155    ///
156    /// Bridges `Option<ProviderName>` fields and the legacy
157    /// `.as_deref().filter(|s| !s.is_empty())` pattern.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use zeph_config::providers::ProviderName;
163    ///
164    /// assert_eq!(ProviderName::default().as_non_empty(), None);
165    /// assert_eq!(ProviderName::new("fast").as_non_empty(), Some("fast"));
166    /// ```
167    #[must_use]
168    pub fn as_non_empty(&self) -> Option<&str> {
169        if self.0.is_empty() {
170            None
171        } else {
172            Some(&self.0)
173        }
174    }
175}
176
177impl fmt::Display for ProviderName {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        self.0.fmt(f)
180    }
181}
182
183impl AsRef<str> for ProviderName {
184    fn as_ref(&self) -> &str {
185        &self.0
186    }
187}
188
189impl std::ops::Deref for ProviderName {
190    type Target = str;
191
192    fn deref(&self) -> &str {
193        &self.0
194    }
195}
196
197impl PartialEq<str> for ProviderName {
198    fn eq(&self, other: &str) -> bool {
199        self.0 == other
200    }
201}
202
203impl PartialEq<&str> for ProviderName {
204    fn eq(&self, other: &&str) -> bool {
205        self.0 == *other
206    }
207}
208
/// Serde default for `LlmConfig::response_cache_ttl_secs`: one hour, in seconds.
fn default_response_cache_ttl_secs() -> u64 {
    60 * 60
}
212
/// Serde default for `LlmConfig::semantic_cache_threshold` (cosine similarity).
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}
216
/// Serde default for `LlmConfig::semantic_cache_max_candidates`.
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}
220
/// Serde default for `LlmConfig::router_ema_alpha`.
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}
224
/// Serde default for `LlmConfig::router_reorder_interval`.
fn default_router_reorder_interval() -> u64 {
    10_u64
}
228
/// Default embedding model name (`"qwen3-embedding"`).
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}
232
/// Default Candle model source (`"huggingface"`).
///
/// NOTE(review): presumably a serde default for a Candle config field; the
/// consumer is not visible in this part of the file.
fn default_candle_source() -> String {
    String::from("huggingface")
}
236
/// Default chat template name (`"chatml"`).
///
/// NOTE(review): the consuming config field is not visible in this part of the file.
fn default_chat_template() -> String {
    String::from("chatml")
}
240
/// Default Candle device name (`"cpu"`).
///
/// NOTE(review): the consuming config field is not visible in this part of the file.
fn default_candle_device() -> String {
    String::from("cpu")
}
244
/// Default sampling temperature (`0.7`).
///
/// NOTE(review): the consuming config field is not visible in this part of the file.
fn default_temperature() -> f64 {
    0.7_f64
}
248
/// Default maximum token count (`2048`).
///
/// NOTE(review): the consuming config field is not visible in this part of the file.
fn default_max_tokens() -> usize {
    2_048
}
252
/// Default seed value (`42`).
///
/// NOTE(review): the consuming config field is not visible in this part of the file.
fn default_seed() -> u64 {
    42_u64
}
256
/// Default repeat penalty (`1.1`).
///
/// NOTE(review): the consuming config field is not visible in this part of the file.
fn default_repeat_penalty() -> f32 {
    1.1_f32
}
260
/// Default `repeat_last_n` value (`64`).
///
/// NOTE(review): the consuming config field is not visible in this part of the file.
fn default_repeat_last_n() -> usize {
    64_usize
}
264
/// Serde default for `CascadeConfig::quality_threshold`.
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}
268
/// Serde default for `CascadeConfig::max_escalations`.
fn default_cascade_max_escalations() -> u8 {
    2_u8
}
272
/// Serde default for `CascadeConfig::window_size`.
fn default_cascade_window_size() -> usize {
    50_usize
}
276
/// Serde default for `ReputationConfig::decay_factor`.
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}
280
/// Serde default for `ReputationConfig::weight`.
fn default_reputation_weight() -> f64 {
    0.3_f64
}
284
/// Serde default for `ReputationConfig::min_observations`.
fn default_reputation_min_observations() -> u64 {
    5_u64
}
288
/// Default STT provider name: the empty string, which means auto-detect.
///
/// Used as the serde default for `SttConfig::provider`.
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}
294
/// Default STT transcription language hint: `"auto"`.
///
/// Used as the serde default for `SttConfig::language`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
300
/// Returns the default embedding model name used by `[llm] embedding_model`.
///
/// Public wrapper around the private `default_embedding_model` helper, so the value
/// returned here is the same one serde applies when the key is absent.
#[must_use]
pub fn get_default_embedding_model() -> String {
    default_embedding_model()
}
306
/// Returns the default response cache TTL in seconds.
///
/// Public wrapper around the private `default_response_cache_ttl_secs` helper, which
/// is also the serde default for `LlmConfig::response_cache_ttl_secs`.
#[must_use]
pub fn get_default_response_cache_ttl_secs() -> u64 {
    default_response_cache_ttl_secs()
}
312
/// Returns the default EMA alpha for the router latency estimator.
///
/// Public wrapper around the private `default_router_ema_alpha` helper, which is also
/// the serde default for `LlmConfig::router_ema_alpha`.
#[must_use]
pub fn get_default_router_ema_alpha() -> f64 {
    default_router_ema_alpha()
}
318
/// Returns the default router reorder interval (turns between provider re-ranking).
///
/// Public wrapper around the private `default_router_reorder_interval` helper, which
/// is also the serde default for `LlmConfig::router_reorder_interval`.
#[must_use]
pub fn get_default_router_reorder_interval() -> u64 {
    default_router_reorder_interval()
}
324
325/// LLM provider backend selector.
326///
327/// Used in `[[llm.providers]]` entries as the `type` field.
328///
329/// # Example (TOML)
330///
331/// ```toml
332/// [[llm.providers]]
333/// type = "openai"
334/// model = "gpt-4o"
335/// name = "quality"
336/// ```
337#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
338#[serde(rename_all = "lowercase")]
339pub enum ProviderKind {
340    /// Local Ollama server (default base URL: `http://localhost:11434`).
341    Ollama,
342    /// Anthropic Claude API.
343    Claude,
344    /// `OpenAI` API.
345    OpenAi,
346    /// Google Gemini API.
347    Gemini,
348    /// Local Candle inference (CPU/GPU, no external server required).
349    Candle,
350    /// OpenAI-compatible third-party API (e.g. Groq, Together AI, LM Studio).
351    Compatible,
352    /// Native Gonka blockchain provider.
353    Gonka,
354    /// Cocoon confidential compute network via localhost sidecar.
355    Cocoon,
356}
357
358impl ProviderKind {
359    /// Return the lowercase string identifier for this provider kind.
360    ///
361    /// # Examples
362    ///
363    /// ```
364    /// use zeph_config::ProviderKind;
365    ///
366    /// assert_eq!(ProviderKind::Claude.as_str(), "claude");
367    /// assert_eq!(ProviderKind::OpenAi.as_str(), "openai");
368    /// ```
369    #[must_use]
370    pub fn as_str(self) -> &'static str {
371        match self {
372            Self::Ollama => "ollama",
373            Self::Claude => "claude",
374            Self::OpenAi => "openai",
375            Self::Gemini => "gemini",
376            Self::Candle => "candle",
377            Self::Compatible => "compatible",
378            Self::Gonka => "gonka",
379            Self::Cocoon => "cocoon",
380        }
381    }
382}
383
384impl std::fmt::Display for ProviderKind {
385    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386        f.write_str(self.as_str())
387    }
388}
389
390/// LLM configuration, nested under `[llm]` in TOML.
391///
392/// Declares the provider pool and controls routing, embedding, caching, and STT.
393/// All providers are declared in `[[llm.providers]]`; subsystems reference them by
394/// the `name` field using a `*_provider` config key.
395///
396/// # Example (TOML)
397///
398/// ```toml
399/// [[llm.providers]]
400/// name = "fast"
401/// type = "openai"
402/// model = "gpt-4o-mini"
403///
404/// [[llm.providers]]
405/// name = "quality"
406/// type = "claude"
407/// model = "claude-opus-4-5"
408///
409/// [llm]
410/// routing = "none"
411/// embedding_model = "qwen3-embedding"
412/// ```
413#[derive(Debug, Deserialize, Serialize)]
414pub struct LlmConfig {
415    /// Provider pool. First entry is default unless one is marked `default = true`.
416    #[serde(default, skip_serializing_if = "Vec::is_empty")]
417    pub providers: Vec<ProviderEntry>,
418
419    /// Routing strategy for multi-provider configs.
420    #[serde(default, skip_serializing_if = "is_routing_none")]
421    pub routing: LlmRoutingStrategy,
422
423    #[serde(default = "default_embedding_model_opt")]
424    pub embedding_model: String,
425    #[serde(default, skip_serializing_if = "Option::is_none")]
426    pub candle: Option<CandleConfig>,
427    #[serde(default)]
428    pub stt: Option<SttConfig>,
429    #[serde(default)]
430    pub response_cache_enabled: bool,
431    #[serde(default = "default_response_cache_ttl_secs")]
432    pub response_cache_ttl_secs: u64,
433    /// Enable semantic similarity-based response caching. Requires embedding support.
434    #[serde(default)]
435    pub semantic_cache_enabled: bool,
436    /// Cosine similarity threshold for semantic cache hits (0.0–1.0).
437    ///
438    /// Only the highest-scoring candidate above this threshold is returned.
439    /// Lower values produce more cache hits but risk returning less relevant responses.
440    /// Recommended range: 0.92–0.98; default: 0.95.
441    #[serde(default = "default_semantic_cache_threshold")]
442    pub semantic_cache_threshold: f32,
443    /// Maximum cached entries to examine per semantic lookup (SQL `LIMIT` clause in
444    /// `ResponseCache::get_semantic()`). Controls the recall-vs-performance tradeoff:
445    ///
446    /// - **Higher values** (e.g. 50): scan more entries, better chance of finding a
447    ///   semantically similar cached response, but slower queries.
448    /// - **Lower values** (e.g. 5): faster queries, but may miss relevant cached entries
449    ///   when the cache is large.
450    /// - **Default (10)**: balanced middle ground for typical workloads.
451    ///
452    /// Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running
453    /// sessions with many cached responses); reduce to 5 for low-latency interactive use.
454    /// Env override: `ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES`.
455    #[serde(default = "default_semantic_cache_max_candidates")]
456    pub semantic_cache_max_candidates: u32,
457    #[serde(default)]
458    pub router_ema_enabled: bool,
459    #[serde(default = "default_router_ema_alpha")]
460    pub router_ema_alpha: f64,
461    #[serde(default = "default_router_reorder_interval")]
462    pub router_reorder_interval: u64,
463    /// Routing configuration for Thompson/Cascade strategies.
464    #[serde(default, skip_serializing_if = "Option::is_none")]
465    pub router: Option<RouterConfig>,
466    /// Provider-specific instruction file to inject into the system prompt.
467    /// Merged with `agent.instruction_files` at startup.
468    #[serde(default, skip_serializing_if = "Option::is_none")]
469    pub instruction_file: Option<std::path::PathBuf>,
470    /// Shorthand model spec for tool-pair summarization and context compaction.
471    /// Format: `ollama/<model>`, `claude[/<model>]`, `openai[/<model>]`, `compatible/<name>`, `candle`.
472    /// Ignored when `[llm.summary_provider]` is set.
473    #[serde(default, skip_serializing_if = "Option::is_none")]
474    pub summary_model: Option<String>,
475    /// Structured provider config for summarization. Takes precedence over `summary_model`.
476    #[serde(default, skip_serializing_if = "Option::is_none")]
477    pub summary_provider: Option<ProviderEntry>,
478
479    /// Complexity triage routing configuration. Required when `routing = "triage"`.
480    #[serde(default, skip_serializing_if = "Option::is_none")]
481    pub complexity_routing: Option<ComplexityRoutingConfig>,
482
483    /// Collaborative Entropy (`CoE`) configuration. `None` = `CoE` disabled.
484    #[serde(default, skip_serializing_if = "Option::is_none")]
485    pub coe: Option<CoeConfig>,
486}
487
/// Serde default hook for `LlmConfig::embedding_model`; forwards to
/// `default_embedding_model`.
fn default_embedding_model_opt() -> String {
    default_embedding_model()
}
491
492#[allow(clippy::trivially_copy_pass_by_ref)]
493fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
494    *s == LlmRoutingStrategy::None
495}
496
497impl LlmConfig {
498    /// Effective provider kind for the primary (first/default) provider in the pool.
499    #[must_use]
500    pub fn effective_provider(&self) -> ProviderKind {
501        self.providers
502            .first()
503            .map_or(ProviderKind::Ollama, |e| e.provider_type)
504    }
505
506    /// Effective base URL for the primary provider.
507    #[must_use]
508    pub fn effective_base_url(&self) -> &str {
509        self.providers
510            .first()
511            .and_then(|e| e.base_url.as_deref())
512            .unwrap_or("http://localhost:11434")
513    }
514
515    /// Effective model for the primary chat-capable provider.
516    ///
517    /// Skips embed-only entries (those with `embed = true`) and returns the model of the
518    /// first provider that can handle chat requests. Falls back to `"qwen3:8b"` when no
519    /// chat-capable provider is configured.
520    #[must_use]
521    pub fn effective_model(&self) -> &str {
522        self.providers
523            .iter()
524            .find(|e| !e.embed)
525            .and_then(|e| e.model.as_deref())
526            .unwrap_or("qwen3:8b")
527    }
528
529    /// Find the provider entry designated for STT.
530    ///
531    /// Resolution priority:
532    /// 1. `[llm.stt].provider` matches `[[llm.providers]].name` and the entry has `stt_model`
533    /// 2. `[llm.stt].provider` is empty — fall through to auto-detect
534    /// 3. First provider with `stt_model` set (auto-detect fallback)
535    /// 4. `None` — STT disabled
536    #[must_use]
537    pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
538        let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
539        if name_hint.is_empty() {
540            self.providers.iter().find(|p| p.stt_model.is_some())
541        } else {
542            self.providers
543                .iter()
544                .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
545        }
546    }
547
548    /// Validate that the config uses the new `[[llm.providers]]` format.
549    ///
550    /// # Errors
551    ///
552    /// Returns `ConfigError::Validation` when no providers are configured.
553    pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
554        Ok(())
555    }
556
557    /// Validate STT config cross-references.
558    ///
559    /// # Errors
560    ///
561    /// Returns `ConfigError::Validation` when the referenced STT provider does not exist.
562    pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
563        use crate::error::ConfigError;
564
565        let Some(stt) = &self.stt else {
566            return Ok(());
567        };
568        if stt.provider.is_empty() {
569            return Ok(());
570        }
571        let found = self
572            .providers
573            .iter()
574            .find(|p| p.effective_name() == stt.provider);
575        match found {
576            None => {
577                return Err(ConfigError::Validation(format!(
578                    "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
579                    stt.provider
580                )));
581            }
582            Some(entry) if entry.stt_model.is_none() => {
583                tracing::warn!(
584                    provider = stt.provider,
585                    "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
586                );
587            }
588            _ => {}
589        }
590        Ok(())
591    }
592
593    /// Resolve `provider_name` to its model string and emit a startup warning when the
594    /// model does not look like a fast-tier model.
595    ///
596    /// **Soft check — never returns an error.** Misconfiguration produces a single
597    /// `tracing::warn!` at startup so operators can fix configs without being blocked.
598    ///
599    /// Rules:
600    /// - Empty `provider_name` → silently OK (caller will use the primary provider).
601    /// - Provider not found in pool → warns `"<label> provider '<name>' not found"`.
602    /// - Model resolved but not in `FAST_TIER_MODEL_HINTS` and not in `extra_allowlist` →
603    ///   warns `"<label> provider '<name>' uses '<model>' which may not be fast-tier"`.
604    /// - Model matches a hint or allowlist entry → silently OK.
605    ///
606    /// # Examples
607    ///
608    /// ```no_run
609    /// use zeph_config::providers::{LlmConfig, ProviderName};
610    ///
611    /// // LlmConfig is constructed via config file; here we illustrate the call shape.
612    /// # let cfg: LlmConfig = unimplemented!();
613    /// // empty provider name is silently ok
614    /// cfg.warn_non_fast_tier_provider(&ProviderName::default(), "memcot.distill_provider", &[]);
615    /// ```
616    pub fn warn_non_fast_tier_provider(
617        &self,
618        provider_name: &ProviderName,
619        feature_label: &str,
620        extra_allowlist: &[String],
621    ) {
622        if provider_name.is_empty() {
623            return;
624        }
625        let name = provider_name.as_str();
626        let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
627            tracing::warn!(
628                provider = name,
629                "{feature_label} provider '{name}' not found in [[llm.providers]]"
630            );
631            return;
632        };
633        let model = entry.model.as_deref().unwrap_or("");
634        if model.is_empty() {
635            return;
636        }
637        let lower = model.to_lowercase();
638        let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
639        let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
640        if !in_hints && !in_extra {
641            tracing::warn!(
642                provider = name,
643                actual = model,
644                "{feature_label} provider '{name}' uses model '{model}' \
645                 which may not be fast-tier; prefer a fast model to bound distillation cost"
646            );
647        }
648    }
649}
650
/// Lowercased substrings that identify commonly accepted fast-tier models.
///
/// Used by [`LlmConfig::warn_non_fast_tier_provider`] for a soft startup check: a model
/// whose lowercased name contains any of these substrings is treated as fast-tier.
/// Updating this list is non-breaking; a fast model missing from it only causes a
/// spurious warning, never an error.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
673
674/// Speech-to-text configuration, nested under `[llm.stt]` in TOML.
675///
676/// When set, Zeph uses the referenced provider for voice transcription.
677/// The provider must have an `stt_model` field set in its `[[llm.providers]]` entry.
678///
679/// # Example (TOML)
680///
681/// ```toml
682/// [llm.stt]
683/// provider = "fast"
684/// language = "en"
685/// ```
686#[derive(Debug, Clone, Deserialize, Serialize)]
687pub struct SttConfig {
688    /// Provider name from `[[llm.providers]]`. Empty string means auto-detect first provider
689    /// with `stt_model` set.
690    #[serde(default = "default_stt_provider")]
691    pub provider: String,
692    /// Language hint for transcription (e.g. `"en"`, `"auto"`).
693    #[serde(default = "default_stt_language")]
694    pub language: String,
695}
696
697/// Routing strategy selection for multi-provider routing.
698#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
699#[serde(rename_all = "lowercase")]
700pub enum RouterStrategyConfig {
701    /// Exponential moving average latency-aware ordering.
702    #[default]
703    Ema,
704    /// Thompson Sampling with Beta distributions (persistence-backed).
705    Thompson,
706    /// Cascade routing: try cheapest provider first, escalate on degenerate output.
707    Cascade,
708    /// PILOT: `LinUCB` contextual bandit with online learning and cost-aware reward.
709    Bandit,
710}
711
712/// Agent Stability Index (ASI) configuration.
713///
714/// Tracks per-provider response coherence via a sliding window of response embeddings.
715/// When coherence drops below `coherence_threshold`, the provider's routing prior is
716/// penalized by `penalty_weight`. Disabled by default; session-only (no persistence).
717///
718/// # Known Limitation
719///
720/// ASI embeddings are computed in a background `tokio::spawn` task after the response is
721/// returned to the caller. Under high request rates, the coherence score used for routing
722/// may lag 1–2 responses behind due to this fire-and-forget design. With the default
723/// `window = 5`, this lag is tolerable — coherence is a slow-moving signal.
724#[derive(Debug, Clone, Deserialize, Serialize)]
725pub struct AsiConfig {
726    /// Enable ASI coherence tracking. Default: false.
727    #[serde(default)]
728    pub enabled: bool,
729
730    /// Sliding window size for response embeddings per provider. Default: 5.
731    #[serde(default = "default_asi_window")]
732    pub window: usize,
733
734    /// Coherence score [0.0, 1.0] below which the provider is penalized. Default: 0.7.
735    #[serde(default = "default_asi_coherence_threshold")]
736    pub coherence_threshold: f32,
737
738    /// Penalty weight applied to Thompson beta / EMA score on low coherence. Default: 0.3.
739    ///
740    /// For Thompson, this shifts the beta prior: `beta += penalty_weight * (threshold - coherence)`.
741    /// For EMA, the score is multiplied by `max(0.5, coherence / threshold)`.
742    #[serde(default = "default_asi_penalty_weight")]
743    pub penalty_weight: f32,
744}
745
/// Default for `AsiConfig::window`.
fn default_asi_window() -> usize {
    5_usize
}
749
/// Default for `AsiConfig::coherence_threshold`.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}
753
/// Default for `AsiConfig::penalty_weight`.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
757
758impl Default for AsiConfig {
759    fn default() -> Self {
760        Self {
761            enabled: false,
762            window: default_asi_window(),
763            coherence_threshold: default_asi_coherence_threshold(),
764            penalty_weight: default_asi_penalty_weight(),
765        }
766    }
767}
768
769/// Routing configuration for multi-provider setups.
770#[derive(Debug, Clone, Deserialize, Serialize)]
771pub struct RouterConfig {
772    /// Routing strategy: `"ema"` (default), `"thompson"`, `"cascade"`, or `"bandit"`.
773    #[serde(default)]
774    pub strategy: RouterStrategyConfig,
775    /// Path for persisting Thompson Sampling state. Defaults to `~/.zeph/router_thompson_state.json`.
776    ///
777    /// # Security
778    ///
779    /// This path is user-controlled. The application writes and reads a JSON file at
780    /// this location. Ensure the path is within a directory that is not world-writable
781    /// (e.g., avoid `/tmp`). The file is created with mode `0o600` on Unix.
782    #[serde(default)]
783    pub thompson_state_path: Option<String>,
784    /// Cascade routing configuration. Only used when `strategy = "cascade"`.
785    #[serde(default)]
786    pub cascade: Option<CascadeConfig>,
787    /// Bayesian reputation scoring configuration (RAPS). Disabled by default.
788    #[serde(default)]
789    pub reputation: Option<ReputationConfig>,
790    /// PILOT bandit routing configuration. Only used when `strategy = "bandit"`.
791    #[serde(default)]
792    pub bandit: Option<BanditConfig>,
793    /// Embedding-based quality gate threshold for Thompson/EMA routing. Default: disabled.
794    ///
795    /// When set, after provider selection, the cosine similarity between the query embedding
796    /// and the response embedding is computed. If below this threshold, the next provider in
797    /// the ordered list is tried. On exhaustion, the best response seen is returned.
798    ///
799    /// Only applies to Thompson and EMA strategies. Cascade uses its own quality classifier.
800    /// Fail-open: embedding errors disable the gate for that request.
801    #[serde(default)]
802    pub quality_gate: Option<f32>,
803    /// Agent Stability Index configuration. Disabled by default.
804    #[serde(default)]
805    pub asi: Option<AsiConfig>,
806    /// Maximum number of concurrent `embed_batch` calls through the router.
807    ///
808    /// Limits simultaneous embedding HTTP requests to prevent provider rate-limiting
809    /// and memory pressure during indexing or high-frequency recall. Default: 4.
810    /// Set to 0 to disable the semaphore (unlimited concurrency).
811    #[serde(default = "default_embed_concurrency")]
812    pub embed_concurrency: usize,
813}
814
/// Serde default for `RouterConfig::embed_concurrency`.
fn default_embed_concurrency() -> usize {
    4_usize
}
818
819/// Configuration for Bayesian reputation scoring (RAPS — Reputation-Adjusted Provider Selection).
820///
821/// When enabled, quality outcomes from tool execution shift the routing scores over time,
822/// giving an advantage to providers that consistently produce valid tool arguments.
823///
824/// Default: disabled. Set `enabled = true` to activate.
825#[derive(Debug, Clone, Deserialize, Serialize)]
826pub struct ReputationConfig {
827    /// Enable reputation scoring. Default: false.
828    #[serde(default)]
829    pub enabled: bool,
830    /// Session-level decay factor applied on each load. Range: (0.0, 1.0]. Default: 0.95.
831    /// Lower values make reputation forget faster; 1.0 = no decay.
832    #[serde(default = "default_reputation_decay_factor")]
833    pub decay_factor: f64,
834    /// Weight of reputation in routing score blend. Range: [0.0, 1.0]. Default: 0.3.
835    ///
836    /// **Warning**: values above 0.5 can aggressively suppress low-reputation providers.
837    /// At `weight = 1.0` with `rep_factor = 0.0` (all failures), the routing score
838    /// drops to zero — the provider becomes unreachable for that session. Stick to
839    /// the default (0.3) unless you intentionally want strong reputation gating.
840    #[serde(default = "default_reputation_weight")]
841    pub weight: f64,
842    /// Minimum quality observations before reputation influences routing. Default: 5.
843    #[serde(default = "default_reputation_min_observations")]
844    pub min_observations: u64,
845    /// Path for persisting reputation state. Defaults to `~/.config/zeph/router_reputation_state.json`.
846    #[serde(default)]
847    pub state_path: Option<String>,
848}
849
/// Configuration for cascade routing (`strategy = "cascade"`).
///
/// Cascade routing tries providers cheapest-first, escalating to the next provider
/// when the response is classified as degenerate (empty, repetitive, incoherent).
/// By default chain order determines cost order (first provider = cheapest); set
/// [`cost_tiers`](Self::cost_tiers) to override that ordering explicitly.
///
/// Every field is optional when deserializing and falls back to its serde default.
///
/// # Limitations
///
/// The heuristic classifier detects degenerate outputs only, not semantic failures.
/// Use `classifier_mode = "judge"` for semantic quality gating (adds LLM call cost).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Minimum quality score in `[0.0, 1.0]` to accept a response without escalating.
    /// Responses scoring below this threshold trigger escalation.
    ///
    /// When omitted, the value comes from `default_cascade_quality_threshold`.
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Maximum number of quality-based escalations per request.
    /// Network/API errors do not count against this budget.
    /// Default: 2 (allows up to 3 providers: cheap → mid → expensive).
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// Quality classifier mode: `"heuristic"` (default) or `"judge"`.
    /// Heuristic is zero-cost but detects only degenerate outputs.
    /// Judge requires a configured `summary_model` and adds one LLM call per evaluation.
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Rolling quality history window size per provider. Default: 50.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Maximum cumulative input+output tokens across all escalation levels.
    /// When exceeded, returns the best-seen response instead of escalating further.
    /// `None` (the value used when the key is omitted) disables the budget, leaving
    /// escalation cost unbounded.
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Explicit cost ordering of provider names (cheapest first).
    /// When set, cascade routing sorts providers by their position in this list before
    /// trying them. Providers not in the list are appended after listed ones in their
    /// original chain order. When unset, chain order is used (default behavior).
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,
}
896
897impl Default for CascadeConfig {
898    fn default() -> Self {
899        Self {
900            quality_threshold: default_cascade_quality_threshold(),
901            max_escalations: default_cascade_max_escalations(),
902            classifier_mode: CascadeClassifierMode::default(),
903            window_size: default_cascade_window_size(),
904            max_cascade_tokens: None,
905            cost_tiers: None,
906        }
907    }
908}
909
/// Quality classifier mode for cascade routing.
///
/// Variant names are (de)serialized in lowercase: `"heuristic"` and `"judge"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// Zero-cost heuristic: detects degenerate outputs (empty, repetitive, incoherent).
    /// Does not detect semantic failures (hallucinations, wrong answers).
    /// This is the default mode.
    #[default]
    Heuristic,
    /// LLM-based judge: more accurate but adds latency. Falls back to heuristic on failure.
    /// Requires `summary_model` to be configured.
    Judge,
}
922
/// Serde default for `BanditConfig::alpha` (the `LinUCB` exploration parameter): `1.0`.
fn default_bandit_alpha() -> f32 {
    1.0_f32
}
926
/// Serde default for `BanditConfig::dim` (feature vector dimension): `32`.
fn default_bandit_dim() -> usize {
    32_usize
}
930
/// Serde default for `BanditConfig::cost_weight` (cost penalty in the reward): `0.1`.
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}
934
/// Serde default for `BanditConfig::decay_factor`: `1.0`, i.e. no decay.
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}
938
/// Serde default for `BanditConfig::embedding_timeout_ms`: `50` milliseconds.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}
942
/// Serde default for `BanditConfig::cache_size` (maximum cached embeddings): `512`.
fn default_bandit_cache_size() -> usize {
    512_usize
}
946
/// Configuration for PILOT bandit routing (`strategy = "bandit"`).
///
/// PILOT (Provider Intelligence via Learned Online Tuning) uses a `LinUCB` contextual
/// bandit to learn which provider performs best for a given query context. The feature
/// vector is derived from the query embedding (first `dim` components, L2-normalised).
///
/// **Cold start**: the bandit falls back to Thompson sampling for the first
/// `10 * num_providers` queries (override with [`warmup_queries`](Self::warmup_queries)).
/// After warmup, `LinUCB` takes over.
///
/// **Embedding**: an `embedding_provider` must be set for feature vectors. If the embed
/// call exceeds `embedding_timeout_ms` or fails, the bandit falls back to Thompson/uniform.
/// Use a local provider (Ollama, Candle) to avoid network latency on the hot path.
///
/// Every field is optional when deserializing and falls back to its serde default.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct BanditConfig {
    /// `LinUCB` exploration parameter. Default: 1.0.
    /// Higher values increase exploration; lower values favour exploitation.
    #[serde(default = "default_bandit_alpha")]
    pub alpha: f32,

    /// Feature vector dimension (first `dim` components of the embedding).
    ///
    /// This is simple truncation, not PCA. The first raw embedding dimensions do not
    /// necessarily capture the most variance. For `OpenAI` `text-embedding-3-*` models,
    /// consider using the `dimensions` API parameter (Matryoshka embeddings) instead.
    /// Default: 32.
    #[serde(default = "default_bandit_dim")]
    pub dim: usize,

    /// Cost penalty weight in the reward signal: `reward = quality - cost_weight * cost_fraction`.
    /// Default: 0.1. Increase to penalise expensive providers more aggressively.
    #[serde(default = "default_bandit_cost_weight")]
    pub cost_weight: f32,

    /// Session-level decay applied to arm state on startup: `A = I + decay*(A-I)`, `b = decay*b`.
    /// Values < 1.0 cause re-exploration after provider quality changes. Default: 1.0 (no decay).
    #[serde(default = "default_bandit_decay_factor")]
    pub decay_factor: f32,

    /// Provider name from `[[llm.providers]]` used for query embeddings.
    ///
    /// SLM recommended: prefer a fast local model (e.g. Ollama `nomic-embed-text`,
    /// Candle, or `text-embedding-3-small`) — this is called on every bandit request.
    /// Empty string disables `LinUCB` (bandit always falls back to Thompson/uniform).
    /// When the key is omitted, `ProviderName::default()` is used.
    #[serde(default)]
    pub embedding_provider: ProviderName,

    /// Hard timeout for the embedding call in milliseconds. Default: 50.
    /// If exceeded, the request falls back to Thompson/uniform selection.
    #[serde(default = "default_bandit_embedding_timeout_ms")]
    pub embedding_timeout_ms: u64,

    /// Maximum cached embeddings (keyed by query text hash). Default: 512.
    #[serde(default = "default_bandit_cache_size")]
    pub cache_size: usize,

    /// Path for persisting bandit state. Defaults to `~/.config/zeph/router_bandit_state.json`.
    ///
    /// `None` (key omitted) selects that default location.
    ///
    /// # Security
    ///
    /// This path is user-controlled. The file is created with mode `0o600` on Unix.
    /// Do not place it in world-writable directories.
    #[serde(default)]
    pub state_path: Option<String>,

    /// MAR (Memory-Augmented Routing) confidence threshold.
    ///
    /// When the top-1 semantic recall score for the current query is >= this value,
    /// the bandit biases toward cheaper providers (the answer is likely in memory).
    /// Set to 1.0 to disable MAR. Default: 0.9.
    #[serde(default = "default_bandit_memory_confidence_threshold")]
    pub memory_confidence_threshold: f32,

    /// Minimum number of queries before `LinUCB` takes over from Thompson warmup.
    ///
    /// When unset (`None`) or `0`, the warmup length is `10 × number of providers`,
    /// computed at startup. Set a positive value to control how long the bandit
    /// explores uniformly before switching to context-aware routing.
    #[serde(default)]
    pub warmup_queries: Option<u64>,
}
1027
/// Serde default for `BanditConfig::memory_confidence_threshold`: `0.9`.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
1031
1032impl Default for BanditConfig {
1033    fn default() -> Self {
1034        Self {
1035            alpha: default_bandit_alpha(),
1036            dim: default_bandit_dim(),
1037            cost_weight: default_bandit_cost_weight(),
1038            decay_factor: default_bandit_decay_factor(),
1039            embedding_provider: ProviderName::default(),
1040            embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
1041            cache_size: default_bandit_cache_size(),
1042            state_path: None,
1043            memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
1044            warmup_queries: None,
1045        }
1046    }
1047}
1048
1049#[derive(Debug, Deserialize, Serialize)]
1050pub struct CandleConfig {
1051    #[serde(default = "default_candle_source")]
1052    pub source: String,
1053    #[serde(default)]
1054    pub local_path: String,
1055    #[serde(default)]
1056    pub filename: Option<String>,
1057    #[serde(default = "default_chat_template")]
1058    pub chat_template: String,
1059    #[serde(default = "default_candle_device")]
1060    pub device: String,
1061    #[serde(default)]
1062    pub embedding_repo: Option<String>,
1063    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
1064    ///
1065    /// Must be the **token value** — resolved by the caller before constructing this config.
1066    #[serde(default)]
1067    pub hf_token: Option<String>,
1068    #[serde(default)]
1069    pub generation: GenerationParams,
1070    /// Maximum seconds to wait for each half of a single inference request.
1071    ///
1072    /// The timeout is applied **twice** per `chat()` call: once for the channel send
1073    /// (waiting for a free slot) and once for the oneshot reply (waiting for the worker
1074    /// to finish). The effective maximum wall-clock wait per request is therefore
1075    /// `2 × inference_timeout_secs`. CPU inference can be slow; 120s is a conservative
1076    /// default for large models, giving up to 240s total before an error is returned.
1077    /// Values of 0 are silently promoted to 1 at bootstrap.
1078    #[serde(default = "default_inference_timeout_secs")]
1079    pub inference_timeout_secs: u64,
1080}
1081
/// Serde default for the Candle `inference_timeout_secs` setting: `120` seconds.
fn default_inference_timeout_secs() -> u64 {
    120_u64
}
1085
/// Sampling / generation parameters for Candle local inference.
///
/// Used inside `[llm.candle.generation]` or a `[[llm.providers]]` Candle entry.
/// Every field is optional when deserializing and falls back to its serde default.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GenerationParams {
    /// Sampling temperature. Higher values produce more creative outputs. Default: `0.7`.
    #[serde(default = "default_temperature")]
    pub temperature: f64,
    /// Nucleus sampling threshold. When set, tokens with cumulative probability above
    /// this value are excluded. Default: `None` (disabled).
    #[serde(default)]
    pub top_p: Option<f64>,
    /// Top-k sampling. When set, only the top-k most probable tokens are considered.
    /// Default: `None` (disabled).
    #[serde(default)]
    pub top_k: Option<usize>,
    /// Maximum number of tokens to generate per response. Default: `2048`.
    ///
    /// The stored value is not clamped; call [`GenerationParams::capped_max_tokens`]
    /// to obtain the value limited to [`MAX_TOKENS_CAP`].
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// Random seed for reproducible outputs. Default: `42`.
    #[serde(default = "default_seed")]
    pub seed: u64,
    /// Repetition penalty applied during sampling. Default: `1.1`.
    #[serde(default = "default_repeat_penalty")]
    pub repeat_penalty: f32,
    /// Number of last tokens to consider for the repetition penalty window. Default: `64`.
    #[serde(default = "default_repeat_last_n")]
    pub repeat_last_n: usize,
}
1116
/// Ceiling applied by `GenerationParams::capped_max_tokens` so that a misconfigured
/// `max_tokens` cannot request unbounded generation.
pub const MAX_TOKENS_CAP: usize = 32_768;
1119
1120impl GenerationParams {
1121    /// Returns `max_tokens` clamped to [`MAX_TOKENS_CAP`].
1122    ///
1123    /// # Examples
1124    ///
1125    /// ```
1126    /// use zeph_config::GenerationParams;
1127    ///
1128    /// let params = GenerationParams::default();
1129    /// assert!(params.capped_max_tokens() <= 32768);
1130    /// ```
1131    #[must_use]
1132    pub fn capped_max_tokens(&self) -> usize {
1133        self.max_tokens.min(MAX_TOKENS_CAP)
1134    }
1135}
1136
1137impl Default for GenerationParams {
1138    fn default() -> Self {
1139        Self {
1140            temperature: default_temperature(),
1141            top_p: None,
1142            top_k: None,
1143            max_tokens: default_max_tokens(),
1144            seed: default_seed(),
1145            repeat_penalty: default_repeat_penalty(),
1146            repeat_last_n: default_repeat_last_n(),
1147        }
1148    }
1149}
1150
1151// ─── Unified config types ─────────────────────────────────────────────────────
1152
/// Routing strategy for the `[[llm.providers]]` pool.
///
/// Variant names are (de)serialized in lowercase: `"none"`, `"ema"`, `"thompson"`,
/// `"cascade"`, `"triage"`, `"bandit"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LlmRoutingStrategy {
    /// Single provider or first-in-pool (default).
    #[default]
    None,
    /// Exponential moving average latency-aware ordering.
    Ema,
    /// Thompson Sampling with Beta distributions.
    Thompson,
    /// Cascade: try cheapest provider first, escalate on degenerate output.
    /// Tuned via [`CascadeConfig`].
    Cascade,
    /// Complexity triage routing: pre-classify each request, delegate to appropriate tier.
    /// Tuned via [`ComplexityRoutingConfig`].
    Triage,
    /// PILOT: `LinUCB` contextual bandit with online learning and budget-aware reward.
    /// Tuned via [`BanditConfig`].
    Bandit,
}
1171
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`: `5` seconds.
fn default_triage_timeout_secs() -> u64 {
    5_u64
}
1175
/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`: `50` tokens.
fn default_max_triage_tokens() -> u32 {
    50_u32
}
1179
/// Serde default helper for boolean fields that are enabled unless the config says
/// otherwise; always returns `true`.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
1183
/// Predicate for `#[serde(skip_serializing_if = "is_true")]`: reports whether the
/// flag is set, so that a field equal to its `true` default is left out of the output.
// The by-reference parameter is kept because serde calls `skip_serializing_if`
// predicates with `&T`.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_true(v: &bool) -> bool {
    let &flag = v;
    flag
}
1188
/// Tier-to-provider name mapping for complexity routing.
///
/// Each field names the provider that serves requests classified into that tier.
/// All fields are optional; `TierMapping::default()` leaves every tier unmapped.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct TierMapping {
    /// Provider name for the `simple` tier; `None` when unmapped.
    pub simple: Option<String>,
    /// Provider name for the `medium` tier; `None` when unmapped.
    pub medium: Option<String>,
    /// Provider name for the `complex` tier; `None` when unmapped.
    pub complex: Option<String>,
    /// Provider name for the `expert` tier; `None` when unmapped.
    pub expert: Option<String>,
}
1197
/// Configuration for complexity-based triage routing (`routing = "triage"`).
///
/// When `[llm] routing = "triage"` is set, a cheap triage model classifies each request
/// and routes it to the appropriate tier provider. Requires at least one tier mapping.
///
/// Every field is optional when deserializing and falls back to its serde default.
///
/// # Example
///
/// ```toml
/// [llm]
/// routing = "triage"
///
/// [llm.complexity_routing]
/// triage_provider = "local-fast"
///
/// [llm.complexity_routing.tiers]
/// simple = "local-fast"
/// medium = "haiku"
/// complex = "sonnet"
/// expert = "opus"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ComplexityRoutingConfig {
    /// Provider name from `[[llm.providers]]` used for triage classification.
    /// `None` when omitted.
    #[serde(default)]
    pub triage_provider: Option<ProviderName>,

    /// Skip triage when all tiers map to the same provider. Default: `true`.
    #[serde(default = "default_true")]
    pub bypass_single_provider: bool,

    /// Tier-to-provider name mapping. When omitted, every tier is unmapped.
    #[serde(default)]
    pub tiers: TierMapping,

    /// Max output tokens for the triage classification call. Default: 50.
    #[serde(default = "default_max_triage_tokens")]
    pub max_triage_tokens: u32,

    /// Timeout in seconds for the triage classification call. Default: 5.
    /// On timeout, falls back to the default (first) tier provider.
    #[serde(default = "default_triage_timeout_secs")]
    pub triage_timeout_secs: u64,

    /// Optional fallback strategy when triage misclassifies.
    /// Only `"cascade"` is currently supported (Phase 4).
    /// `None` when omitted.
    #[serde(default)]
    pub fallback_strategy: Option<String>,
}
1246
1247impl Default for ComplexityRoutingConfig {
1248    fn default() -> Self {
1249        Self {
1250            triage_provider: None,
1251            bypass_single_provider: true,
1252            tiers: TierMapping::default(),
1253            max_triage_tokens: default_max_triage_tokens(),
1254            triage_timeout_secs: default_triage_timeout_secs(),
1255            fallback_strategy: None,
1256        }
1257    }
1258}
1259
/// Configuration for the Collaborative Entropy (`CoE`) subsystem (`[llm.coe]` TOML section).
///
/// `CoE` detects uncertain responses from the primary provider and escalates to a
/// secondary provider when either the intra-entropy or inter-divergence signal crosses
/// its threshold. Only active for `RouterStrategy::Ema` and `RouterStrategy::Thompson`.
///
/// The container-level `#[serde(default)]` means any key missing from the section is
/// taken from this type's `Default` implementation.
///
/// # Example
///
/// ```toml
/// [llm.coe]
/// enabled = true
/// intra_threshold = 0.8
/// inter_threshold = 0.20
/// shadow_sample_rate = 0.1
/// secondary_provider = "quality"
/// embed_provider = ""
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CoeConfig {
    /// Enable `CoE`. When `false`, the struct is ignored. Default: `false`.
    pub enabled: bool,
    /// Mean negative log-prob threshold; responses above this trigger intra escalation.
    /// Default: `0.8`.
    pub intra_threshold: f64,
    /// Divergence threshold in `[0.0, 1.0]`. Default: `0.20`.
    pub inter_threshold: f64,
    /// Baseline rate at which secondary is called even when intra is low. Default: `0.1`.
    pub shadow_sample_rate: f64,
    /// Provider name from `[[llm.providers]]` used as the escalation target.
    /// Default: `ProviderName::default()`.
    pub secondary_provider: ProviderName,
    /// Provider name for inter-divergence embeddings. Empty → inherit bandit's embed provider.
    /// Default: `ProviderName::default()`.
    pub embed_provider: ProviderName,
}
1293
1294impl Default for CoeConfig {
1295    fn default() -> Self {
1296        Self {
1297            enabled: false,
1298            intra_threshold: 0.8,
1299            inter_threshold: 0.20,
1300            shadow_sample_rate: 0.1,
1301            secondary_provider: ProviderName::default(),
1302            embed_provider: ProviderName::default(),
1303        }
1304    }
1305}
1306
/// A single Gonka network node endpoint.
///
/// Used in `[[llm.providers]]` entries with `type = "gonka"` to declare
/// the node pool for blockchain inference routing.
///
/// `url` and `address` carry no serde default, so both keys must be present when
/// deserializing; only `name` is optional.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct GonkaNode {
    /// HTTP(S) URL of the Gonka node (e.g. `"https://node1.gonka.ai"`).
    pub url: String,
    /// On-chain bech32 address of this node (e.g. `"gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"`).
    ///
    /// Required for signature construction: every signed request binds to the target node's
    /// on-chain address, making signatures non-replayable across different nodes.
    pub address: String,
    /// Optional human-readable label for `zeph gonka doctor` output.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
}
1324
/// Inline candle config for use inside `ProviderEntry`.
/// Re-uses the generation params from `CandleConfig`.
///
/// Every field is optional when deserializing and falls back to its serde default.
///
/// # Security
///
/// `Debug` is derived, so [`hf_token`](Self::hf_token) appears verbatim in `{:?}`
/// output. Avoid logging this struct.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandleInlineConfig {
    /// Model source. When omitted, the value comes from `default_candle_source`.
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local model path. Empty string when omitted.
    // NOTE(review): presumably only consulted when `source` selects a local model — confirm.
    #[serde(default)]
    pub local_path: String,
    /// Optional model file name. `None` when omitted.
    #[serde(default)]
    pub filename: Option<String>,
    /// Chat template identifier. When omitted, the value comes from `default_chat_template`.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Compute device selector. When omitted, the value comes from `default_candle_device`.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional repository for the embedding model. `None` when omitted.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Sampling / generation parameters. Uses `GenerationParams::default()` when omitted.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Timeout in seconds for each half (send, then receive) of a single inference request.
    ///
    /// Because send and recv each get this budget, the effective maximum wall-clock
    /// wait per request is `2 × inference_timeout_secs`.
    /// CPU inference can be slow; 120s is a conservative default. Floored at 1s.
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1353
1354impl Default for CandleInlineConfig {
1355    fn default() -> Self {
1356        Self {
1357            source: default_candle_source(),
1358            local_path: String::new(),
1359            filename: None,
1360            chat_template: default_chat_template(),
1361            device: default_candle_device(),
1362            embedding_repo: None,
1363            hf_token: None,
1364            generation: GenerationParams::default(),
1365            inference_timeout_secs: default_inference_timeout_secs(),
1366        }
1367    }
1368}
1369
/// Per-1K-token pricing for a Cocoon provider, in cents.
///
/// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
/// When this struct is present in a provider entry, its values are registered with
/// `CostTracker` at startup so that token costs are tracked accurately.
///
/// Reasoning tokens (when the model uses chain-of-thought) are folded into
/// `completion_tokens` by the Cocoon sidecar and counted at the completion price.
///
/// `ProviderEntry::validate` rejects prices that are negative or not finite.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct CocoonPricing {
    /// Prompt (input) token price in cents per 1K tokens. `0.0` when omitted.
    #[serde(default)]
    pub prompt_cents_per_1k: f64,
    /// Completion (output) token price in cents per 1K tokens. `0.0` when omitted.
    /// Reasoning tokens are counted here since the sidecar folds them into completion tokens.
    #[serde(default)]
    pub completion_cents_per_1k: f64,
}
1388
/// Unified provider entry: one struct replaces `CloudLlmConfig`, `OpenAiConfig`,
/// `GeminiConfig`, `OllamaConfig`, `CompatibleConfig`, and `OrchestratorProviderConfig`.
///
/// Provider-specific fields use `#[serde(default)]` and are ignored by backends
/// that do not use them (flat-union pattern). Only `type` is mandatory when
/// deserializing; every other key may be omitted.
///
/// # Security
///
/// `Debug` is derived, so secret-bearing fields such as [`api_key`](Self::api_key)
/// appear verbatim in `{:?}` output. Avoid logging whole entries.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct ProviderEntry {
    /// Required: provider backend type. Spelled `type` in the config file.
    #[serde(rename = "type")]
    pub provider_type: ProviderKind,

    /// Optional name for multi-provider configs. Auto-generated from type if absent.
    ///
    /// `validate` requires an explicit name for `compatible`, `gonka` and `cocoon` entries.
    #[serde(default)]
    pub name: Option<String>,

    /// Model identifier. Required for most types.
    ///
    /// When absent, `effective_model` supplies a per-type built-in default.
    #[serde(default)]
    pub model: Option<String>,

    /// API base URL. Each type has its own default.
    #[serde(default)]
    pub base_url: Option<String>,

    /// Max output tokens.
    #[serde(default)]
    pub max_tokens: Option<u32>,

    /// Embedding model. When set, this provider supports `embed()` calls.
    #[serde(default)]
    pub embedding_model: Option<String>,

    /// STT model. When set, this provider supports speech-to-text via the Whisper API or
    /// Candle-local inference.
    ///
    /// `validate` logs a warning when this is set on an Ollama entry.
    #[serde(default)]
    pub stt_model: Option<String>,

    /// Mark this entry as the embedding provider (handles `embed()` calls).
    /// Default: `false`.
    #[serde(default)]
    pub embed: bool,

    /// Mark this entry as the default chat provider (overrides position-based default).
    /// Default: `false`.
    #[serde(default)]
    pub default: bool,

    // --- Claude-specific ---
    /// Claude thinking mode (extended or adaptive, see [`ThinkingConfig`]).
    /// `None` when omitted.
    #[serde(default)]
    pub thinking: Option<ThinkingConfig>,
    /// Claude-specific flag. Default: `false`.
    // NOTE(review): presumably enables server-side context compaction — confirm against the Claude backend.
    #[serde(default)]
    pub server_compaction: bool,
    /// Claude-specific flag. Default: `false`.
    // NOTE(review): presumably opts into the extended context window — confirm against the Claude backend.
    #[serde(default)]
    pub enable_extended_context: bool,
    /// Prompt cache TTL variant. `None` keeps the default ~5-minute ephemeral TTL.
    /// Set to `"1h"` to enable the extended 1-hour TTL (beta, ~2× write cost).
    #[serde(default)]
    pub prompt_cache_ttl: Option<CacheTtl>,

    // --- OpenAI-specific ---
    /// OpenAI-specific reasoning effort, passed as a free-form string.
    /// `None` when omitted.
    // NOTE(review): accepted values are not validated in this struct — confirm against the OpenAI backend.
    #[serde(default)]
    pub reasoning_effort: Option<String>,

    // --- Gemini-specific ---
    /// Gemini-specific thinking level. `None` when omitted.
    #[serde(default)]
    pub thinking_level: Option<GeminiThinkingLevel>,
    /// Gemini-specific thinking budget. `None` when omitted.
    // NOTE(review): signed type suggests sentinel values (e.g. negative) may be meaningful — confirm.
    #[serde(default)]
    pub thinking_budget: Option<i32>,
    /// Gemini-specific switch for including thoughts in responses. `None` when omitted.
    #[serde(default)]
    pub include_thoughts: Option<bool>,

    // --- Compatible-specific: optional inline api_key ---
    /// Optional inline API key for `compatible` entries. `None` when omitted.
    ///
    /// Included verbatim in `Debug` output and in serialized config.
    #[serde(default)]
    pub api_key: Option<String>,

    // --- Candle-specific ---
    /// Inline Candle settings for `candle` entries. `None` when omitted.
    #[serde(default)]
    pub candle: Option<CandleInlineConfig>,

    // --- Vision ---
    /// Optional vision model identifier. `None` when omitted.
    #[serde(default)]
    pub vision_model: Option<String>,

    // --- Gonka-specific ---
    /// Gonka network node pool. Required (non-empty) when `type = "gonka"`.
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub gonka_nodes: Vec<GonkaNode>,
    /// bech32 chain prefix for address encoding. Defaults to `"gonka"` when omitted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gonka_chain_prefix: Option<String>,

    // --- Cocoon-specific ---
    /// Cocoon sidecar HTTP URL. Defaults to `"http://localhost:10000"` when absent.
    ///
    /// `validate` requires a parseable URL with an `http`/`https` scheme and a
    /// loopback host.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_client_url: Option<String>,
    /// Sentinel field for access hash. Leave empty in config; actual value
    /// is resolved from the age vault as `ZEPH_COCOON_ACCESS_HASH`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_access_hash: Option<String>,
    /// Whether to perform a health check against `/stats` at provider construction time.
    /// Default: `true`; omitted from serialized output while it is `true`.
    #[serde(default = "default_true", skip_serializing_if = "is_true")]
    pub cocoon_health_check: bool,
    /// Manual per-1K-token pricing for this Cocoon provider.
    ///
    /// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
    /// When this section is present, the values are registered with `CostTracker` at startup
    /// so that token costs are tracked accurately.
    ///
    /// Example TOML:
    /// ```toml
    /// [llm.providers.cocoon_pricing]
    /// prompt_cents_per_1k = 0.01
    /// completion_cents_per_1k = 0.03
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cocoon_pricing: Option<CocoonPricing>,

    /// Provider-specific instruction file. `None` when omitted.
    #[serde(default)]
    pub instruction_file: Option<std::path::PathBuf>,

    /// Maximum concurrent LLM calls from orchestrated sub-agents to this provider.
    ///
    /// When set, `DagScheduler` acquires a semaphore permit before dispatching a
    /// sub-agent that targets this provider. Dispatch is deferred (using the existing
    /// `deferral_backoff` mechanism) when the semaphore is saturated.
    ///
    /// `None` (default) = unlimited — no admission control applied.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [[llm.providers]]
    /// name = "quality"
    /// type = "openai"
    /// model = "gpt-5"
    /// max_concurrent = 3
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_concurrent: Option<u32>,
}
1528
1529impl Default for ProviderEntry {
1530    fn default() -> Self {
1531        Self {
1532            provider_type: ProviderKind::Ollama,
1533            name: None,
1534            model: None,
1535            base_url: None,
1536            max_tokens: None,
1537            embedding_model: None,
1538            stt_model: None,
1539            embed: false,
1540            default: false,
1541            thinking: None,
1542            server_compaction: false,
1543            enable_extended_context: false,
1544            prompt_cache_ttl: None,
1545            reasoning_effort: None,
1546            thinking_level: None,
1547            thinking_budget: None,
1548            include_thoughts: None,
1549            api_key: None,
1550            candle: None,
1551            vision_model: None,
1552            gonka_nodes: Vec::new(),
1553            gonka_chain_prefix: None,
1554            cocoon_client_url: None,
1555            cocoon_access_hash: None,
1556            cocoon_health_check: true,
1557            cocoon_pricing: None,
1558            instruction_file: None,
1559            max_concurrent: None,
1560        }
1561    }
1562}
1563
1564impl ProviderEntry {
1565    /// Resolve the effective name: explicit `name` field or type string.
1566    #[must_use]
1567    pub fn effective_name(&self) -> String {
1568        self.name
1569            .clone()
1570            .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1571    }
1572
1573    /// Resolve the effective model: explicit `model` field or the provider-type default.
1574    ///
1575    /// Defaults mirror those used in `build_provider_from_entry` so that `runtime.model_name`
1576    /// always reflects the actual model being used rather than the provider type string.
1577    #[must_use]
1578    pub fn effective_model(&self) -> String {
1579        if let Some(ref m) = self.model {
1580            return m.clone();
1581        }
1582        match self.provider_type {
1583            ProviderKind::Ollama => "qwen3:8b".to_owned(),
1584            ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1585            ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1586            ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1587            // Compatible/Candle return empty because the model is resolved elsewhere.
1588            // Gonka returns empty because it is a blockchain provider, not an LLM — there is no model concept.
1589            ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1590            ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1591        }
1592    }
1593
1594    /// Validate this entry for cross-field consistency.
1595    ///
1596    /// # Errors
1597    ///
1598    /// Returns `ConfigError` when a fatal invariant is violated (e.g. compatible provider
1599    /// without a name).
1600    pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1601        use crate::error::ConfigError;
1602
1603        // B2: compatible provider MUST have name set.
1604        if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1605            return Err(ConfigError::Validation(
1606                "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1607            ));
1608        }
1609
1610        // B3: gonka provider MUST have name and valid gonka_nodes.
1611        if self.provider_type == ProviderKind::Gonka {
1612            if self.name.is_none() {
1613                return Err(ConfigError::Validation(
1614                    "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1615                ));
1616            }
1617            self.validate_gonka_nodes()?;
1618        }
1619
1620        // B4: cocoon provider MUST have a name.
1621        if self.provider_type == ProviderKind::Cocoon
1622            && self.name.as_ref().is_none_or(String::is_empty)
1623        {
1624            return Err(ConfigError::Validation(
1625                "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1626            ));
1627        }
1628
1629        // B5: cocoon URL must be valid http/https; cocoon model must not be empty.
1630        if self.provider_type == ProviderKind::Cocoon {
1631            let name = self.effective_name();
1632            if let Some(ref url_str) = self.cocoon_client_url {
1633                match url::Url::parse(url_str) {
1634                    Err(_) => {
1635                        return Err(ConfigError::Validation(format!(
1636                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1637                             '{url_str}' is not a valid URL; expected format: \
1638                             http://localhost:10000"
1639                        )));
1640                    }
1641                    Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1642                        return Err(ConfigError::Validation(format!(
1643                            "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1644                             localhost or 127.0.0.1, got '{}'",
1645                            u.host_str().unwrap_or("<none>")
1646                        )));
1647                    }
1648                    Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1649                        return Err(ConfigError::Validation(format!(
1650                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1651                             scheme must be http or https, got '{}'",
1652                            u.scheme()
1653                        )));
1654                    }
1655                    _ => {}
1656                }
1657            }
1658            if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1659                return Err(ConfigError::Validation(format!(
1660                    "[[llm.providers]] entry '{name}': model must not be empty \
1661                     for cocoon provider"
1662                )));
1663            }
1664            if let Some(ref p) = self.cocoon_pricing {
1665                if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1666                    return Err(ConfigError::Validation(format!(
1667                        "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1668                         must be a finite non-negative number"
1669                    )));
1670                }
1671                if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1672                    return Err(ConfigError::Validation(format!(
1673                        "[[llm.providers]] entry '{name}': \
1674                         cocoon_pricing.completion_cents_per_1k \
1675                         must be a finite non-negative number"
1676                    )));
1677                }
1678            }
1679        }
1680
1681        // B1: warn on irrelevant fields.
1682        self.warn_irrelevant_fields();
1683
1684        // W6: Candle STT-only provider (stt_model set, no model) is valid — no warning needed.
1685        // Warn if Ollama has stt_model set (Ollama does not support Whisper API).
1686        if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1687            tracing::warn!(
1688                provider = self.effective_name(),
1689                "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1690                 Whisper STT API — use OpenAI, compatible, or candle instead"
1691            );
1692        }
1693
1694        Ok(())
1695    }
1696
1697    /// Resolve the effective Gonka chain prefix: explicit value or `"gonka"` default.
1698    #[must_use]
1699    pub fn effective_gonka_chain_prefix(&self) -> &str {
1700        self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1701    }
1702
1703    fn warn_irrelevant_fields(&self) {
1704        let name = self.effective_name();
1705        match self.provider_type {
1706            ProviderKind::Ollama => {
1707                if self.thinking.is_some() {
1708                    tracing::warn!(
1709                        provider = name,
1710                        "field `thinking` is only used by Claude providers"
1711                    );
1712                }
1713                if self.reasoning_effort.is_some() {
1714                    tracing::warn!(
1715                        provider = name,
1716                        "field `reasoning_effort` is only used by OpenAI providers"
1717                    );
1718                }
1719                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1720                    tracing::warn!(
1721                        provider = name,
1722                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1723                    );
1724                }
1725            }
1726            ProviderKind::Claude => {
1727                if self.reasoning_effort.is_some() {
1728                    tracing::warn!(
1729                        provider = name,
1730                        "field `reasoning_effort` is only used by OpenAI providers"
1731                    );
1732                }
1733                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1734                    tracing::warn!(
1735                        provider = name,
1736                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1737                    );
1738                }
1739            }
1740            ProviderKind::OpenAi => {
1741                if self.thinking.is_some() {
1742                    tracing::warn!(
1743                        provider = name,
1744                        "field `thinking` is only used by Claude providers"
1745                    );
1746                }
1747                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1748                    tracing::warn!(
1749                        provider = name,
1750                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1751                    );
1752                }
1753            }
1754            ProviderKind::Gemini => {
1755                if self.thinking.is_some() {
1756                    tracing::warn!(
1757                        provider = name,
1758                        "field `thinking` is only used by Claude providers"
1759                    );
1760                }
1761                if self.reasoning_effort.is_some() {
1762                    tracing::warn!(
1763                        provider = name,
1764                        "field `reasoning_effort` is only used by OpenAI providers"
1765                    );
1766                }
1767            }
1768            ProviderKind::Gonka => {
1769                if self.thinking.is_some() {
1770                    tracing::warn!(
1771                        provider = name,
1772                        "field `thinking` is only used by Claude providers"
1773                    );
1774                }
1775                if self.reasoning_effort.is_some() {
1776                    tracing::warn!(
1777                        provider = name,
1778                        "field `reasoning_effort` is only used by OpenAI providers"
1779                    );
1780                }
1781                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1782                    tracing::warn!(
1783                        provider = name,
1784                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1785                    );
1786                }
1787            }
1788            ProviderKind::Compatible | ProviderKind::Candle => {}
1789            ProviderKind::Cocoon => {
1790                if self.base_url.is_some() {
1791                    tracing::warn!(
1792                        provider = name,
1793                        "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1794                    );
1795                }
1796            }
1797        }
1798    }
1799
1800    fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1801        use crate::error::ConfigError;
1802        if self.gonka_nodes.is_empty() {
1803            return Err(ConfigError::Validation(format!(
1804                "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1805                self.effective_name()
1806            )));
1807        }
1808        for (i, node) in self.gonka_nodes.iter().enumerate() {
1809            if node.url.is_empty() {
1810                return Err(ConfigError::Validation(format!(
1811                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1812                    self.effective_name()
1813                )));
1814            }
1815            if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1816                return Err(ConfigError::Validation(format!(
1817                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1818                    self.effective_name()
1819                )));
1820            }
1821        }
1822        Ok(())
1823    }
1824}
1825
1826/// Validate a pool of `ProviderEntry` items.
1827///
1828/// # Errors
1829///
1830/// Returns `ConfigError` for fatal validation failures:
1831/// - Empty pool
1832/// - Duplicate names
1833/// - Multiple entries marked `default = true`
1834/// - Individual entry validation errors
1835pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1836    use crate::error::ConfigError;
1837    use std::collections::HashSet;
1838
1839    if entries.is_empty() {
1840        return Err(ConfigError::Validation(
1841            "at least one LLM provider must be configured in [[llm.providers]]".into(),
1842        ));
1843    }
1844
1845    let default_count = entries.iter().filter(|e| e.default).count();
1846    if default_count > 1 {
1847        return Err(ConfigError::Validation(
1848            "only one [[llm.providers]] entry can be marked `default = true`".into(),
1849        ));
1850    }
1851
1852    let mut seen_names: HashSet<String> = HashSet::new();
1853    for entry in entries {
1854        let name = entry.effective_name();
1855        if !seen_names.insert(name.clone()) {
1856            return Err(ConfigError::Validation(format!(
1857                "duplicate provider name \"{name}\" in [[llm.providers]]"
1858            )));
1859        }
1860        entry.validate()?;
1861    }
1862
1863    Ok(())
1864}
1865
1866#[cfg(test)]
1867mod tests {
1868    use super::*;
1869
1870    fn ollama_entry() -> ProviderEntry {
1871        ProviderEntry {
1872            provider_type: ProviderKind::Ollama,
1873            name: Some("ollama".into()),
1874            model: Some("qwen3:8b".into()),
1875            ..Default::default()
1876        }
1877    }
1878
1879    fn claude_entry() -> ProviderEntry {
1880        ProviderEntry {
1881            provider_type: ProviderKind::Claude,
1882            name: Some("claude".into()),
1883            model: Some("claude-sonnet-4-6".into()),
1884            max_tokens: Some(8192),
1885            ..Default::default()
1886        }
1887    }
1888
1889    // ─── ProviderEntry::validate ─────────────────────────────────────────────
1890
1891    #[test]
1892    fn validate_ollama_valid() {
1893        assert!(ollama_entry().validate().is_ok());
1894    }
1895
1896    #[test]
1897    fn validate_claude_valid() {
1898        assert!(claude_entry().validate().is_ok());
1899    }
1900
1901    #[test]
1902    fn validate_compatible_without_name_errors() {
1903        let entry = ProviderEntry {
1904            provider_type: ProviderKind::Compatible,
1905            name: None,
1906            ..Default::default()
1907        };
1908        let err = entry.validate().unwrap_err();
1909        assert!(
1910            err.to_string().contains("compatible"),
1911            "error should mention compatible: {err}"
1912        );
1913    }
1914
1915    #[test]
1916    fn validate_compatible_with_name_ok() {
1917        let entry = ProviderEntry {
1918            provider_type: ProviderKind::Compatible,
1919            name: Some("my-proxy".into()),
1920            base_url: Some("http://localhost:8080".into()),
1921            model: Some("gpt-4o".into()),
1922            max_tokens: Some(4096),
1923            ..Default::default()
1924        };
1925        assert!(entry.validate().is_ok());
1926    }
1927
1928    #[test]
1929    fn validate_openai_valid() {
1930        let entry = ProviderEntry {
1931            provider_type: ProviderKind::OpenAi,
1932            name: Some("openai".into()),
1933            model: Some("gpt-4o".into()),
1934            max_tokens: Some(4096),
1935            ..Default::default()
1936        };
1937        assert!(entry.validate().is_ok());
1938    }
1939
1940    #[test]
1941    fn validate_gemini_valid() {
1942        let entry = ProviderEntry {
1943            provider_type: ProviderKind::Gemini,
1944            name: Some("gemini".into()),
1945            model: Some("gemini-2.0-flash".into()),
1946            ..Default::default()
1947        };
1948        assert!(entry.validate().is_ok());
1949    }
1950
1951    // ─── validate_pool ───────────────────────────────────────────────────────
1952
1953    #[test]
1954    fn validate_pool_empty_errors() {
1955        let err = validate_pool(&[]).unwrap_err();
1956        assert!(err.to_string().contains("at least one"), "{err}");
1957    }
1958
1959    #[test]
1960    fn validate_pool_single_entry_ok() {
1961        assert!(validate_pool(&[ollama_entry()]).is_ok());
1962    }
1963
1964    #[test]
1965    fn validate_pool_duplicate_names_errors() {
1966        let a = ollama_entry();
1967        let b = ollama_entry(); // same effective name "ollama"
1968        let err = validate_pool(&[a, b]).unwrap_err();
1969        assert!(err.to_string().contains("duplicate"), "{err}");
1970    }
1971
1972    #[test]
1973    fn validate_pool_multiple_defaults_errors() {
1974        let mut a = ollama_entry();
1975        let mut b = claude_entry();
1976        a.default = true;
1977        b.default = true;
1978        let err = validate_pool(&[a, b]).unwrap_err();
1979        assert!(err.to_string().contains("default"), "{err}");
1980    }
1981
1982    #[test]
1983    fn validate_pool_two_different_providers_ok() {
1984        assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
1985    }
1986
1987    #[test]
1988    fn validate_pool_propagates_entry_error() {
1989        let bad = ProviderEntry {
1990            provider_type: ProviderKind::Compatible,
1991            name: None, // invalid: compatible without name
1992            ..Default::default()
1993        };
1994        assert!(validate_pool(&[bad]).is_err());
1995    }
1996
1997    // ─── ProviderEntry::effective_model ──────────────────────────────────────
1998
1999    #[test]
2000    fn effective_model_returns_explicit_when_set() {
2001        let entry = ProviderEntry {
2002            provider_type: ProviderKind::Claude,
2003            model: Some("claude-sonnet-4-6".into()),
2004            ..Default::default()
2005        };
2006        assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
2007    }
2008
2009    #[test]
2010    fn effective_model_ollama_default_when_none() {
2011        let entry = ProviderEntry {
2012            provider_type: ProviderKind::Ollama,
2013            model: None,
2014            ..Default::default()
2015        };
2016        assert_eq!(entry.effective_model(), "qwen3:8b");
2017    }
2018
2019    #[test]
2020    fn effective_model_claude_default_when_none() {
2021        let entry = ProviderEntry {
2022            provider_type: ProviderKind::Claude,
2023            model: None,
2024            ..Default::default()
2025        };
2026        assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
2027    }
2028
2029    #[test]
2030    fn effective_model_openai_default_when_none() {
2031        let entry = ProviderEntry {
2032            provider_type: ProviderKind::OpenAi,
2033            model: None,
2034            ..Default::default()
2035        };
2036        assert_eq!(entry.effective_model(), "gpt-4o-mini");
2037    }
2038
2039    #[test]
2040    fn effective_model_gemini_default_when_none() {
2041        let entry = ProviderEntry {
2042            provider_type: ProviderKind::Gemini,
2043            model: None,
2044            ..Default::default()
2045        };
2046        assert_eq!(entry.effective_model(), "gemini-2.0-flash");
2047    }
2048
2049    // ─── LlmConfig::check_legacy_format ──────────────────────────────────────
2050
2051    // Parse a complete TOML snippet that includes the [llm] header.
2052    fn parse_llm(toml: &str) -> LlmConfig {
2053        #[derive(serde::Deserialize)]
2054        struct Wrapper {
2055            llm: LlmConfig,
2056        }
2057        toml::from_str::<Wrapper>(toml).unwrap().llm
2058    }
2059
2060    #[test]
2061    fn check_legacy_format_new_format_ok() {
2062        let cfg = parse_llm(
2063            r#"
2064[llm]
2065
2066[[llm.providers]]
2067type = "ollama"
2068model = "qwen3:8b"
2069"#,
2070        );
2071        assert!(cfg.check_legacy_format().is_ok());
2072    }
2073
2074    #[test]
2075    fn check_legacy_format_empty_providers_no_legacy_ok() {
2076        // No providers, no legacy fields — passes (empty [llm] is acceptable here)
2077        let cfg = parse_llm("[llm]\n");
2078        assert!(cfg.check_legacy_format().is_ok());
2079    }
2080
2081    // ─── LlmConfig::effective_* helpers ──────────────────────────────────────
2082
2083    #[test]
2084    fn effective_provider_falls_back_to_ollama_when_no_providers() {
2085        let cfg = parse_llm("[llm]\n");
2086        assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
2087    }
2088
2089    #[test]
2090    fn effective_provider_reads_from_providers_first() {
2091        let cfg = parse_llm(
2092            r#"
2093[llm]
2094
2095[[llm.providers]]
2096type = "claude"
2097model = "claude-sonnet-4-6"
2098"#,
2099        );
2100        assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
2101    }
2102
2103    #[test]
2104    fn effective_model_reads_from_providers_first() {
2105        let cfg = parse_llm(
2106            r#"
2107[llm]
2108
2109[[llm.providers]]
2110type = "ollama"
2111model = "qwen3:8b"
2112"#,
2113        );
2114        assert_eq!(cfg.effective_model(), "qwen3:8b");
2115    }
2116
2117    #[test]
2118    fn effective_model_skips_embed_only_provider() {
2119        let cfg = parse_llm(
2120            r#"
2121[llm]
2122
2123[[llm.providers]]
2124type = "ollama"
2125model = "gemma4:26b"
2126embed = true
2127
2128[[llm.providers]]
2129type = "openai"
2130model = "gpt-4o-mini"
2131"#,
2132        );
2133        assert_eq!(cfg.effective_model(), "gpt-4o-mini");
2134    }
2135
2136    #[test]
2137    fn effective_base_url_default_when_absent() {
2138        let cfg = parse_llm("[llm]\n");
2139        assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
2140    }
2141
2142    #[test]
2143    fn effective_base_url_from_providers_entry() {
2144        let cfg = parse_llm(
2145            r#"
2146[llm]
2147
2148[[llm.providers]]
2149type = "ollama"
2150base_url = "http://myhost:11434"
2151"#,
2152        );
2153        assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
2154    }
2155
2156    // ─── ComplexityRoutingConfig / LlmRoutingStrategy::Triage TOML parsing ──
2157
2158    #[test]
2159    fn complexity_routing_defaults() {
2160        let cr = ComplexityRoutingConfig::default();
2161        assert!(
2162            cr.bypass_single_provider,
2163            "bypass_single_provider must default to true"
2164        );
2165        assert_eq!(cr.triage_timeout_secs, 5);
2166        assert_eq!(cr.max_triage_tokens, 50);
2167        assert!(cr.triage_provider.is_none());
2168        assert!(cr.tiers.simple.is_none());
2169    }
2170
2171    #[test]
2172    fn complexity_routing_toml_round_trip() {
2173        let cfg = parse_llm(
2174            r#"
2175[llm]
2176routing = "triage"
2177
2178[llm.complexity_routing]
2179triage_provider = "fast"
2180bypass_single_provider = false
2181triage_timeout_secs = 10
2182max_triage_tokens = 100
2183
2184[llm.complexity_routing.tiers]
2185simple = "fast"
2186medium = "medium"
2187complex = "large"
2188expert = "opus"
2189"#,
2190        );
2191        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2192        let cr = cfg
2193            .complexity_routing
2194            .expect("complexity_routing must be present");
2195        assert_eq!(cr.triage_provider.as_deref(), Some("fast"));
2196        assert!(!cr.bypass_single_provider);
2197        assert_eq!(cr.triage_timeout_secs, 10);
2198        assert_eq!(cr.max_triage_tokens, 100);
2199        assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
2200        assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
2201        assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
2202        assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
2203    }
2204
2205    #[test]
2206    fn complexity_routing_partial_tiers_toml() {
2207        // Only simple + complex configured; medium and expert are None.
2208        let cfg = parse_llm(
2209            r#"
2210[llm]
2211routing = "triage"
2212
2213[llm.complexity_routing.tiers]
2214simple = "haiku"
2215complex = "sonnet"
2216"#,
2217        );
2218        let cr = cfg
2219            .complexity_routing
2220            .expect("complexity_routing must be present");
2221        assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
2222        assert!(cr.tiers.medium.is_none());
2223        assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
2224        assert!(cr.tiers.expert.is_none());
2225        // Defaults still applied.
2226        assert!(cr.bypass_single_provider);
2227        assert_eq!(cr.triage_timeout_secs, 5);
2228    }
2229
2230    #[test]
2231    fn routing_strategy_triage_deserialized() {
2232        let cfg = parse_llm(
2233            r#"
2234[llm]
2235routing = "triage"
2236"#,
2237        );
2238        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2239    }
2240
2241    // ─── stt_provider_entry ───────────────────────────────────────────────────
2242
2243    #[test]
2244    fn stt_provider_entry_by_name_match() {
2245        let cfg = parse_llm(
2246            r#"
2247[llm]
2248
2249[[llm.providers]]
2250type = "openai"
2251name = "quality"
2252model = "gpt-5.4"
2253stt_model = "gpt-4o-mini-transcribe"
2254
2255[llm.stt]
2256provider = "quality"
2257"#,
2258        );
2259        let entry = cfg.stt_provider_entry().expect("should find stt provider");
2260        assert_eq!(entry.effective_name(), "quality");
2261        assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
2262    }
2263
2264    #[test]
2265    fn stt_provider_entry_auto_detect_when_provider_empty() {
2266        let cfg = parse_llm(
2267            r#"
2268[llm]
2269
2270[[llm.providers]]
2271type = "openai"
2272name = "openai-stt"
2273stt_model = "whisper-1"
2274
2275[llm.stt]
2276provider = ""
2277"#,
2278        );
2279        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2280        assert_eq!(entry.effective_name(), "openai-stt");
2281    }
2282
2283    #[test]
2284    fn stt_provider_entry_auto_detect_no_stt_section() {
2285        let cfg = parse_llm(
2286            r#"
2287[llm]
2288
2289[[llm.providers]]
2290type = "openai"
2291name = "openai-stt"
2292stt_model = "whisper-1"
2293"#,
2294        );
2295        // No [llm.stt] section — should still find first provider with stt_model.
2296        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2297        assert_eq!(entry.effective_name(), "openai-stt");
2298    }
2299
2300    #[test]
2301    fn stt_provider_entry_none_when_no_stt_model() {
2302        let cfg = parse_llm(
2303            r#"
2304[llm]
2305
2306[[llm.providers]]
2307type = "openai"
2308name = "quality"
2309model = "gpt-5.4"
2310"#,
2311        );
2312        assert!(cfg.stt_provider_entry().is_none());
2313    }
2314
2315    #[test]
2316    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
2317        // Named provider exists but has no stt_model; another unnamed has stt_model.
2318        let cfg = parse_llm(
2319            r#"
2320[llm]
2321
2322[[llm.providers]]
2323type = "openai"
2324name = "quality"
2325model = "gpt-5.4"
2326
2327[[llm.providers]]
2328type = "openai"
2329name = "openai-stt"
2330stt_model = "whisper-1"
2331
2332[llm.stt]
2333provider = "quality"
2334"#,
2335        );
2336        // "quality" has no stt_model — returns None for name-based lookup.
2337        assert!(cfg.stt_provider_entry().is_none());
2338    }
2339
2340    #[test]
2341    fn stt_config_deserializes_new_slim_format() {
2342        let cfg = parse_llm(
2343            r#"
2344[llm]
2345
2346[[llm.providers]]
2347type = "openai"
2348name = "quality"
2349stt_model = "whisper-1"
2350
2351[llm.stt]
2352provider = "quality"
2353language = "en"
2354"#,
2355        );
2356        let stt = cfg.stt.as_ref().expect("stt section present");
2357        assert_eq!(stt.provider, "quality");
2358        assert_eq!(stt.language, "en");
2359    }
2360
2361    #[test]
2362    fn stt_config_default_provider_is_empty() {
2363        // Verify that W4 fix: default_stt_provider() returns "" not "whisper".
2364        assert_eq!(default_stt_provider(), "");
2365    }
2366
2367    #[test]
2368    fn validate_stt_missing_provider_ok() {
2369        let cfg = parse_llm("[llm]\n");
2370        assert!(cfg.validate_stt().is_ok());
2371    }
2372
2373    #[test]
2374    fn validate_stt_valid_reference() {
2375        let cfg = parse_llm(
2376            r#"
2377[llm]
2378
2379[[llm.providers]]
2380type = "openai"
2381name = "quality"
2382stt_model = "whisper-1"
2383
2384[llm.stt]
2385provider = "quality"
2386"#,
2387        );
2388        assert!(cfg.validate_stt().is_ok());
2389    }
2390
2391    #[test]
2392    fn validate_stt_nonexistent_provider_errors() {
2393        let cfg = parse_llm(
2394            r#"
2395[llm]
2396
2397[[llm.providers]]
2398type = "openai"
2399name = "quality"
2400model = "gpt-5.4"
2401
2402[llm.stt]
2403provider = "nonexistent"
2404"#,
2405        );
2406        assert!(cfg.validate_stt().is_err());
2407    }
2408
2409    #[test]
2410    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
2411        // MEDIUM: provider is found but has no stt_model — should return Ok (warn path, not error).
2412        let cfg = parse_llm(
2413            r#"
2414[llm]
2415
2416[[llm.providers]]
2417type = "openai"
2418name = "quality"
2419model = "gpt-5.4"
2420
2421[llm.stt]
2422provider = "quality"
2423"#,
2424        );
2425        // validate_stt must succeed (only a tracing::warn is emitted — not an error).
2426        assert!(cfg.validate_stt().is_ok());
2427        // stt_provider_entry must return None because no stt_model is set.
2428        assert!(
2429            cfg.stt_provider_entry().is_none(),
2430            "stt_provider_entry must be None when provider has no stt_model"
2431        );
2432    }
2433
2434    // ─── BanditConfig::warmup_queries deserialization ─────────────────────────
2435
2436    #[test]
2437    fn bandit_warmup_queries_explicit_value_is_deserialized() {
2438        let cfg = parse_llm(
2439            r#"
2440[llm]
2441
2442[llm.router]
2443strategy = "bandit"
2444
2445[llm.router.bandit]
2446warmup_queries = 50
2447"#,
2448        );
2449        let bandit = cfg
2450            .router
2451            .expect("router section must be present")
2452            .bandit
2453            .expect("bandit section must be present");
2454        assert_eq!(
2455            bandit.warmup_queries,
2456            Some(50),
2457            "warmup_queries = 50 must deserialize to Some(50)"
2458        );
2459    }
2460
2461    #[test]
2462    fn bandit_warmup_queries_explicit_null_is_none() {
2463        // Explicitly writing the field as absent: field simply not present is
2464        // equivalent due to #[serde(default)]. Test that an explicit 0 is Some(0).
2465        let cfg = parse_llm(
2466            r#"
2467[llm]
2468
2469[llm.router]
2470strategy = "bandit"
2471
2472[llm.router.bandit]
2473warmup_queries = 0
2474"#,
2475        );
2476        let bandit = cfg
2477            .router
2478            .expect("router section must be present")
2479            .bandit
2480            .expect("bandit section must be present");
2481        // 0 is a valid explicit value — it means "preserve computed default".
2482        assert_eq!(
2483            bandit.warmup_queries,
2484            Some(0),
2485            "warmup_queries = 0 must deserialize to Some(0)"
2486        );
2487    }
2488
2489    #[test]
2490    fn bandit_warmup_queries_missing_field_defaults_to_none() {
2491        // When warmup_queries is omitted entirely, #[serde(default)] must produce None.
2492        let cfg = parse_llm(
2493            r#"
2494[llm]
2495
2496[llm.router]
2497strategy = "bandit"
2498
2499[llm.router.bandit]
2500alpha = 1.5
2501"#,
2502        );
2503        let bandit = cfg
2504            .router
2505            .expect("router section must be present")
2506            .bandit
2507            .expect("bandit section must be present");
2508        assert_eq!(
2509            bandit.warmup_queries, None,
2510            "omitted warmup_queries must default to None"
2511        );
2512    }
2513
2514    #[test]
2515    fn provider_name_new_and_as_str() {
2516        let n = ProviderName::new("fast");
2517        assert_eq!(n.as_str(), "fast");
2518        assert!(!n.is_empty());
2519    }
2520
2521    #[test]
2522    fn provider_name_default_is_empty() {
2523        let n = ProviderName::default();
2524        assert!(n.is_empty());
2525        assert_eq!(n.as_str(), "");
2526    }
2527
2528    #[test]
2529    fn provider_name_deref_to_str() {
2530        let n = ProviderName::new("quality");
2531        let s: &str = &n;
2532        assert_eq!(s, "quality");
2533    }
2534
2535    #[test]
2536    fn provider_name_partial_eq_str() {
2537        let n = ProviderName::new("fast");
2538        assert_eq!(n, "fast");
2539        assert_ne!(n, "slow");
2540    }
2541
2542    #[test]
2543    fn provider_name_serde_roundtrip() {
2544        let n = ProviderName::new("my-provider");
2545        let json = serde_json::to_string(&n).expect("serialize");
2546        assert_eq!(json, "\"my-provider\"");
2547        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2548        assert_eq!(back, n);
2549    }
2550
2551    #[test]
2552    fn provider_name_serde_empty_roundtrip() {
2553        let n = ProviderName::default();
2554        let json = serde_json::to_string(&n).expect("serialize");
2555        assert_eq!(json, "\"\"");
2556        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2557        assert_eq!(back, n);
2558        assert!(back.is_empty());
2559    }
2560
2561    // ─── GonkaNode / ProviderKind::Gonka ─────────────────────────────────────
2562
2563    fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2564        ProviderEntry {
2565            provider_type: ProviderKind::Gonka,
2566            name: Some("my-gonka".into()),
2567            gonka_nodes: nodes,
2568            ..Default::default()
2569        }
2570    }
2571
2572    fn valid_gonka_nodes() -> Vec<GonkaNode> {
2573        vec![
2574            GonkaNode {
2575                url: "https://node1.gonka.ai".into(),
2576                address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2577                name: Some("node1".into()),
2578            },
2579            GonkaNode {
2580                url: "https://node2.gonka.ai".into(),
2581                address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2582                name: Some("node2".into()),
2583            },
2584            GonkaNode {
2585                url: "http://node3.internal".into(),
2586                address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2587                name: None,
2588            },
2589        ]
2590    }
2591
2592    #[test]
2593    fn validate_gonka_valid() {
2594        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2595        assert!(entry.validate().is_ok());
2596    }
2597
2598    #[test]
2599    fn validate_gonka_empty_nodes_errors() {
2600        let entry = gonka_entry_with_nodes(vec![]);
2601        let err = entry.validate().unwrap_err();
2602        assert!(
2603            err.to_string().contains("gonka_nodes"),
2604            "error should mention gonka_nodes: {err}"
2605        );
2606    }
2607
2608    #[test]
2609    fn validate_gonka_node_empty_url_errors() {
2610        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2611            url: String::new(),
2612            address: "gonka1test".into(),
2613            name: None,
2614        }]);
2615        let err = entry.validate().unwrap_err();
2616        assert!(err.to_string().contains("url"), "{err}");
2617    }
2618
2619    #[test]
2620    fn validate_gonka_node_invalid_scheme_errors() {
2621        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2622            url: "ftp://node.gonka.ai".into(),
2623            address: "gonka1test".into(),
2624            name: None,
2625        }]);
2626        let err = entry.validate().unwrap_err();
2627        assert!(err.to_string().contains("http"), "{err}");
2628    }
2629
2630    #[test]
2631    fn validate_gonka_without_name_errors() {
2632        let entry = ProviderEntry {
2633            provider_type: ProviderKind::Gonka,
2634            name: None,
2635            gonka_nodes: valid_gonka_nodes(),
2636            ..Default::default()
2637        };
2638        let err = entry.validate().unwrap_err();
2639        assert!(err.to_string().contains("gonka"), "{err}");
2640    }
2641
2642    #[test]
2643    fn gonka_toml_round_trip() {
2644        let toml = r#"
2645[llm]
2646
2647[[llm.providers]]
2648type = "gonka"
2649name = "my-gonka"
2650gonka_chain_prefix = "custom-chain"
2651
2652[[llm.providers.gonka_nodes]]
2653url = "https://node1.gonka.ai"
2654address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2655name = "node1"
2656
2657[[llm.providers.gonka_nodes]]
2658url = "https://node2.gonka.ai"
2659address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
2660name = "node2"
2661
2662[[llm.providers.gonka_nodes]]
2663url = "https://node3.gonka.ai"
2664address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
2665"#;
2666        let cfg = parse_llm(toml);
2667        assert_eq!(cfg.providers.len(), 1);
2668        let entry = &cfg.providers[0];
2669        assert_eq!(entry.provider_type, ProviderKind::Gonka);
2670        assert_eq!(entry.name.as_deref(), Some("my-gonka"));
2671        let nodes = &entry.gonka_nodes;
2672        assert_eq!(nodes.len(), 3);
2673        assert_eq!(nodes[0].url, "https://node1.gonka.ai");
2674        assert_eq!(
2675            nodes[0].address,
2676            "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2677        );
2678        assert_eq!(nodes[0].name.as_deref(), Some("node1"));
2679        assert_eq!(nodes[2].name, None);
2680        assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
2681    }
2682
2683    #[test]
2684    fn gonka_default_chain_prefix() {
2685        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2686        assert_eq!(entry.effective_gonka_chain_prefix(), "gonka");
2687    }
2688
2689    #[test]
2690    fn gonka_explicit_chain_prefix() {
2691        let entry = ProviderEntry {
2692            provider_type: ProviderKind::Gonka,
2693            name: Some("my-gonka".into()),
2694            gonka_nodes: valid_gonka_nodes(),
2695            gonka_chain_prefix: Some("my-chain".into()),
2696            ..Default::default()
2697        };
2698        assert_eq!(entry.effective_gonka_chain_prefix(), "my-chain");
2699    }
2700
2701    #[test]
2702    fn effective_model_gonka_is_empty() {
2703        let entry = ProviderEntry {
2704            provider_type: ProviderKind::Gonka,
2705            model: None,
2706            ..Default::default()
2707        };
2708        assert_eq!(entry.effective_model(), "");
2709    }
2710
2711    #[test]
2712    fn existing_configs_still_parse() {
2713        let toml = r#"
2714[llm]
2715
2716[[llm.providers]]
2717type = "ollama"
2718model = "qwen3:8b"
2719
2720[[llm.providers]]
2721type = "claude"
2722name = "claude"
2723model = "claude-sonnet-4-6"
2724"#;
2725        let cfg = parse_llm(toml);
2726        assert_eq!(cfg.providers.len(), 2);
2727        assert_eq!(cfg.providers[0].provider_type, ProviderKind::Ollama);
2728        assert_eq!(cfg.providers[1].provider_type, ProviderKind::Claude);
2729    }
2730
    // ── ProviderEntry::validate — Cocoon URL, model and pricing validation ────
2732
2733    fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
2734        ProviderEntry {
2735            provider_type: ProviderKind::Cocoon,
2736            name: Some("cocoon".into()),
2737            cocoon_client_url: url.map(str::to_owned),
2738            model: model.map(str::to_owned),
2739            ..Default::default()
2740        }
2741    }
2742
2743    #[test]
2744    fn test_cocoon_url_validation_accepts_http() {
2745        assert!(
2746            cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2747                .validate()
2748                .is_ok()
2749        );
2750    }
2751
2752    #[test]
2753    fn test_cocoon_url_validation_accepts_https_localhost() {
2754        assert!(
2755            cocoon_entry(Some("https://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2756                .validate()
2757                .is_ok()
2758        );
2759    }
2760
2761    #[test]
2762    fn test_cocoon_url_validation_rejects_non_localhost() {
2763        let err = cocoon_entry(Some("http://192.168.1.10:10000"), Some("Qwen/Qwen3-0.6B"))
2764            .validate()
2765            .unwrap_err();
2766        assert!(
2767            err.to_string().contains("localhost"),
2768            "error should mention localhost restriction: {err}"
2769        );
2770    }
2771
2772    #[test]
2773    fn test_cocoon_url_validation_rejects_non_http_scheme() {
2774        let err = cocoon_entry(Some("ftp://localhost"), Some("Qwen/Qwen3-0.6B"))
2775            .validate()
2776            .unwrap_err();
2777        assert!(
2778            err.to_string().contains("ftp"),
2779            "error should mention the bad scheme: {err}"
2780        );
2781    }
2782
2783    #[test]
2784    fn test_cocoon_url_validation_rejects_invalid_url() {
2785        let err = cocoon_entry(Some("not-a-url"), Some("Qwen/Qwen3-0.6B"))
2786            .validate()
2787            .unwrap_err();
2788        assert!(
2789            err.to_string().contains("not-a-url"),
2790            "error should mention the bad value: {err}"
2791        );
2792    }
2793
2794    #[test]
2795    fn test_cocoon_url_none_passes() {
2796        assert!(
2797            cocoon_entry(None, Some("Qwen/Qwen3-0.6B"))
2798                .validate()
2799                .is_ok()
2800        );
2801    }
2802
2803    #[test]
2804    fn test_cocoon_model_empty_rejected() {
2805        let err = cocoon_entry(Some("http://localhost:10000"), Some(""))
2806            .validate()
2807            .unwrap_err();
2808        assert!(
2809            err.to_string().contains("empty"),
2810            "error should mention 'empty': {err}"
2811        );
2812    }
2813
2814    #[test]
2815    fn test_cocoon_model_none_passes() {
2816        assert!(
2817            cocoon_entry(Some("http://localhost:10000"), None)
2818                .validate()
2819                .is_ok()
2820        );
2821    }
2822
2823    #[test]
2824    fn validate_cocoon_pricing_negative_prompt_errors() {
2825        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2826        e.cocoon_pricing = Some(CocoonPricing {
2827            prompt_cents_per_1k: -1.0,
2828            completion_cents_per_1k: 0.03,
2829        });
2830        assert!(e.validate().is_err());
2831    }
2832
2833    #[test]
2834    fn validate_cocoon_pricing_negative_completion_errors() {
2835        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2836        e.cocoon_pricing = Some(CocoonPricing {
2837            prompt_cents_per_1k: 0.01,
2838            completion_cents_per_1k: -0.5,
2839        });
2840        assert!(e.validate().is_err());
2841    }
2842
2843    #[test]
2844    fn validate_cocoon_pricing_valid_passes() {
2845        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2846        e.cocoon_pricing = Some(CocoonPricing {
2847            prompt_cents_per_1k: 0.01,
2848            completion_cents_per_1k: 0.03,
2849        });
2850        assert!(e.validate().is_ok());
2851    }
2852}
zeph_config/providers.rs

zeph_config/
providers.rs