Skip to main content

zeph_config/
providers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6// ── LLM provider config types (moved from zeph-llm) ─────────────────────────
7
/// Extended or adaptive thinking mode for Claude.
///
/// Serialized as an internally tagged value: `mode` is the tag and variant names are
/// written in `snake_case`. Examples:
///
/// - `{ "mode": "extended", "budget_tokens": 10000 }`
/// - `{ "mode": "adaptive" }`
/// - `{ "mode": "adaptive", "effort": "high" }`
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum ThinkingConfig {
    /// Extended thinking with an explicit token budget.
    Extended {
        /// Maximum thinking tokens to allocate.
        budget_tokens: u32,
    },
    /// Adaptive thinking that selects effort automatically.
    Adaptive {
        /// Explicit effort hint when provided; model-chosen when `None`.
        ///
        /// Optional on input and left out of the serialized form when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        effort: Option<ThinkingEffort>,
    },
}
27
/// Effort level for adaptive thinking.
///
/// Serialized as a lowercase string: `"low"`, `"medium"`, or `"high"`.
/// [`Default`] yields [`ThinkingEffort::Medium`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum ThinkingEffort {
    /// Minimal thinking; fastest responses.
    Low,
    /// Balanced thinking depth. This is the default.
    #[default]
    Medium,
    /// Maximum thinking depth; slowest responses.
    High,
}
41
/// Prompt-cache TTL variant for the Anthropic API.
///
/// When used as a TOML config value the accepted strings are `"ephemeral"` and `"1h"`.
/// On the wire (Anthropic API), `OneHour` serializes as `"1h"` inside the `cache_control.ttl`
/// field.
///
/// [`Default`] yields [`CacheTtl::Ephemeral`].
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum CacheTtl {
    /// Default ephemeral TTL (~5 minutes). No beta header required.
    #[default]
    Ephemeral,
    /// Extended 1-hour TTL. Requires the `extended-cache-ttl-2025-04-25` beta header.
    /// Cache writes cost approximately 2× more than `Ephemeral`.
    // Explicit rename: the container-level `snake_case` rule would otherwise produce
    // `"one_hour"` instead of `"1h"`.
    #[serde(rename = "1h")]
    OneHour,
}
58
59impl CacheTtl {
60    /// Returns `true` when this TTL variant requires the `extended-cache-ttl-2025-04-25` beta
61    /// header to be sent with each request.
62    #[must_use]
63    pub fn requires_beta(self) -> bool {
64        match self {
65            Self::OneHour => true,
66            Self::Ephemeral => false,
67        }
68    }
69}
70
/// Thinking level for Gemini models that support extended reasoning.
///
/// Maps to `generationConfig.thinkingConfig.thinkingLevel` in the Gemini API.
/// Valid for Gemini 3+ models. For Gemini 2.5, use `thinking_budget` instead.
///
/// Serialized as a lowercase string: `"minimal"`, `"low"`, `"medium"`, or `"high"`.
/// No `Default` is derived, so no level is implied when the value is absent.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[non_exhaustive]
pub enum GeminiThinkingLevel {
    /// Minimal reasoning pass.
    Minimal,
    /// Low reasoning depth.
    Low,
    /// Medium reasoning depth.
    Medium,
    /// Full reasoning depth.
    High,
}
88
89pub use zeph_common::ProviderName;
90
/// Default response-cache TTL, in seconds (one hour).
fn default_response_cache_ttl_secs() -> u64 {
    3_600
}

/// Default cosine-similarity threshold for semantic cache hits.
fn default_semantic_cache_threshold() -> f32 {
    0.95
}

/// Default number of cached entries examined per semantic lookup.
fn default_semantic_cache_max_candidates() -> u32 {
    10
}

/// Default EMA alpha for the router.
fn default_router_ema_alpha() -> f64 {
    0.1
}

/// Default router reorder interval.
fn default_router_reorder_interval() -> u64 {
    10
}

/// Default embedding model name.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}

/// Default Candle model source.
fn default_candle_source() -> String {
    String::from("huggingface")
}

/// Default chat template name.
fn default_chat_template() -> String {
    String::from("chatml")
}

/// Default Candle device.
fn default_candle_device() -> String {
    String::from("cpu")
}

/// Default `temperature` value.
fn default_temperature() -> f64 {
    0.7
}

/// Default `max_tokens` value.
fn default_max_tokens() -> usize {
    2_048
}

/// Default `seed` value.
fn default_seed() -> u64 {
    42
}

/// Default `repeat_penalty` value.
fn default_repeat_penalty() -> f32 {
    1.1
}

/// Default `repeat_last_n` value.
fn default_repeat_last_n() -> usize {
    64
}

/// Default cascade quality threshold.
fn default_cascade_quality_threshold() -> f64 {
    0.5
}

/// Default maximum number of cascade escalations per request.
fn default_cascade_max_escalations() -> u8 {
    2
}

/// Default cascade quality-history window size.
fn default_cascade_window_size() -> usize {
    50
}

/// Default cascade judge timeout, in milliseconds (5 s).
fn default_cascade_judge_timeout_ms() -> u64 {
    5000
}

/// Default reputation decay factor.
fn default_reputation_decay_factor() -> f64 {
    0.95
}

/// Default reputation weight.
fn default_reputation_weight() -> f64 {
    0.3
}

/// Default minimum observation count for reputation.
fn default_reputation_min_observations() -> u64 {
    5
}
174
/// Default STT provider name.
///
/// The empty string means "auto-detect": no explicit provider is named.
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}
180
/// Default STT transcription language hint.
///
/// Returns the literal `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
186
/// Returns the default embedding model name used by `[llm] embedding_model`.
///
/// Crate-visible accessor that forwards to the private `default_embedding_model`
/// function, so the value has a single definition.
#[must_use]
pub(crate) fn get_default_embedding_model() -> String {
    default_embedding_model()
}

/// Returns the default response cache TTL in seconds.
///
/// Crate-visible accessor that forwards to the private
/// `default_response_cache_ttl_secs` function.
#[must_use]
pub(crate) fn get_default_response_cache_ttl_secs() -> u64 {
    default_response_cache_ttl_secs()
}

/// Returns the default EMA alpha for the router latency estimator.
///
/// Crate-visible accessor that forwards to the private `default_router_ema_alpha`
/// function.
#[must_use]
pub(crate) fn get_default_router_ema_alpha() -> f64 {
    default_router_ema_alpha()
}

/// Returns the default router reorder interval (turns between provider re-ranking).
///
/// Crate-visible accessor that forwards to the private
/// `default_router_reorder_interval` function.
#[must_use]
pub(crate) fn get_default_router_reorder_interval() -> u64 {
    default_router_reorder_interval()
}
210
/// LLM provider backend selector.
///
/// Used in `[[llm.providers]]` entries as the `type` field.
///
/// Variant names are (de)serialized in lowercase, e.g. `OpenAi` is written as
/// `"openai"`.
///
/// # Example (TOML)
///
/// ```toml
/// [[llm.providers]]
/// type = "openai"
/// model = "gpt-4o"
/// name = "quality"
/// ```
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
    /// Local Ollama server (default base URL: `http://localhost:11434`).
    Ollama,
    /// Anthropic Claude API.
    Claude,
    /// `OpenAI` API.
    OpenAi,
    /// Google Gemini API.
    Gemini,
    /// Local Candle inference (CPU/GPU, no external server required).
    Candle,
    /// OpenAI-compatible third-party API (e.g. Groq, Together AI, LM Studio).
    Compatible,
    /// Native Gonka blockchain provider.
    Gonka,
    /// Cocoon confidential compute network via localhost sidecar.
    Cocoon,
}
244
245impl ProviderKind {
246    /// Return the lowercase string identifier for this provider kind.
247    ///
248    /// # Examples
249    ///
250    /// ```
251    /// use zeph_config::ProviderKind;
252    ///
253    /// assert_eq!(ProviderKind::Claude.as_str(), "claude");
254    /// assert_eq!(ProviderKind::OpenAi.as_str(), "openai");
255    /// ```
256    #[must_use]
257    pub fn as_str(self) -> &'static str {
258        match self {
259            Self::Ollama => "ollama",
260            Self::Claude => "claude",
261            Self::OpenAi => "openai",
262            Self::Gemini => "gemini",
263            Self::Candle => "candle",
264            Self::Compatible => "compatible",
265            Self::Gonka => "gonka",
266            Self::Cocoon => "cocoon",
267        }
268    }
269}
270
271impl std::fmt::Display for ProviderKind {
272    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
273        f.write_str(self.as_str())
274    }
275}
276
/// LLM configuration, nested under `[llm]` in TOML.
///
/// Declares the provider pool and controls routing, embedding, caching, and STT.
/// All providers are declared in `[[llm.providers]]`; subsystems reference them by
/// the `name` field using a `*_provider` config key.
///
/// # Example (TOML)
///
/// ```toml
/// [[llm.providers]]
/// name = "fast"
/// type = "openai"
/// model = "gpt-4o-mini"
///
/// [[llm.providers]]
/// name = "quality"
/// type = "claude"
/// model = "claude-opus-4-5"
///
/// [llm]
/// routing = "none"
/// embedding_model = "qwen3-embedding"
/// ```
#[derive(Debug, Deserialize, Serialize)]
pub struct LlmConfig {
    /// Provider pool. First entry is default unless one is marked `default = true`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub providers: Vec<ProviderEntry>,

    /// Routing strategy for multi-provider configs.
    #[serde(default, skip_serializing_if = "is_routing_none")]
    pub routing: LlmRoutingStrategy,

    /// Embedding model name. Defaults to `"qwen3-embedding"` when omitted.
    #[serde(default = "default_embedding_model_opt")]
    pub embedding_model: String,
    /// Optional Candle settings. Left out of the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candle: Option<CandleConfig>,
    /// Optional speech-to-text settings (`[llm.stt]`). `None` when the table is absent.
    #[serde(default)]
    pub stt: Option<SttConfig>,
    /// Enable the response cache. Default: `false`.
    #[serde(default)]
    pub response_cache_enabled: bool,
    /// Response cache TTL in seconds. Default: 3600.
    #[serde(default = "default_response_cache_ttl_secs")]
    pub response_cache_ttl_secs: u64,
    /// Enable semantic similarity-based response caching. Requires embedding support.
    #[serde(default)]
    pub semantic_cache_enabled: bool,
    /// Cosine similarity threshold for semantic cache hits (0.0–1.0).
    ///
    /// Only the highest-scoring candidate above this threshold is returned.
    /// Lower values produce more cache hits but risk returning less relevant responses.
    /// Recommended range: 0.92–0.98; default: 0.95.
    #[serde(default = "default_semantic_cache_threshold")]
    pub semantic_cache_threshold: f32,
    /// Maximum cached entries to examine per semantic lookup (SQL `LIMIT` clause in
    /// `ResponseCache::get_semantic()`). Controls the recall-vs-performance tradeoff:
    ///
    /// - **Higher values** (e.g. 50): scan more entries, better chance of finding a
    ///   semantically similar cached response, but slower queries.
    /// - **Lower values** (e.g. 5): faster queries, but may miss relevant cached entries
    ///   when the cache is large.
    /// - **Default (10)**: balanced middle ground for typical workloads.
    ///
    /// Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running
    /// sessions with many cached responses); reduce to 5 for low-latency interactive use.
    /// Env override: `ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES`.
    #[serde(default = "default_semantic_cache_max_candidates")]
    pub semantic_cache_max_candidates: u32,
    /// Toggle for the router's EMA behavior (see `router_ema_alpha`). Default: `false`.
    #[serde(default)]
    pub router_ema_enabled: bool,
    /// EMA alpha for the router latency estimator. Default: 0.1.
    #[serde(default = "default_router_ema_alpha")]
    pub router_ema_alpha: f64,
    /// Router reorder interval (turns between provider re-ranking). Default: 10.
    #[serde(default = "default_router_reorder_interval")]
    pub router_reorder_interval: u64,
    /// Routing configuration for Thompson/Cascade strategies.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub router: Option<RouterConfig>,
    /// Provider-specific instruction file to inject into the system prompt.
    /// Merged with `agent.instruction_files` at startup.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instruction_file: Option<std::path::PathBuf>,
    /// Shorthand model spec for tool-pair summarization and context compaction.
    /// Format: `ollama/<model>`, `claude[/<model>]`, `openai[/<model>]`, `compatible/<name>`, `candle`.
    /// Ignored when `[llm.summary_provider]` is set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_model: Option<String>,
    /// Structured provider config for summarization. Takes precedence over `summary_model`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_provider: Option<ProviderEntry>,

    /// Complexity triage routing configuration. Required when `routing = "triage"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub complexity_routing: Option<ComplexityRoutingConfig>,

    /// Collaborative Entropy (`CoE`) configuration. `None` = `CoE` disabled.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coe: Option<CoeConfig>,
}
374
/// Serde default for `LlmConfig::embedding_model`.
///
/// Forwards to `default_embedding_model`, so the default model name has a single
/// definition.
fn default_embedding_model_opt() -> String {
    default_embedding_model()
}
378
379#[allow(clippy::trivially_copy_pass_by_ref)]
380fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
381    *s == LlmRoutingStrategy::None
382}
383
384impl LlmConfig {
385    /// Effective provider kind for the primary (first/default) provider in the pool.
386    #[must_use]
387    pub fn effective_provider(&self) -> ProviderKind {
388        self.providers
389            .first()
390            .map_or(ProviderKind::Ollama, |e| e.provider_type)
391    }
392
393    /// Effective base URL for the primary provider.
394    #[must_use]
395    pub fn effective_base_url(&self) -> &str {
396        self.providers
397            .first()
398            .and_then(|e| e.base_url.as_deref())
399            .unwrap_or("http://localhost:11434")
400    }
401
402    /// Effective model for the primary chat-capable provider.
403    ///
404    /// Skips embed-only entries (those with `embed = true`) and returns the model of the
405    /// first provider that can handle chat requests. Falls back to `"qwen3:8b"` when no
406    /// chat-capable provider is configured.
407    #[must_use]
408    pub fn effective_model(&self) -> &str {
409        self.providers
410            .iter()
411            .find(|e| !e.embed)
412            .and_then(|e| e.model.as_deref())
413            .unwrap_or("qwen3:8b")
414    }
415
416    /// Find the provider entry designated for STT.
417    ///
418    /// Resolution priority:
419    /// 1. `[llm.stt].provider` matches `[[llm.providers]].name` and the entry has `stt_model`
420    /// 2. `[llm.stt].provider` is empty — fall through to auto-detect
421    /// 3. First provider with `stt_model` set (auto-detect fallback)
422    /// 4. `None` — STT disabled
423    #[must_use]
424    pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
425        let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
426        if name_hint.is_empty() {
427            self.providers.iter().find(|p| p.stt_model.is_some())
428        } else {
429            self.providers
430                .iter()
431                .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
432        }
433    }
434
435    /// Validate that the config uses the new `[[llm.providers]]` format.
436    ///
437    /// # Errors
438    ///
439    /// Returns `ConfigError::Validation` when no providers are configured.
440    pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
441        Ok(())
442    }
443
444    /// Validate STT config cross-references.
445    ///
446    /// # Errors
447    ///
448    /// Returns `ConfigError::Validation` when the referenced STT provider does not exist.
449    pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
450        use crate::error::ConfigError;
451
452        let Some(stt) = &self.stt else {
453            return Ok(());
454        };
455        if stt.provider.is_empty() {
456            return Ok(());
457        }
458        let found = self
459            .providers
460            .iter()
461            .find(|p| p.effective_name() == stt.provider);
462        match found {
463            None => {
464                return Err(ConfigError::Validation(format!(
465                    "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
466                    stt.provider
467                )));
468            }
469            Some(entry) if entry.stt_model.is_none() => {
470                tracing::warn!(
471                    provider = stt.provider,
472                    "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
473                );
474            }
475            _ => {}
476        }
477        Ok(())
478    }
479
480    /// Resolve `provider_name` to its model string and emit a startup warning when the
481    /// model does not look like a fast-tier model.
482    ///
483    /// **Soft check — never returns an error.** Misconfiguration produces a single
484    /// `tracing::warn!` at startup so operators can fix configs without being blocked.
485    ///
486    /// Rules:
487    /// - Empty `provider_name` → silently OK (caller will use the primary provider).
488    /// - Provider not found in pool → warns `"<label> provider '<name>' not found"`.
489    /// - Model resolved but not in `FAST_TIER_MODEL_HINTS` and not in `extra_allowlist` →
490    ///   warns `"<label> provider '<name>' uses '<model>' which may not be fast-tier"`.
491    /// - Model matches a hint or allowlist entry → silently OK.
492    ///
493    /// # Examples
494    ///
495    /// ```no_run
496    /// use zeph_config::providers::{LlmConfig, ProviderName};
497    ///
498    /// // LlmConfig is constructed via config file; here we illustrate the call shape.
499    /// # let cfg: LlmConfig = unimplemented!();
500    /// // empty provider name is silently ok
501    /// cfg.warn_non_fast_tier_provider(&ProviderName::default(), "memcot.distill_provider", &[]);
502    /// ```
503    pub fn warn_non_fast_tier_provider(
504        &self,
505        provider_name: &ProviderName,
506        feature_label: &str,
507        extra_allowlist: &[String],
508    ) {
509        if provider_name.is_empty() {
510            return;
511        }
512        let name = provider_name.as_str();
513        let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
514            tracing::warn!(
515                provider = name,
516                "{feature_label} provider '{name}' not found in [[llm.providers]]"
517            );
518            return;
519        };
520        let model = entry.model.as_deref().unwrap_or("");
521        if model.is_empty() {
522            return;
523        }
524        let lower = model.to_lowercase();
525        let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
526        let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
527        if !in_hints && !in_extra {
528            tracing::warn!(
529                provider = name,
530                actual = model,
531                "{feature_label} provider '{name}' uses model '{model}' \
532                 which may not be fast-tier; prefer a fast model to bound distillation cost"
533            );
534        }
535    }
536}
537
/// Lowercased substrings that identify commonly accepted fast-tier models.
///
/// Used by [`LlmConfig::warn_non_fast_tier_provider`] for a soft startup check: a model
/// counts as fast-tier when its lowercased name contains any of these entries.
/// Updating this list is non-breaking; a fast model missing from the list only causes a
/// spurious warning, never an error.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
560
/// Speech-to-text configuration, nested under `[llm.stt]` in TOML.
///
/// When set, Zeph uses the referenced provider for voice transcription.
/// The provider must have an `stt_model` field set in its `[[llm.providers]]` entry.
///
/// # Example (TOML)
///
/// ```toml
/// [llm.stt]
/// provider = "fast"
/// language = "en"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SttConfig {
    /// Provider name from `[[llm.providers]]`. Empty string means auto-detect first provider
    /// with `stt_model` set.
    ///
    /// Defaults to the empty string when omitted.
    #[serde(default = "default_stt_provider")]
    pub provider: String,
    /// Language hint for transcription (e.g. `"en"`, `"auto"`).
    ///
    /// Defaults to `"auto"` when omitted.
    #[serde(default = "default_stt_language")]
    pub language: String,
}
583
/// Routing strategy selection for multi-provider routing.
///
/// Serialized as a lowercase string: `"ema"`, `"thompson"`, `"cascade"`, or `"bandit"`.
/// [`Default`] yields [`RouterStrategyConfig::Ema`].
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RouterStrategyConfig {
    /// Exponential moving average latency-aware ordering.
    #[default]
    Ema,
    /// Thompson Sampling with Beta distributions (persistence-backed).
    Thompson,
    /// Cascade routing: try cheapest provider first, escalate on degenerate output.
    Cascade,
    /// PILOT: `LinUCB` contextual bandit with online learning and cost-aware reward.
    Bandit,
}
599
/// Agent Stability Index (ASI) configuration.
///
/// Tracks per-provider response coherence via a sliding window of response embeddings.
/// When coherence drops below `coherence_threshold`, the provider's routing prior is
/// penalized by `penalty_weight`. Disabled by default; session-only (no persistence).
///
/// Every field has a serde default, so an empty table deserializes to the same values
/// as [`AsiConfig::default`].
///
/// # Known Limitation
///
/// ASI embeddings are computed in a background `tokio::spawn` task after the response is
/// returned to the caller. Under high request rates, the coherence score used for routing
/// may lag 1–2 responses behind due to this fire-and-forget design. With the default
/// `window = 5`, this lag is tolerable — coherence is a slow-moving signal.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AsiConfig {
    /// Enable ASI coherence tracking. Default: false.
    #[serde(default)]
    pub enabled: bool,

    /// Sliding window size for response embeddings per provider. Default: 5.
    #[serde(default = "default_asi_window")]
    pub window: usize,

    /// Coherence score in `[0.0, 1.0]` below which the provider is penalized. Default: 0.7.
    #[serde(default = "default_asi_coherence_threshold")]
    pub coherence_threshold: f32,

    /// Penalty weight applied to Thompson beta / EMA score on low coherence. Default: 0.3.
    ///
    /// For Thompson, this shifts the beta prior: `beta += penalty_weight * (threshold - coherence)`.
    /// For EMA, the score is multiplied by `max(0.5, coherence / threshold)`.
    #[serde(default = "default_asi_penalty_weight")]
    pub penalty_weight: f32,
}
633
/// Default ASI sliding-window size.
fn default_asi_window() -> usize {
    5_usize
}

/// Default ASI coherence threshold.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}

/// Default ASI penalty weight.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
645
646impl Default for AsiConfig {
647    fn default() -> Self {
648        Self {
649            enabled: false,
650            window: default_asi_window(),
651            coherence_threshold: default_asi_coherence_threshold(),
652            penalty_weight: default_asi_penalty_weight(),
653        }
654    }
655}
656
/// Routing configuration for multi-provider setups.
///
/// All fields are optional on input: `strategy` falls back to its `Default`, the
/// `Option` fields to `None`, and `embed_concurrency` to 4.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RouterConfig {
    /// Routing strategy: `"ema"` (default), `"thompson"`, `"cascade"`, or `"bandit"`.
    #[serde(default)]
    pub strategy: RouterStrategyConfig,
    /// Path for persisting Thompson Sampling state. Defaults to `~/.zeph/router_thompson_state.json`.
    ///
    /// # Security
    ///
    /// This path is user-controlled. The application writes and reads a JSON file at
    /// this location. Ensure the path is within a directory that is not world-writable
    /// (e.g., avoid `/tmp`). The file is created with mode `0o600` on Unix.
    // NOTE(review): the documented default directory here (`~/.zeph/`) differs from the
    // one documented for `ReputationConfig::state_path` (`~/.config/zeph/`) — confirm
    // which is accurate against the code that resolves these paths.
    #[serde(default)]
    pub thompson_state_path: Option<String>,
    /// Cascade routing configuration. Only used when `strategy = "cascade"`.
    #[serde(default)]
    pub cascade: Option<CascadeConfig>,
    /// Bayesian reputation scoring configuration (RAPS). Disabled by default.
    #[serde(default)]
    pub reputation: Option<ReputationConfig>,
    /// PILOT bandit routing configuration. Only used when `strategy = "bandit"`.
    #[serde(default)]
    pub bandit: Option<BanditConfig>,
    /// Embedding-based quality gate threshold for Thompson/EMA routing. Default: disabled.
    ///
    /// When set, after provider selection, the cosine similarity between the query embedding
    /// and the response embedding is computed. If below this threshold, the next provider in
    /// the ordered list is tried. On exhaustion, the best response seen is returned.
    ///
    /// Only applies to Thompson and EMA strategies. Cascade uses its own quality classifier.
    /// Fail-open: embedding errors disable the gate for that request.
    #[serde(default)]
    pub quality_gate: Option<f32>,
    /// Agent Stability Index configuration. Disabled by default.
    #[serde(default)]
    pub asi: Option<AsiConfig>,
    /// Maximum number of concurrent `embed_batch` calls through the router.
    ///
    /// Limits simultaneous embedding HTTP requests to prevent provider rate-limiting
    /// and memory pressure during indexing or high-frequency recall. Default: 4.
    /// Set to 0 to disable the semaphore (unlimited concurrency).
    #[serde(default = "default_embed_concurrency")]
    pub embed_concurrency: usize,
}
702
/// Default cap on concurrent `embed_batch` calls through the router.
fn default_embed_concurrency() -> usize {
    4_usize
}
706
/// Configuration for Bayesian reputation scoring (RAPS — Reputation-Adjusted Provider Selection).
///
/// When enabled, quality outcomes from tool execution shift the routing scores over time,
/// giving an advantage to providers that consistently produce valid tool arguments.
///
/// Default: disabled. Set `enabled = true` to activate.
///
/// Every field has a serde default, so an empty table is valid input.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ReputationConfig {
    /// Enable reputation scoring. Default: false.
    #[serde(default)]
    pub enabled: bool,
    /// Session-level decay factor applied on each load. Range: (0.0, 1.0]. Default: 0.95.
    /// Lower values make reputation forget faster; 1.0 = no decay.
    #[serde(default = "default_reputation_decay_factor")]
    pub decay_factor: f64,
    /// Weight of reputation in routing score blend. Range: [0.0, 1.0]. Default: 0.3.
    ///
    /// **Warning**: values above 0.5 can aggressively suppress low-reputation providers.
    /// At `weight = 1.0` with `rep_factor = 0.0` (all failures), the routing score
    /// drops to zero — the provider becomes unreachable for that session. Stick to
    /// the default (0.3) unless you intentionally want strong reputation gating.
    #[serde(default = "default_reputation_weight")]
    pub weight: f64,
    /// Minimum quality observations before reputation influences routing. Default: 5.
    #[serde(default = "default_reputation_min_observations")]
    pub min_observations: u64,
    /// Path for persisting reputation state. Defaults to `~/.config/zeph/router_reputation_state.json`.
    #[serde(default)]
    pub state_path: Option<String>,
}
737
/// Configuration for cascade routing (`strategy = "cascade"`).
///
/// Cascade routing tries providers in chain order (cheapest first), escalating to
/// the next provider when the response is classified as degenerate (empty, repetitive,
/// incoherent). Chain order determines cost order: first provider = cheapest.
///
/// Every field has a serde default, so an empty table deserializes to the same values
/// as [`CascadeConfig::default`].
///
/// # Limitations
///
/// The heuristic classifier detects degenerate outputs only, not semantic failures.
/// Use `classifier_mode = "judge"` for semantic quality gating (adds LLM call cost).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Minimum quality score [0.0, 1.0] to accept a response without escalating.
    /// Responses scoring below this threshold trigger escalation. Default: 0.5.
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Maximum number of quality-based escalations per request.
    /// Network/API errors do not count against this budget.
    /// Default: 2 (allows up to 3 providers: cheap → mid → expensive).
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// Quality classifier mode: `"heuristic"` (default) or `"judge"`.
    /// Heuristic is zero-cost but detects only degenerate outputs.
    /// Judge requires a configured `summary_model` and adds one LLM call per evaluation.
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Rolling quality history window size per provider. Default: 50.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Maximum cumulative input+output tokens across all escalation levels.
    /// When exceeded, returns the best-seen response instead of escalating further.
    /// `None` disables the budget (unbounded escalation cost).
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Explicit cost ordering of provider names (cheapest first).
    /// When set, cascade routing sorts providers by their position in this list before
    /// trying them. Providers not in the list are appended after listed ones in their
    /// original chain order. When unset, chain order is used (default behavior).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,

    /// Hard timeout for the judge LLM call (milliseconds).
    /// If the judge does not respond within this budget, the call is treated as a failure
    /// and heuristic scoring is used instead. Default: 5000 (5 s).
    #[serde(default = "default_cascade_judge_timeout_ms")]
    pub judge_timeout_ms: u64,
}
790
791impl Default for CascadeConfig {
792    fn default() -> Self {
793        Self {
794            quality_threshold: default_cascade_quality_threshold(),
795            max_escalations: default_cascade_max_escalations(),
796            classifier_mode: CascadeClassifierMode::default(),
797            window_size: default_cascade_window_size(),
798            max_cascade_tokens: None,
799            cost_tiers: None,
800            judge_timeout_ms: default_cascade_judge_timeout_ms(),
801        }
802    }
803}
804
/// Quality classifier mode for cascade routing.
///
/// Serialized as a lowercase string: `"heuristic"` or `"judge"`.
/// [`Default`] yields [`CascadeClassifierMode::Heuristic`].
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// Zero-cost heuristic: detects degenerate outputs (empty, repetitive, incoherent).
    /// Does not detect semantic failures (hallucinations, wrong answers).
    #[default]
    Heuristic,
    /// LLM-based judge: more accurate but adds latency. Falls back to heuristic on failure.
    /// Requires `summary_model` to be configured.
    Judge,
}
818
/// Default bandit `alpha`.
fn default_bandit_alpha() -> f32 {
    1.0_f32
}

/// Default bandit feature-vector dimension.
fn default_bandit_dim() -> usize {
    32_usize
}

/// Default bandit cost weight.
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}

/// Default bandit decay factor.
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}

/// Default bandit embedding timeout, in milliseconds.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}

/// Default bandit cache size.
fn default_bandit_cache_size() -> usize {
    512_usize
}
842
843/// Configuration for PILOT bandit routing (`strategy = "bandit"`).
844///
845/// PILOT (Provider Intelligence via Learned Online Tuning) uses a `LinUCB` contextual
846/// bandit to learn which provider performs best for a given query context. The feature
847/// vector is derived from the query embedding (first `dim` components, L2-normalised).
848///
849/// **Cold start**: the bandit falls back to Thompson sampling for the first
850/// `10 * num_providers` queries (configurable). After warmup, `LinUCB` takes over.
851///
852/// **Embedding**: an `embedding_provider` must be set for feature vectors. If the embed
853/// call exceeds `embedding_timeout_ms` or fails, the bandit falls back to Thompson/uniform.
854/// Use a local provider (Ollama, Candle) to avoid network latency on the hot path.
855#[derive(Debug, Clone, Deserialize, Serialize)]
856pub struct BanditConfig {
857    /// `LinUCB` exploration parameter. Default: 1.0.
858    /// Higher values increase exploration; lower values favour exploitation.
859    #[serde(default = "default_bandit_alpha")]
860    pub alpha: f32,
861
862    /// Feature vector dimension (first `dim` components of the embedding).
863    ///
864    /// This is simple truncation, not PCA. The first raw embedding dimensions do not
865    /// necessarily capture the most variance. For `OpenAI` `text-embedding-3-*` models,
866    /// consider using the `dimensions` API parameter (Matryoshka embeddings) instead.
867    /// Default: 32.
868    #[serde(default = "default_bandit_dim")]
869    pub dim: usize,
870
871    /// Cost penalty weight in the reward signal: `reward = quality - cost_weight * cost_fraction`.
872    /// Default: 0.1. Increase to penalise expensive providers more aggressively.
873    #[serde(default = "default_bandit_cost_weight")]
874    pub cost_weight: f32,
875
876    /// Session-level decay applied to arm state on startup: `A = I + decay*(A-I)`, `b = decay*b`.
877    /// Values < 1.0 cause re-exploration after provider quality changes. Default: 1.0 (no decay).
878    #[serde(default = "default_bandit_decay_factor")]
879    pub decay_factor: f32,
880
881    /// Provider name from `[[llm.providers]]` used for query embeddings.
882    ///
883    /// SLM recommended: prefer a fast local model (e.g. Ollama `nomic-embed-text`,
884    /// Candle, or `text-embedding-3-small`) — this is called on every bandit request.
885    /// Empty string disables `LinUCB` (bandit always falls back to Thompson/uniform).
886    #[serde(default)]
887    pub embedding_provider: ProviderName,
888
889    /// Hard timeout for the embedding call in milliseconds. Default: 50.
890    /// If exceeded, the request falls back to Thompson/uniform selection.
891    #[serde(default = "default_bandit_embedding_timeout_ms")]
892    pub embedding_timeout_ms: u64,
893
894    /// Maximum cached embeddings (keyed by query text hash). Default: 512.
895    #[serde(default = "default_bandit_cache_size")]
896    pub cache_size: usize,
897
898    /// Path for persisting bandit state. Defaults to `~/.config/zeph/router_bandit_state.json`.
899    ///
900    /// # Security
901    ///
902    /// This path is user-controlled. The file is created with mode `0o600` on Unix.
903    /// Do not place it in world-writable directories.
904    #[serde(default)]
905    pub state_path: Option<String>,
906
907    /// MAR (Memory-Augmented Routing) confidence threshold.
908    ///
909    /// When the top-1 semantic recall score for the current query is >= this value,
910    /// the bandit biases toward cheaper providers (the answer is likely in memory).
911    /// Set to 1.0 to disable MAR. Default: 0.9.
912    #[serde(default = "default_bandit_memory_confidence_threshold")]
913    pub memory_confidence_threshold: f32,
914
915    /// Minimum number of queries before `LinUCB` takes over from Thompson warmup.
916    ///
917    /// When unset or `0`, defaults to `10 × number of providers` (computed at startup).
918    /// Set explicitly to control how long the bandit explores uniformly before
919    /// switching to context-aware routing. Setting `0` preserves the computed default.
920    #[serde(default)]
921    pub warmup_queries: Option<u64>,
922}
923
/// Serde default for `BanditConfig::memory_confidence_threshold`: `0.9`.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
927
928impl Default for BanditConfig {
929    fn default() -> Self {
930        Self {
931            alpha: default_bandit_alpha(),
932            dim: default_bandit_dim(),
933            cost_weight: default_bandit_cost_weight(),
934            decay_factor: default_bandit_decay_factor(),
935            embedding_provider: ProviderName::default(),
936            embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
937            cache_size: default_bandit_cache_size(),
938            state_path: None,
939            memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
940            warmup_queries: None,
941        }
942    }
943}
944
945#[derive(Debug, Deserialize, Serialize)]
946pub struct CandleConfig {
947    #[serde(default = "default_candle_source")]
948    pub source: String,
949    #[serde(default)]
950    pub local_path: String,
951    #[serde(default)]
952    pub filename: Option<String>,
953    #[serde(default = "default_chat_template")]
954    pub chat_template: String,
955    #[serde(default = "default_candle_device")]
956    pub device: String,
957    #[serde(default)]
958    pub embedding_repo: Option<String>,
959    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
960    ///
961    /// Must be the **token value** — resolved by the caller before constructing this config.
962    #[serde(default)]
963    pub hf_token: Option<String>,
964    #[serde(default)]
965    pub generation: GenerationParams,
966    /// Maximum seconds to wait for each half of a single inference request.
967    ///
968    /// The timeout is applied **twice** per `chat()` call: once for the channel send
969    /// (waiting for a free slot) and once for the oneshot reply (waiting for the worker
970    /// to finish). The effective maximum wall-clock wait per request is therefore
971    /// `2 × inference_timeout_secs`. CPU inference can be slow; 120s is a conservative
972    /// default for large models, giving up to 240s total before an error is returned.
973    /// Values of 0 are silently promoted to 1 at bootstrap.
974    #[serde(default = "default_inference_timeout_secs")]
975    pub inference_timeout_secs: u64,
976}
977
/// Serde default for the Candle `inference_timeout_secs` fields: `120` seconds.
fn default_inference_timeout_secs() -> u64 {
    120_u64
}
981
982/// Sampling / generation parameters for Candle local inference.
983///
984/// Used inside `[llm.candle.generation]` or a `[[llm.providers]]` Candle entry.
985#[derive(Debug, Clone, Deserialize, Serialize)]
986pub struct GenerationParams {
987    /// Sampling temperature. Higher values produce more creative outputs. Default: `0.7`.
988    #[serde(default = "default_temperature")]
989    pub temperature: f64,
990    /// Nucleus sampling threshold. When set, tokens with cumulative probability above
991    /// this value are excluded. Default: `None` (disabled).
992    #[serde(default)]
993    pub top_p: Option<f64>,
994    /// Top-k sampling. When set, only the top-k most probable tokens are considered.
995    /// Default: `None` (disabled).
996    #[serde(default)]
997    pub top_k: Option<usize>,
998    /// Maximum number of tokens to generate per response. Capped at [`MAX_TOKENS_CAP`].
999    /// Default: `2048`.
1000    #[serde(default = "default_max_tokens")]
1001    pub max_tokens: usize,
1002    /// Random seed for reproducible outputs. Default: `42`.
1003    #[serde(default = "default_seed")]
1004    pub seed: u64,
1005    /// Repetition penalty applied during sampling. Default: `1.1`.
1006    #[serde(default = "default_repeat_penalty")]
1007    pub repeat_penalty: f32,
1008    /// Number of last tokens to consider for the repetition penalty window. Default: `64`.
1009    #[serde(default = "default_repeat_last_n")]
1010    pub repeat_last_n: usize,
1011}
1012
/// Hard ceiling (32 Ki tokens) on `GenerationParams::max_tokens`, preventing unbounded
/// generation.
pub const MAX_TOKENS_CAP: usize = 32 * 1024;
1015
1016impl GenerationParams {
1017    /// Returns `max_tokens` clamped to [`MAX_TOKENS_CAP`].
1018    ///
1019    /// # Examples
1020    ///
1021    /// ```
1022    /// use zeph_config::GenerationParams;
1023    ///
1024    /// let params = GenerationParams::default();
1025    /// assert!(params.capped_max_tokens() <= 32768);
1026    /// ```
1027    #[must_use]
1028    pub fn capped_max_tokens(&self) -> usize {
1029        self.max_tokens.min(MAX_TOKENS_CAP)
1030    }
1031}
1032
1033impl Default for GenerationParams {
1034    fn default() -> Self {
1035        Self {
1036            temperature: default_temperature(),
1037            top_p: None,
1038            top_k: None,
1039            max_tokens: default_max_tokens(),
1040            seed: default_seed(),
1041            repeat_penalty: default_repeat_penalty(),
1042            repeat_last_n: default_repeat_last_n(),
1043        }
1044    }
1045}
1046
1047// ─── Unified config types ─────────────────────────────────────────────────────
1048
1049/// Routing strategy for the `[[llm.providers]]` pool.
1050#[non_exhaustive]
1051#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
1052#[serde(rename_all = "lowercase")]
1053pub enum LlmRoutingStrategy {
1054    /// Single provider or first-in-pool (default).
1055    #[default]
1056    None,
1057    /// Exponential moving average latency-aware ordering.
1058    Ema,
1059    /// Thompson Sampling with Beta distributions.
1060    Thompson,
1061    /// Cascade: try cheapest provider first, escalate on degenerate output.
1062    Cascade,
1063    /// Complexity triage routing: pre-classify each request, delegate to appropriate tier.
1064    Triage,
1065    /// PILOT: `LinUCB` contextual bandit with online learning and budget-aware reward.
1066    Bandit,
1067}
1068
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`: `5` seconds.
fn default_triage_timeout_secs() -> u64 {
    5_u64
}
1072
/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`: `50` tokens.
fn default_max_triage_tokens() -> u32 {
    50_u32
}
1076
/// Serde default helper for boolean flags that are enabled unless configured otherwise.
fn default_true() -> bool {
    !false
}
1080
/// `skip_serializing_if` predicate: reports whether the flag is set.
///
/// Serde passes the field by reference, which is why this takes `&bool`.
#[allow(clippy::trivially_copy_pass_by_ref)] // signature is dictated by serde's `skip_serializing_if`
fn is_true(flag: &bool) -> bool {
    let value = *flag;
    value
}
1085
1086/// Tier-to-provider name mapping for complexity routing.
1087#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1088pub struct TierMapping {
1089    pub simple: Option<String>,
1090    pub medium: Option<String>,
1091    pub complex: Option<String>,
1092    pub expert: Option<String>,
1093}
1094
1095/// Configuration for complexity-based triage routing (`routing = "triage"`).
1096///
1097/// When `[llm] routing = "triage"` is set, a cheap triage model classifies each request
1098/// and routes it to the appropriate tier provider. Requires at least one tier mapping.
1099///
1100/// # Example
1101///
1102/// ```toml
1103/// [llm]
1104/// routing = "triage"
1105///
1106/// [llm.complexity_routing]
1107/// triage_provider = "local-fast"
1108///
1109/// [llm.complexity_routing.tiers]
1110/// simple = "local-fast"
1111/// medium = "haiku"
1112/// complex = "sonnet"
1113/// expert = "opus"
1114/// ```
1115#[derive(Debug, Clone, Deserialize, Serialize)]
1116pub struct ComplexityRoutingConfig {
1117    /// Provider name from `[[llm.providers]]` used for triage classification.
1118    #[serde(default)]
1119    pub triage_provider: Option<ProviderName>,
1120
1121    /// Skip triage when all tiers map to the same provider.
1122    #[serde(default = "default_true")]
1123    pub bypass_single_provider: bool,
1124
1125    /// Tier-to-provider name mapping.
1126    #[serde(default)]
1127    pub tiers: TierMapping,
1128
1129    /// Max output tokens for the triage classification call. Default: 50.
1130    #[serde(default = "default_max_triage_tokens")]
1131    pub max_triage_tokens: u32,
1132
1133    /// Timeout in seconds for the triage classification call. Default: 5.
1134    /// On timeout, falls back to the default (first) tier provider.
1135    #[serde(default = "default_triage_timeout_secs")]
1136    pub triage_timeout_secs: u64,
1137
1138    /// Optional fallback strategy when triage misclassifies.
1139    /// Only `"cascade"` is currently supported (Phase 4).
1140    #[serde(default)]
1141    pub fallback_strategy: Option<String>,
1142}
1143
1144impl Default for ComplexityRoutingConfig {
1145    fn default() -> Self {
1146        Self {
1147            triage_provider: None,
1148            bypass_single_provider: true,
1149            tiers: TierMapping::default(),
1150            max_triage_tokens: default_max_triage_tokens(),
1151            triage_timeout_secs: default_triage_timeout_secs(),
1152            fallback_strategy: None,
1153        }
1154    }
1155}
1156
1157/// Configuration for the Collaborative Entropy (`CoE`) subsystem (`[llm.coe]` TOML section).
1158///
1159/// `CoE` detects uncertain responses from the primary provider and escalates to a
1160/// secondary provider when either the intra-entropy or inter-divergence signal crosses
1161/// its threshold. Only active for `RouterStrategy::Ema` and `RouterStrategy::Thompson`.
1162///
1163/// # Example
1164///
1165/// ```toml
1166/// [llm.coe]
1167/// enabled = true
1168/// intra_threshold = 0.8
1169/// inter_threshold = 0.20
1170/// shadow_sample_rate = 0.1
1171/// secondary_provider = "quality"
1172/// embedding_provider = ""
1173/// ```
1174#[derive(Debug, Clone, Deserialize, Serialize)]
1175#[serde(default)]
1176pub struct CoeConfig {
1177    /// Enable `CoE`. When `false`, the struct is ignored.
1178    pub enabled: bool,
1179    /// Mean negative log-prob threshold; responses above this trigger intra escalation.
1180    pub intra_threshold: f64,
1181    /// Divergence threshold in `[0.0, 1.0]`.
1182    pub inter_threshold: f64,
1183    /// Baseline rate at which secondary is called even when intra is low.
1184    pub shadow_sample_rate: f64,
1185    /// Provider name from `[[llm.providers]]` used as the escalation target.
1186    pub secondary_provider: ProviderName,
1187    /// Provider name for inter-divergence embeddings. Empty → inherit bandit's embedding provider.
1188    pub embedding_provider: ProviderName,
1189}
1190
1191impl Default for CoeConfig {
1192    fn default() -> Self {
1193        Self {
1194            enabled: false,
1195            intra_threshold: 0.8,
1196            inter_threshold: 0.20,
1197            shadow_sample_rate: 0.1,
1198            secondary_provider: ProviderName::default(),
1199            embedding_provider: ProviderName::default(),
1200        }
1201    }
1202}
1203
1204/// A single Gonka network node endpoint.
1205///
1206/// Used in `[[llm.providers]]` entries with `type = "gonka"` to declare
1207/// the node pool for blockchain inference routing.
1208#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
1209pub struct GonkaNode {
1210    /// HTTP(S) URL of the Gonka node (e.g. `"https://node1.gonka.ai"`).
1211    pub url: String,
1212    /// On-chain bech32 address of this node (e.g. `"gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"`).
1213    ///
1214    /// Required for signature construction: every signed request binds to the target node's
1215    /// on-chain address, making signatures non-replayable across different nodes.
1216    pub address: String,
1217    /// Optional human-readable label for `zeph gonka doctor` output.
1218    #[serde(default, skip_serializing_if = "Option::is_none")]
1219    pub name: Option<String>,
1220}
1221
1222/// Inline candle config for use inside `ProviderEntry`.
1223/// Re-uses the generation params from `CandleConfig`.
1224#[derive(Debug, Clone, Deserialize, Serialize)]
1225pub struct CandleInlineConfig {
1226    #[serde(default = "default_candle_source")]
1227    pub source: String,
1228    #[serde(default)]
1229    pub local_path: String,
1230    #[serde(default)]
1231    pub filename: Option<String>,
1232    #[serde(default = "default_chat_template")]
1233    pub chat_template: String,
1234    #[serde(default = "default_candle_device")]
1235    pub device: String,
1236    #[serde(default)]
1237    pub embedding_repo: Option<String>,
1238    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
1239    #[serde(default)]
1240    pub hf_token: Option<String>,
1241    #[serde(default)]
1242    pub generation: GenerationParams,
1243    /// Maximum wall-clock seconds to wait for a single inference request.
1244    ///
1245    /// Effective timeout is `2 × inference_timeout_secs` (send + recv each have this budget).
1246    /// CPU inference can be slow; 120s is a conservative default. Floored at 1s.
1247    #[serde(default = "default_inference_timeout_secs")]
1248    pub inference_timeout_secs: u64,
1249}
1250
1251impl Default for CandleInlineConfig {
1252    fn default() -> Self {
1253        Self {
1254            source: default_candle_source(),
1255            local_path: String::new(),
1256            filename: None,
1257            chat_template: default_chat_template(),
1258            device: default_candle_device(),
1259            embedding_repo: None,
1260            hf_token: None,
1261            generation: GenerationParams::default(),
1262            inference_timeout_secs: default_inference_timeout_secs(),
1263        }
1264    }
1265}
1266
1267/// Per-1K-token pricing for a Cocoon provider, in cents.
1268///
1269/// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
1270/// When this struct is present in a provider entry, its values are registered with
1271/// `CostTracker` at startup so that token costs are tracked accurately.
1272///
1273/// Reasoning tokens (when the model uses chain-of-thought) are folded into
1274/// `completion_tokens` by the Cocoon sidecar and counted at the completion price.
1275#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
1276pub struct CocoonPricing {
1277    /// Prompt (input) token price in cents per 1K tokens.
1278    #[serde(default)]
1279    pub prompt_cents_per_1k: f64,
1280    /// Completion (output) token price in cents per 1K tokens.
1281    /// Reasoning tokens are counted here since the sidecar folds them into completion tokens.
1282    #[serde(default)]
1283    pub completion_cents_per_1k: f64,
1284}
1285
1286/// Unified provider entry: one struct replaces `CloudLlmConfig`, `OpenAiConfig`,
1287/// `GeminiConfig`, `OllamaConfig`, `CompatibleConfig`, and `OrchestratorProviderConfig`.
1288///
1289/// Provider-specific fields use `#[serde(default)]` and are ignored by backends
1290/// that do not use them (flat-union pattern).
1291#[derive(Debug, Clone, Deserialize, Serialize)]
1292#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1293pub struct ProviderEntry {
1294    /// Required: provider backend type.
1295    #[serde(rename = "type")]
1296    pub provider_type: ProviderKind,
1297
1298    /// Optional name for multi-provider configs. Auto-generated from type if absent.
1299    #[serde(default)]
1300    pub name: Option<String>,
1301
1302    /// Model identifier. Required for most types.
1303    #[serde(default)]
1304    pub model: Option<String>,
1305
1306    /// API base URL. Each type has its own default.
1307    #[serde(default)]
1308    pub base_url: Option<String>,
1309
1310    /// Max output tokens.
1311    #[serde(default)]
1312    pub max_tokens: Option<u32>,
1313
1314    /// Embedding model. When set, this provider supports `embed()` calls.
1315    #[serde(default)]
1316    pub embedding_model: Option<String>,
1317
1318    /// STT model. When set, this provider supports speech-to-text via the Whisper API or
1319    /// Candle-local inference.
1320    #[serde(default)]
1321    pub stt_model: Option<String>,
1322
1323    /// Mark this entry as the embedding provider (handles `embed()` calls).
1324    #[serde(default)]
1325    pub embed: bool,
1326
1327    /// Mark this entry as the default chat provider (overrides position-based default).
1328    #[serde(default)]
1329    pub default: bool,
1330
1331    // --- Claude-specific ---
1332    #[serde(default)]
1333    pub thinking: Option<ThinkingConfig>,
1334    #[serde(default)]
1335    pub server_compaction: bool,
1336    #[serde(default)]
1337    pub enable_extended_context: bool,
1338    /// Prompt cache TTL variant. `None` keeps the default ~5-minute ephemeral TTL.
1339    /// Set to `"1h"` to enable the extended 1-hour TTL (beta, ~2× write cost).
1340    #[serde(default)]
1341    pub prompt_cache_ttl: Option<CacheTtl>,
1342
1343    // --- OpenAI-specific ---
1344    #[serde(default)]
1345    pub reasoning_effort: Option<String>,
1346
1347    // --- Gemini-specific ---
1348    #[serde(default)]
1349    pub thinking_level: Option<GeminiThinkingLevel>,
1350    #[serde(default)]
1351    pub thinking_budget: Option<i32>,
1352    #[serde(default)]
1353    pub include_thoughts: Option<bool>,
1354
1355    // --- Compatible-specific: optional inline api_key ---
1356    #[serde(default)]
1357    pub api_key: Option<String>,
1358
1359    // --- Candle-specific ---
1360    #[serde(default)]
1361    pub candle: Option<CandleInlineConfig>,
1362
1363    // --- Vision ---
1364    #[serde(default)]
1365    pub vision_model: Option<String>,
1366
1367    // --- Gonka-specific ---
1368    /// Gonka network node pool. Required (non-empty) when `type = "gonka"`.
1369    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1370    pub gonka_nodes: Vec<GonkaNode>,
1371    /// bech32 chain prefix for address encoding. Defaults to `"gonka"` when omitted.
1372    #[serde(default, skip_serializing_if = "Option::is_none")]
1373    pub gonka_chain_prefix: Option<String>,
1374
1375    // --- Cocoon-specific ---
1376    /// Cocoon sidecar HTTP URL. Defaults to `"http://localhost:10000"` when absent.
1377    #[serde(default, skip_serializing_if = "Option::is_none")]
1378    pub cocoon_client_url: Option<String>,
1379    /// Sentinel field for access hash. Leave empty in config; actual value
1380    /// is resolved from the age vault as `ZEPH_COCOON_ACCESS_HASH`.
1381    #[serde(default, skip_serializing_if = "Option::is_none")]
1382    pub cocoon_access_hash: Option<String>,
1383    /// Whether to perform a health check against `/stats` at provider construction time.
1384    #[serde(default = "default_true", skip_serializing_if = "is_true")]
1385    pub cocoon_health_check: bool,
1386    /// Manual per-1K-token pricing for this Cocoon provider.
1387    ///
1388    /// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
1389    /// When this section is present, the values are registered with `CostTracker` at startup
1390    /// so that token costs are tracked accurately.
1391    ///
1392    /// Example TOML:
1393    /// ```toml
1394    /// [llm.providers.cocoon_pricing]
1395    /// prompt_cents_per_1k = 0.01
1396    /// completion_cents_per_1k = 0.03
1397    /// ```
1398    #[serde(default, skip_serializing_if = "Option::is_none")]
1399    pub cocoon_pricing: Option<CocoonPricing>,
1400
1401    /// Provider-specific instruction file.
1402    #[serde(default)]
1403    pub instruction_file: Option<std::path::PathBuf>,
1404
1405    /// Maximum concurrent LLM calls from orchestrated sub-agents to this provider.
1406    ///
1407    /// When set, `DagScheduler` acquires a semaphore permit before dispatching a
1408    /// sub-agent that targets this provider. Dispatch is deferred (using the existing
1409    /// `deferral_backoff` mechanism) when the semaphore is saturated.
1410    ///
1411    /// `None` (default) = unlimited — no admission control applied.
1412    ///
1413    /// # Example (TOML)
1414    ///
1415    /// ```toml
1416    /// [[llm.providers]]
1417    /// name = "quality"
1418    /// type = "openai"
1419    /// model = "gpt-5"
1420    /// max_concurrent = 3
1421    /// ```
1422    #[serde(default, skip_serializing_if = "Option::is_none")]
1423    pub max_concurrent: Option<u32>,
1424}
1425
1426impl Default for ProviderEntry {
1427    fn default() -> Self {
1428        Self {
1429            provider_type: ProviderKind::Ollama,
1430            name: None,
1431            model: None,
1432            base_url: None,
1433            max_tokens: None,
1434            embedding_model: None,
1435            stt_model: None,
1436            embed: false,
1437            default: false,
1438            thinking: None,
1439            server_compaction: false,
1440            enable_extended_context: false,
1441            prompt_cache_ttl: None,
1442            reasoning_effort: None,
1443            thinking_level: None,
1444            thinking_budget: None,
1445            include_thoughts: None,
1446            api_key: None,
1447            candle: None,
1448            vision_model: None,
1449            gonka_nodes: Vec::new(),
1450            gonka_chain_prefix: None,
1451            cocoon_client_url: None,
1452            cocoon_access_hash: None,
1453            cocoon_health_check: true,
1454            cocoon_pricing: None,
1455            instruction_file: None,
1456            max_concurrent: None,
1457        }
1458    }
1459}
1460
1461impl ProviderEntry {
1462    /// Resolve the effective name: explicit `name` field or type string.
1463    #[must_use]
1464    pub fn effective_name(&self) -> String {
1465        self.name
1466            .clone()
1467            .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1468    }
1469
1470    /// Resolve the effective model: explicit `model` field or the provider-type default.
1471    ///
1472    /// Defaults mirror those used in `build_provider_from_entry` so that `runtime.model_name`
1473    /// always reflects the actual model being used rather than the provider type string.
1474    #[must_use]
1475    pub fn effective_model(&self) -> String {
1476        if let Some(ref m) = self.model {
1477            return m.clone();
1478        }
1479        match self.provider_type {
1480            ProviderKind::Ollama => "qwen3:8b".to_owned(),
1481            ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1482            ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1483            ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1484            // Compatible/Candle return empty because the model is resolved elsewhere.
1485            // Gonka returns empty because it is a blockchain provider, not an LLM — there is no model concept.
1486            ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1487            ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1488        }
1489    }
1490
1491    /// Validate this entry for cross-field consistency.
1492    ///
1493    /// # Errors
1494    ///
1495    /// Returns `ConfigError` when a fatal invariant is violated (e.g. compatible provider
1496    /// without a name).
1497    pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1498        use crate::error::ConfigError;
1499
1500        // B2: compatible provider MUST have name set.
1501        if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1502            return Err(ConfigError::Validation(
1503                "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1504            ));
1505        }
1506
1507        // B3: gonka provider MUST have name and valid gonka_nodes.
1508        if self.provider_type == ProviderKind::Gonka {
1509            if self.name.is_none() {
1510                return Err(ConfigError::Validation(
1511                    "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1512                ));
1513            }
1514            self.validate_gonka_nodes()?;
1515        }
1516
1517        // B4: cocoon provider MUST have a name.
1518        if self.provider_type == ProviderKind::Cocoon
1519            && self.name.as_ref().is_none_or(String::is_empty)
1520        {
1521            return Err(ConfigError::Validation(
1522                "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1523            ));
1524        }
1525
1526        // B5: cocoon URL must be valid http/https; cocoon model must not be empty.
1527        if self.provider_type == ProviderKind::Cocoon {
1528            let name = self.effective_name();
1529            if let Some(ref url_str) = self.cocoon_client_url {
1530                match url::Url::parse(url_str) {
1531                    Err(_) => {
1532                        return Err(ConfigError::Validation(format!(
1533                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1534                             '{url_str}' is not a valid URL; expected format: \
1535                             http://localhost:10000"
1536                        )));
1537                    }
1538                    Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1539                        return Err(ConfigError::Validation(format!(
1540                            "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1541                             localhost or 127.0.0.1, got '{}'",
1542                            u.host_str().unwrap_or("<none>")
1543                        )));
1544                    }
1545                    Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1546                        return Err(ConfigError::Validation(format!(
1547                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1548                             scheme must be http or https, got '{}'",
1549                            u.scheme()
1550                        )));
1551                    }
1552                    _ => {}
1553                }
1554            }
1555            if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1556                return Err(ConfigError::Validation(format!(
1557                    "[[llm.providers]] entry '{name}': model must not be empty \
1558                     for cocoon provider"
1559                )));
1560            }
1561            if let Some(ref p) = self.cocoon_pricing {
1562                if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1563                    return Err(ConfigError::Validation(format!(
1564                        "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1565                         must be a finite non-negative number"
1566                    )));
1567                }
1568                if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1569                    return Err(ConfigError::Validation(format!(
1570                        "[[llm.providers]] entry '{name}': \
1571                         cocoon_pricing.completion_cents_per_1k \
1572                         must be a finite non-negative number"
1573                    )));
1574                }
1575            }
1576        }
1577
1578        // B1: warn on irrelevant fields.
1579        self.warn_irrelevant_fields();
1580
1581        // W6: Candle STT-only provider (stt_model set, no model) is valid — no warning needed.
1582        // Warn if Ollama has stt_model set (Ollama does not support Whisper API).
1583        if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1584            tracing::warn!(
1585                provider = self.effective_name(),
1586                "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1587                 Whisper STT API — use OpenAI, compatible, or candle instead"
1588            );
1589        }
1590
1591        Ok(())
1592    }
1593
1594    /// Resolve the effective Gonka chain prefix: explicit value or `"gonka"` default.
1595    #[must_use]
1596    pub fn effective_gonka_chain_prefix(&self) -> &str {
1597        self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1598    }
1599
1600    fn warn_irrelevant_fields(&self) {
1601        let name = self.effective_name();
1602        match self.provider_type {
1603            ProviderKind::Ollama => {
1604                if self.thinking.is_some() {
1605                    tracing::warn!(
1606                        provider = name,
1607                        "field `thinking` is only used by Claude providers"
1608                    );
1609                }
1610                if self.reasoning_effort.is_some() {
1611                    tracing::warn!(
1612                        provider = name,
1613                        "field `reasoning_effort` is only used by OpenAI providers"
1614                    );
1615                }
1616                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1617                    tracing::warn!(
1618                        provider = name,
1619                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1620                    );
1621                }
1622            }
1623            ProviderKind::Claude => {
1624                if self.reasoning_effort.is_some() {
1625                    tracing::warn!(
1626                        provider = name,
1627                        "field `reasoning_effort` is only used by OpenAI providers"
1628                    );
1629                }
1630                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1631                    tracing::warn!(
1632                        provider = name,
1633                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1634                    );
1635                }
1636            }
1637            ProviderKind::OpenAi => {
1638                if self.thinking.is_some() {
1639                    tracing::warn!(
1640                        provider = name,
1641                        "field `thinking` is only used by Claude providers"
1642                    );
1643                }
1644                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1645                    tracing::warn!(
1646                        provider = name,
1647                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1648                    );
1649                }
1650            }
1651            ProviderKind::Gemini => {
1652                if self.thinking.is_some() {
1653                    tracing::warn!(
1654                        provider = name,
1655                        "field `thinking` is only used by Claude providers"
1656                    );
1657                }
1658                if self.reasoning_effort.is_some() {
1659                    tracing::warn!(
1660                        provider = name,
1661                        "field `reasoning_effort` is only used by OpenAI providers"
1662                    );
1663                }
1664            }
1665            ProviderKind::Gonka => {
1666                if self.thinking.is_some() {
1667                    tracing::warn!(
1668                        provider = name,
1669                        "field `thinking` is only used by Claude providers"
1670                    );
1671                }
1672                if self.reasoning_effort.is_some() {
1673                    tracing::warn!(
1674                        provider = name,
1675                        "field `reasoning_effort` is only used by OpenAI providers"
1676                    );
1677                }
1678                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1679                    tracing::warn!(
1680                        provider = name,
1681                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1682                    );
1683                }
1684            }
1685            ProviderKind::Compatible | ProviderKind::Candle => {}
1686            ProviderKind::Cocoon => {
1687                if self.base_url.is_some() {
1688                    tracing::warn!(
1689                        provider = name,
1690                        "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1691                    );
1692                }
1693            }
1694        }
1695    }
1696
1697    fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1698        use crate::error::ConfigError;
1699        if self.gonka_nodes.is_empty() {
1700            return Err(ConfigError::Validation(format!(
1701                "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1702                self.effective_name()
1703            )));
1704        }
1705        for (i, node) in self.gonka_nodes.iter().enumerate() {
1706            if node.url.is_empty() {
1707                return Err(ConfigError::Validation(format!(
1708                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1709                    self.effective_name()
1710                )));
1711            }
1712            if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1713                return Err(ConfigError::Validation(format!(
1714                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1715                    self.effective_name()
1716                )));
1717            }
1718        }
1719        Ok(())
1720    }
1721}
1722
1723/// Validate a pool of `ProviderEntry` items.
1724///
1725/// # Errors
1726///
1727/// Returns `ConfigError` for fatal validation failures:
1728/// - Empty pool
1729/// - Duplicate names
1730/// - Multiple entries marked `default = true`
1731/// - Individual entry validation errors
1732pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1733    use crate::error::ConfigError;
1734    use std::collections::HashSet;
1735
1736    if entries.is_empty() {
1737        return Err(ConfigError::Validation(
1738            "at least one LLM provider must be configured in [[llm.providers]]".into(),
1739        ));
1740    }
1741
1742    let default_count = entries.iter().filter(|e| e.default).count();
1743    if default_count > 1 {
1744        return Err(ConfigError::Validation(
1745            "only one [[llm.providers]] entry can be marked `default = true`".into(),
1746        ));
1747    }
1748
1749    let mut seen_names: HashSet<String> = HashSet::new();
1750    for entry in entries {
1751        let name = entry.effective_name();
1752        if !seen_names.insert(name.clone()) {
1753            return Err(ConfigError::Validation(format!(
1754                "duplicate provider name \"{name}\" in [[llm.providers]]"
1755            )));
1756        }
1757        entry.validate()?;
1758    }
1759
1760    Ok(())
1761}
1762
1763#[cfg(test)]
1764mod tests {
1765    use super::*;
1766
1767    fn ollama_entry() -> ProviderEntry {
1768        ProviderEntry {
1769            provider_type: ProviderKind::Ollama,
1770            name: Some("ollama".into()),
1771            model: Some("qwen3:8b".into()),
1772            ..Default::default()
1773        }
1774    }
1775
1776    fn claude_entry() -> ProviderEntry {
1777        ProviderEntry {
1778            provider_type: ProviderKind::Claude,
1779            name: Some("claude".into()),
1780            model: Some("claude-sonnet-4-6".into()),
1781            max_tokens: Some(8192),
1782            ..Default::default()
1783        }
1784    }
1785
1786    // ─── ProviderEntry::validate ─────────────────────────────────────────────
1787
1788    #[test]
1789    fn validate_ollama_valid() {
1790        assert!(ollama_entry().validate().is_ok());
1791    }
1792
1793    #[test]
1794    fn validate_claude_valid() {
1795        assert!(claude_entry().validate().is_ok());
1796    }
1797
1798    #[test]
1799    fn validate_compatible_without_name_errors() {
1800        let entry = ProviderEntry {
1801            provider_type: ProviderKind::Compatible,
1802            name: None,
1803            ..Default::default()
1804        };
1805        let err = entry.validate().unwrap_err();
1806        assert!(
1807            err.to_string().contains("compatible"),
1808            "error should mention compatible: {err}"
1809        );
1810    }
1811
1812    #[test]
1813    fn validate_compatible_with_name_ok() {
1814        let entry = ProviderEntry {
1815            provider_type: ProviderKind::Compatible,
1816            name: Some("my-proxy".into()),
1817            base_url: Some("http://localhost:8080".into()),
1818            model: Some("gpt-4o".into()),
1819            max_tokens: Some(4096),
1820            ..Default::default()
1821        };
1822        assert!(entry.validate().is_ok());
1823    }
1824
1825    #[test]
1826    fn validate_openai_valid() {
1827        let entry = ProviderEntry {
1828            provider_type: ProviderKind::OpenAi,
1829            name: Some("openai".into()),
1830            model: Some("gpt-4o".into()),
1831            max_tokens: Some(4096),
1832            ..Default::default()
1833        };
1834        assert!(entry.validate().is_ok());
1835    }
1836
1837    #[test]
1838    fn validate_gemini_valid() {
1839        let entry = ProviderEntry {
1840            provider_type: ProviderKind::Gemini,
1841            name: Some("gemini".into()),
1842            model: Some("gemini-2.0-flash".into()),
1843            ..Default::default()
1844        };
1845        assert!(entry.validate().is_ok());
1846    }
1847
1848    // ─── validate_pool ───────────────────────────────────────────────────────
1849
1850    #[test]
1851    fn validate_pool_empty_errors() {
1852        let err = validate_pool(&[]).unwrap_err();
1853        assert!(err.to_string().contains("at least one"), "{err}");
1854    }
1855
1856    #[test]
1857    fn validate_pool_single_entry_ok() {
1858        assert!(validate_pool(&[ollama_entry()]).is_ok());
1859    }
1860
1861    #[test]
1862    fn validate_pool_duplicate_names_errors() {
1863        let a = ollama_entry();
1864        let b = ollama_entry(); // same effective name "ollama"
1865        let err = validate_pool(&[a, b]).unwrap_err();
1866        assert!(err.to_string().contains("duplicate"), "{err}");
1867    }
1868
1869    #[test]
1870    fn validate_pool_multiple_defaults_errors() {
1871        let mut a = ollama_entry();
1872        let mut b = claude_entry();
1873        a.default = true;
1874        b.default = true;
1875        let err = validate_pool(&[a, b]).unwrap_err();
1876        assert!(err.to_string().contains("default"), "{err}");
1877    }
1878
1879    #[test]
1880    fn validate_pool_two_different_providers_ok() {
1881        assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
1882    }
1883
1884    #[test]
1885    fn validate_pool_propagates_entry_error() {
1886        let bad = ProviderEntry {
1887            provider_type: ProviderKind::Compatible,
1888            name: None, // invalid: compatible without name
1889            ..Default::default()
1890        };
1891        assert!(validate_pool(&[bad]).is_err());
1892    }
1893
1894    // ─── ProviderEntry::effective_model ──────────────────────────────────────
1895
1896    #[test]
1897    fn effective_model_returns_explicit_when_set() {
1898        let entry = ProviderEntry {
1899            provider_type: ProviderKind::Claude,
1900            model: Some("claude-sonnet-4-6".into()),
1901            ..Default::default()
1902        };
1903        assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
1904    }
1905
1906    #[test]
1907    fn effective_model_ollama_default_when_none() {
1908        let entry = ProviderEntry {
1909            provider_type: ProviderKind::Ollama,
1910            model: None,
1911            ..Default::default()
1912        };
1913        assert_eq!(entry.effective_model(), "qwen3:8b");
1914    }
1915
1916    #[test]
1917    fn effective_model_claude_default_when_none() {
1918        let entry = ProviderEntry {
1919            provider_type: ProviderKind::Claude,
1920            model: None,
1921            ..Default::default()
1922        };
1923        assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
1924    }
1925
1926    #[test]
1927    fn effective_model_openai_default_when_none() {
1928        let entry = ProviderEntry {
1929            provider_type: ProviderKind::OpenAi,
1930            model: None,
1931            ..Default::default()
1932        };
1933        assert_eq!(entry.effective_model(), "gpt-4o-mini");
1934    }
1935
1936    #[test]
1937    fn effective_model_gemini_default_when_none() {
1938        let entry = ProviderEntry {
1939            provider_type: ProviderKind::Gemini,
1940            model: None,
1941            ..Default::default()
1942        };
1943        assert_eq!(entry.effective_model(), "gemini-2.0-flash");
1944    }
1945
1946    // ─── LlmConfig::check_legacy_format ──────────────────────────────────────
1947
1948    // Parse a complete TOML snippet that includes the [llm] header.
1949    fn parse_llm(toml: &str) -> LlmConfig {
1950        #[derive(serde::Deserialize)]
1951        struct Wrapper {
1952            llm: LlmConfig,
1953        }
1954        toml::from_str::<Wrapper>(toml).unwrap().llm
1955    }
1956
1957    #[test]
1958    fn check_legacy_format_new_format_ok() {
1959        let cfg = parse_llm(
1960            r#"
1961[llm]
1962
1963[[llm.providers]]
1964type = "ollama"
1965model = "qwen3:8b"
1966"#,
1967        );
1968        assert!(cfg.check_legacy_format().is_ok());
1969    }
1970
1971    #[test]
1972    fn check_legacy_format_empty_providers_no_legacy_ok() {
1973        // No providers, no legacy fields — passes (empty [llm] is acceptable here)
1974        let cfg = parse_llm("[llm]\n");
1975        assert!(cfg.check_legacy_format().is_ok());
1976    }
1977
1978    // ─── LlmConfig::effective_* helpers ──────────────────────────────────────
1979
1980    #[test]
1981    fn effective_provider_falls_back_to_ollama_when_no_providers() {
1982        let cfg = parse_llm("[llm]\n");
1983        assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
1984    }
1985
1986    #[test]
1987    fn effective_provider_reads_from_providers_first() {
1988        let cfg = parse_llm(
1989            r#"
1990[llm]
1991
1992[[llm.providers]]
1993type = "claude"
1994model = "claude-sonnet-4-6"
1995"#,
1996        );
1997        assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
1998    }
1999
2000    #[test]
2001    fn effective_model_reads_from_providers_first() {
2002        let cfg = parse_llm(
2003            r#"
2004[llm]
2005
2006[[llm.providers]]
2007type = "ollama"
2008model = "qwen3:8b"
2009"#,
2010        );
2011        assert_eq!(cfg.effective_model(), "qwen3:8b");
2012    }
2013
2014    #[test]
2015    fn effective_model_skips_embed_only_provider() {
2016        let cfg = parse_llm(
2017            r#"
2018[llm]
2019
2020[[llm.providers]]
2021type = "ollama"
2022model = "gemma4:26b"
2023embed = true
2024
2025[[llm.providers]]
2026type = "openai"
2027model = "gpt-4o-mini"
2028"#,
2029        );
2030        assert_eq!(cfg.effective_model(), "gpt-4o-mini");
2031    }
2032
2033    #[test]
2034    fn effective_base_url_default_when_absent() {
2035        let cfg = parse_llm("[llm]\n");
2036        assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
2037    }
2038
2039    #[test]
2040    fn effective_base_url_from_providers_entry() {
2041        let cfg = parse_llm(
2042            r#"
2043[llm]
2044
2045[[llm.providers]]
2046type = "ollama"
2047base_url = "http://myhost:11434"
2048"#,
2049        );
2050        assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
2051    }
2052
2053    // ─── ComplexityRoutingConfig / LlmRoutingStrategy::Triage TOML parsing ──
2054
2055    #[test]
2056    fn complexity_routing_defaults() {
2057        let cr = ComplexityRoutingConfig::default();
2058        assert!(
2059            cr.bypass_single_provider,
2060            "bypass_single_provider must default to true"
2061        );
2062        assert_eq!(cr.triage_timeout_secs, 5);
2063        assert_eq!(cr.max_triage_tokens, 50);
2064        assert!(cr.triage_provider.is_none());
2065        assert!(cr.tiers.simple.is_none());
2066    }
2067
2068    #[test]
2069    fn complexity_routing_toml_round_trip() {
2070        let cfg = parse_llm(
2071            r#"
2072[llm]
2073routing = "triage"
2074
2075[llm.complexity_routing]
2076triage_provider = "fast"
2077bypass_single_provider = false
2078triage_timeout_secs = 10
2079max_triage_tokens = 100
2080
2081[llm.complexity_routing.tiers]
2082simple = "fast"
2083medium = "medium"
2084complex = "large"
2085expert = "opus"
2086"#,
2087        );
2088        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2089        let cr = cfg
2090            .complexity_routing
2091            .expect("complexity_routing must be present");
2092        assert_eq!(
2093            cr.triage_provider.as_ref().map(ProviderName::as_str),
2094            Some("fast")
2095        );
2096        assert!(!cr.bypass_single_provider);
2097        assert_eq!(cr.triage_timeout_secs, 10);
2098        assert_eq!(cr.max_triage_tokens, 100);
2099        assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
2100        assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
2101        assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
2102        assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
2103    }
2104
2105    #[test]
2106    fn complexity_routing_partial_tiers_toml() {
2107        // Only simple + complex configured; medium and expert are None.
2108        let cfg = parse_llm(
2109            r#"
2110[llm]
2111routing = "triage"
2112
2113[llm.complexity_routing.tiers]
2114simple = "haiku"
2115complex = "sonnet"
2116"#,
2117        );
2118        let cr = cfg
2119            .complexity_routing
2120            .expect("complexity_routing must be present");
2121        assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
2122        assert!(cr.tiers.medium.is_none());
2123        assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
2124        assert!(cr.tiers.expert.is_none());
2125        // Defaults still applied.
2126        assert!(cr.bypass_single_provider);
2127        assert_eq!(cr.triage_timeout_secs, 5);
2128    }
2129
2130    #[test]
2131    fn routing_strategy_triage_deserialized() {
2132        let cfg = parse_llm(
2133            r#"
2134[llm]
2135routing = "triage"
2136"#,
2137        );
2138        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2139    }
2140
2141    // ─── stt_provider_entry ───────────────────────────────────────────────────
2142
2143    #[test]
2144    fn stt_provider_entry_by_name_match() {
2145        let cfg = parse_llm(
2146            r#"
2147[llm]
2148
2149[[llm.providers]]
2150type = "openai"
2151name = "quality"
2152model = "gpt-5.4"
2153stt_model = "gpt-4o-mini-transcribe"
2154
2155[llm.stt]
2156provider = "quality"
2157"#,
2158        );
2159        let entry = cfg.stt_provider_entry().expect("should find stt provider");
2160        assert_eq!(entry.effective_name(), "quality");
2161        assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
2162    }
2163
2164    #[test]
2165    fn stt_provider_entry_auto_detect_when_provider_empty() {
2166        let cfg = parse_llm(
2167            r#"
2168[llm]
2169
2170[[llm.providers]]
2171type = "openai"
2172name = "openai-stt"
2173stt_model = "whisper-1"
2174
2175[llm.stt]
2176provider = ""
2177"#,
2178        );
2179        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2180        assert_eq!(entry.effective_name(), "openai-stt");
2181    }
2182
2183    #[test]
2184    fn stt_provider_entry_auto_detect_no_stt_section() {
2185        let cfg = parse_llm(
2186            r#"
2187[llm]
2188
2189[[llm.providers]]
2190type = "openai"
2191name = "openai-stt"
2192stt_model = "whisper-1"
2193"#,
2194        );
2195        // No [llm.stt] section — should still find first provider with stt_model.
2196        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2197        assert_eq!(entry.effective_name(), "openai-stt");
2198    }
2199
2200    #[test]
2201    fn stt_provider_entry_none_when_no_stt_model() {
2202        let cfg = parse_llm(
2203            r#"
2204[llm]
2205
2206[[llm.providers]]
2207type = "openai"
2208name = "quality"
2209model = "gpt-5.4"
2210"#,
2211        );
2212        assert!(cfg.stt_provider_entry().is_none());
2213    }
2214
2215    #[test]
2216    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
2217        // Named provider exists but has no stt_model; another unnamed has stt_model.
2218        let cfg = parse_llm(
2219            r#"
2220[llm]
2221
2222[[llm.providers]]
2223type = "openai"
2224name = "quality"
2225model = "gpt-5.4"
2226
2227[[llm.providers]]
2228type = "openai"
2229name = "openai-stt"
2230stt_model = "whisper-1"
2231
2232[llm.stt]
2233provider = "quality"
2234"#,
2235        );
2236        // "quality" has no stt_model — returns None for name-based lookup.
2237        assert!(cfg.stt_provider_entry().is_none());
2238    }
2239
2240    #[test]
2241    fn stt_config_deserializes_new_slim_format() {
2242        let cfg = parse_llm(
2243            r#"
2244[llm]
2245
2246[[llm.providers]]
2247type = "openai"
2248name = "quality"
2249stt_model = "whisper-1"
2250
2251[llm.stt]
2252provider = "quality"
2253language = "en"
2254"#,
2255        );
2256        let stt = cfg.stt.as_ref().expect("stt section present");
2257        assert_eq!(stt.provider, "quality");
2258        assert_eq!(stt.language, "en");
2259    }
2260
2261    #[test]
2262    fn stt_config_default_provider_is_empty() {
2263        // Verify that W4 fix: default_stt_provider() returns "" not "whisper".
2264        assert_eq!(default_stt_provider(), "");
2265    }
2266
2267    #[test]
2268    fn validate_stt_missing_provider_ok() {
2269        let cfg = parse_llm("[llm]\n");
2270        assert!(cfg.validate_stt().is_ok());
2271    }
2272
2273    #[test]
2274    fn validate_stt_valid_reference() {
2275        let cfg = parse_llm(
2276            r#"
2277[llm]
2278
2279[[llm.providers]]
2280type = "openai"
2281name = "quality"
2282stt_model = "whisper-1"
2283
2284[llm.stt]
2285provider = "quality"
2286"#,
2287        );
2288        assert!(cfg.validate_stt().is_ok());
2289    }
2290
2291    #[test]
2292    fn validate_stt_nonexistent_provider_errors() {
2293        let cfg = parse_llm(
2294            r#"
2295[llm]
2296
2297[[llm.providers]]
2298type = "openai"
2299name = "quality"
2300model = "gpt-5.4"
2301
2302[llm.stt]
2303provider = "nonexistent"
2304"#,
2305        );
2306        assert!(cfg.validate_stt().is_err());
2307    }
2308
2309    #[test]
2310    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
2311        // MEDIUM: provider is found but has no stt_model — should return Ok (warn path, not error).
2312        let cfg = parse_llm(
2313            r#"
2314[llm]
2315
2316[[llm.providers]]
2317type = "openai"
2318name = "quality"
2319model = "gpt-5.4"
2320
2321[llm.stt]
2322provider = "quality"
2323"#,
2324        );
2325        // validate_stt must succeed (only a tracing::warn is emitted — not an error).
2326        assert!(cfg.validate_stt().is_ok());
2327        // stt_provider_entry must return None because no stt_model is set.
2328        assert!(
2329            cfg.stt_provider_entry().is_none(),
2330            "stt_provider_entry must be None when provider has no stt_model"
2331        );
2332    }
2333
2334    // ─── BanditConfig::warmup_queries deserialization ─────────────────────────
2335
2336    #[test]
2337    fn bandit_warmup_queries_explicit_value_is_deserialized() {
2338        let cfg = parse_llm(
2339            r#"
2340[llm]
2341
2342[llm.router]
2343strategy = "bandit"
2344
2345[llm.router.bandit]
2346warmup_queries = 50
2347"#,
2348        );
2349        let bandit = cfg
2350            .router
2351            .expect("router section must be present")
2352            .bandit
2353            .expect("bandit section must be present");
2354        assert_eq!(
2355            bandit.warmup_queries,
2356            Some(50),
2357            "warmup_queries = 50 must deserialize to Some(50)"
2358        );
2359    }
2360
2361    #[test]
2362    fn bandit_warmup_queries_explicit_null_is_none() {
2363        // Explicitly writing the field as absent: field simply not present is
2364        // equivalent due to #[serde(default)]. Test that an explicit 0 is Some(0).
2365        let cfg = parse_llm(
2366            r#"
2367[llm]
2368
2369[llm.router]
2370strategy = "bandit"
2371
2372[llm.router.bandit]
2373warmup_queries = 0
2374"#,
2375        );
2376        let bandit = cfg
2377            .router
2378            .expect("router section must be present")
2379            .bandit
2380            .expect("bandit section must be present");
2381        // 0 is a valid explicit value — it means "preserve computed default".
2382        assert_eq!(
2383            bandit.warmup_queries,
2384            Some(0),
2385            "warmup_queries = 0 must deserialize to Some(0)"
2386        );
2387    }
2388
2389    #[test]
2390    fn bandit_warmup_queries_missing_field_defaults_to_none() {
2391        // When warmup_queries is omitted entirely, #[serde(default)] must produce None.
2392        let cfg = parse_llm(
2393            r#"
2394[llm]
2395
2396[llm.router]
2397strategy = "bandit"
2398
2399[llm.router.bandit]
2400alpha = 1.5
2401"#,
2402        );
2403        let bandit = cfg
2404            .router
2405            .expect("router section must be present")
2406            .bandit
2407            .expect("bandit section must be present");
2408        assert_eq!(
2409            bandit.warmup_queries, None,
2410            "omitted warmup_queries must default to None"
2411        );
2412    }
2413
2414    #[test]
2415    fn provider_name_new_and_as_str() {
2416        let n = ProviderName::new("fast");
2417        assert_eq!(n.as_str(), "fast");
2418        assert!(!n.is_empty());
2419    }
2420
2421    #[test]
2422    fn provider_name_default_is_empty() {
2423        let n = ProviderName::default();
2424        assert!(n.is_empty());
2425        assert_eq!(n.as_str(), "");
2426    }
2427
2428    #[test]
2429    fn provider_name_partial_eq_str() {
2430        let n = ProviderName::new("fast");
2431        assert_eq!(n, "fast");
2432        assert_ne!(n, "slow");
2433    }
2434
2435    #[test]
2436    fn provider_name_serde_roundtrip() {
2437        let n = ProviderName::new("my-provider");
2438        let json = serde_json::to_string(&n).expect("serialize");
2439        assert_eq!(json, "\"my-provider\"");
2440        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2441        assert_eq!(back, n);
2442    }
2443
2444    #[test]
2445    fn provider_name_serde_empty_roundtrip() {
2446        let n = ProviderName::default();
2447        let json = serde_json::to_string(&n).expect("serialize");
2448        assert_eq!(json, "\"\"");
2449        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2450        assert_eq!(back, n);
2451        assert!(back.is_empty());
2452    }
2453
2454    // ─── GonkaNode / ProviderKind::Gonka ─────────────────────────────────────
2455
2456    fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2457        ProviderEntry {
2458            provider_type: ProviderKind::Gonka,
2459            name: Some("my-gonka".into()),
2460            gonka_nodes: nodes,
2461            ..Default::default()
2462        }
2463    }
2464
2465    fn valid_gonka_nodes() -> Vec<GonkaNode> {
2466        vec![
2467            GonkaNode {
2468                url: "https://node1.gonka.ai".into(),
2469                address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2470                name: Some("node1".into()),
2471            },
2472            GonkaNode {
2473                url: "https://node2.gonka.ai".into(),
2474                address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2475                name: Some("node2".into()),
2476            },
2477            GonkaNode {
2478                url: "http://node3.internal".into(),
2479                address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2480                name: None,
2481            },
2482        ]
2483    }
2484
2485    #[test]
2486    fn validate_gonka_valid() {
2487        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2488        assert!(entry.validate().is_ok());
2489    }
2490
2491    #[test]
2492    fn validate_gonka_empty_nodes_errors() {
2493        let entry = gonka_entry_with_nodes(vec![]);
2494        let err = entry.validate().unwrap_err();
2495        assert!(
2496            err.to_string().contains("gonka_nodes"),
2497            "error should mention gonka_nodes: {err}"
2498        );
2499    }
2500
2501    #[test]
2502    fn validate_gonka_node_empty_url_errors() {
2503        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2504            url: String::new(),
2505            address: "gonka1test".into(),
2506            name: None,
2507        }]);
2508        let err = entry.validate().unwrap_err();
2509        assert!(err.to_string().contains("url"), "{err}");
2510    }
2511
2512    #[test]
2513    fn validate_gonka_node_invalid_scheme_errors() {
2514        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2515            url: "ftp://node.gonka.ai".into(),
2516            address: "gonka1test".into(),
2517            name: None,
2518        }]);
2519        let err = entry.validate().unwrap_err();
2520        assert!(err.to_string().contains("http"), "{err}");
2521    }
2522
2523    #[test]
2524    fn validate_gonka_without_name_errors() {
2525        let entry = ProviderEntry {
2526            provider_type: ProviderKind::Gonka,
2527            name: None,
2528            gonka_nodes: valid_gonka_nodes(),
2529            ..Default::default()
2530        };
2531        let err = entry.validate().unwrap_err();
2532        assert!(err.to_string().contains("gonka"), "{err}");
2533    }
2534
2535    #[test]
2536    fn gonka_toml_round_trip() {
2537        let toml = r#"
2538[llm]
2539
2540[[llm.providers]]
2541type = "gonka"
2542name = "my-gonka"
2543gonka_chain_prefix = "custom-chain"
2544
2545[[llm.providers.gonka_nodes]]
2546url = "https://node1.gonka.ai"
2547address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2548name = "node1"
2549
2550[[llm.providers.gonka_nodes]]
2551url = "https://node2.gonka.ai"
2552address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
2553name = "node2"
2554
2555[[llm.providers.gonka_nodes]]
2556url = "https://node3.gonka.ai"
2557address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
2558"#;
2559        let cfg = parse_llm(toml);
2560        assert_eq!(cfg.providers.len(), 1);
2561        let entry = &cfg.providers[0];
2562        assert_eq!(entry.provider_type, ProviderKind::Gonka);
2563        assert_eq!(entry.name.as_deref(), Some("my-gonka"));
2564        let nodes = &entry.gonka_nodes;
2565        assert_eq!(nodes.len(), 3);
2566        assert_eq!(nodes[0].url, "https://node1.gonka.ai");
2567        assert_eq!(
2568            nodes[0].address,
2569            "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2570        );
2571        assert_eq!(nodes[0].name.as_deref(), Some("node1"));
2572        assert_eq!(nodes[2].name, None);
2573        assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
2574    }
2575
2576    #[test]
2577    fn gonka_default_chain_prefix() {
2578        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2579        assert_eq!(entry.effective_gonka_chain_prefix(), "gonka");
2580    }
2581
2582    #[test]
2583    fn gonka_explicit_chain_prefix() {
2584        let entry = ProviderEntry {
2585            provider_type: ProviderKind::Gonka,
2586            name: Some("my-gonka".into()),
2587            gonka_nodes: valid_gonka_nodes(),
2588            gonka_chain_prefix: Some("my-chain".into()),
2589            ..Default::default()
2590        };
2591        assert_eq!(entry.effective_gonka_chain_prefix(), "my-chain");
2592    }
2593
2594    #[test]
2595    fn effective_model_gonka_is_empty() {
2596        let entry = ProviderEntry {
2597            provider_type: ProviderKind::Gonka,
2598            model: None,
2599            ..Default::default()
2600        };
2601        assert_eq!(entry.effective_model(), "");
2602    }
2603
2604    #[test]
2605    fn existing_configs_still_parse() {
2606        let toml = r#"
2607[llm]
2608
2609[[llm.providers]]
2610type = "ollama"
2611model = "qwen3:8b"
2612
2613[[llm.providers]]
2614type = "claude"
2615name = "claude"
2616model = "claude-sonnet-4-6"
2617"#;
2618        let cfg = parse_llm(toml);
2619        assert_eq!(cfg.providers.len(), 2);
2620        assert_eq!(cfg.providers[0].provider_type, ProviderKind::Ollama);
2621        assert_eq!(cfg.providers[1].provider_type, ProviderKind::Claude);
2622    }
2623
    // ── ProviderEntry::validate — Cocoon URL, model, and pricing validation ───
2625
2626    fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
2627        ProviderEntry {
2628            provider_type: ProviderKind::Cocoon,
2629            name: Some("cocoon".into()),
2630            cocoon_client_url: url.map(str::to_owned),
2631            model: model.map(str::to_owned),
2632            ..Default::default()
2633        }
2634    }
2635
2636    #[test]
2637    fn test_cocoon_url_validation_accepts_http() {
2638        assert!(
2639            cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2640                .validate()
2641                .is_ok()
2642        );
2643    }
2644
2645    #[test]
2646    fn test_cocoon_url_validation_accepts_https_localhost() {
2647        assert!(
2648            cocoon_entry(Some("https://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2649                .validate()
2650                .is_ok()
2651        );
2652    }
2653
2654    #[test]
2655    fn test_cocoon_url_validation_rejects_non_localhost() {
2656        let err = cocoon_entry(Some("http://192.168.1.10:10000"), Some("Qwen/Qwen3-0.6B"))
2657            .validate()
2658            .unwrap_err();
2659        assert!(
2660            err.to_string().contains("localhost"),
2661            "error should mention localhost restriction: {err}"
2662        );
2663    }
2664
2665    #[test]
2666    fn test_cocoon_url_validation_rejects_non_http_scheme() {
2667        let err = cocoon_entry(Some("ftp://localhost"), Some("Qwen/Qwen3-0.6B"))
2668            .validate()
2669            .unwrap_err();
2670        assert!(
2671            err.to_string().contains("ftp"),
2672            "error should mention the bad scheme: {err}"
2673        );
2674    }
2675
2676    #[test]
2677    fn test_cocoon_url_validation_rejects_invalid_url() {
2678        let err = cocoon_entry(Some("not-a-url"), Some("Qwen/Qwen3-0.6B"))
2679            .validate()
2680            .unwrap_err();
2681        assert!(
2682            err.to_string().contains("not-a-url"),
2683            "error should mention the bad value: {err}"
2684        );
2685    }
2686
2687    #[test]
2688    fn test_cocoon_url_none_passes() {
2689        assert!(
2690            cocoon_entry(None, Some("Qwen/Qwen3-0.6B"))
2691                .validate()
2692                .is_ok()
2693        );
2694    }
2695
2696    #[test]
2697    fn test_cocoon_model_empty_rejected() {
2698        let err = cocoon_entry(Some("http://localhost:10000"), Some(""))
2699            .validate()
2700            .unwrap_err();
2701        assert!(
2702            err.to_string().contains("empty"),
2703            "error should mention 'empty': {err}"
2704        );
2705    }
2706
2707    #[test]
2708    fn test_cocoon_model_none_passes() {
2709        assert!(
2710            cocoon_entry(Some("http://localhost:10000"), None)
2711                .validate()
2712                .is_ok()
2713        );
2714    }
2715
2716    #[test]
2717    fn validate_cocoon_pricing_negative_prompt_errors() {
2718        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2719        e.cocoon_pricing = Some(CocoonPricing {
2720            prompt_cents_per_1k: -1.0,
2721            completion_cents_per_1k: 0.03,
2722        });
2723        assert!(e.validate().is_err());
2724    }
2725
2726    #[test]
2727    fn validate_cocoon_pricing_negative_completion_errors() {
2728        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2729        e.cocoon_pricing = Some(CocoonPricing {
2730            prompt_cents_per_1k: 0.01,
2731            completion_cents_per_1k: -0.5,
2732        });
2733        assert!(e.validate().is_err());
2734    }
2735
2736    #[test]
2737    fn validate_cocoon_pricing_valid_passes() {
2738        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2739        e.cocoon_pricing = Some(CocoonPricing {
2740            prompt_cents_per_1k: 0.01,
2741            completion_cents_per_1k: 0.03,
2742        });
2743        assert!(e.validate().is_ok());
2744    }
2745}