Skip to main content

zeph_config/
providers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use serde::{Deserialize, Serialize};
5
6// ── LLM provider config types (moved from zeph-llm) ─────────────────────────
7
/// Extended or adaptive thinking mode for Claude.
///
/// Internally tagged: the variant name is written in `snake_case` to a `mode` field that
/// sits next to the variant's own fields, e.g.
/// `{ "mode": "extended", "budget_tokens": 10000 }` or `{ "mode": "adaptive" }`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum ThinkingConfig {
    /// Extended thinking with an explicit token budget.
    Extended {
        /// Maximum thinking tokens to allocate.
        budget_tokens: u32,
    },
    /// Adaptive thinking that selects effort automatically.
    Adaptive {
        /// Explicit effort hint when provided; model-chosen when `None`.
        ///
        /// May be omitted on input; left out of the serialized form when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        effort: Option<ThinkingEffort>,
    },
}
27
/// Effort level for adaptive thinking.
///
/// Variants (de)serialize as their lowercase names: `"low"`, `"medium"`, `"high"`.
/// [`Default`] yields [`ThinkingEffort::Medium`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingEffort {
    /// Minimal thinking; fastest responses.
    Low,
    /// Balanced thinking depth. This is the default.
    #[default]
    Medium,
    /// Maximum thinking depth; slowest responses.
    High,
}
40
/// Prompt-cache TTL variant for the Anthropic API.
///
/// When used as a TOML config value the accepted strings are `"ephemeral"` and `"1h"`.
/// On the wire (Anthropic API), `OneHour` serializes as `"1h"` inside the `cache_control.ttl`
/// field.
///
/// [`Default`] yields [`CacheTtl::Ephemeral`].
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum CacheTtl {
    /// Default ephemeral TTL (~5 minutes). No beta header required.
    #[default]
    Ephemeral,
    /// Extended 1-hour TTL. Requires the `extended-cache-ttl-2025-04-25` beta header.
    /// Cache writes cost approximately 2× more than `Ephemeral`.
    // Explicit rename: the `snake_case` rule would otherwise produce `one_hour`.
    #[serde(rename = "1h")]
    OneHour,
}
57
58impl CacheTtl {
59    /// Returns `true` when this TTL variant requires the `extended-cache-ttl-2025-04-25` beta
60    /// header to be sent with each request.
61    #[must_use]
62    pub fn requires_beta(self) -> bool {
63        match self {
64            Self::OneHour => true,
65            Self::Ephemeral => false,
66        }
67    }
68}
69
/// Thinking level for Gemini models that support extended reasoning.
///
/// Maps to `generationConfig.thinkingConfig.thinkingLevel` in the Gemini API.
/// Valid for Gemini 3+ models. For Gemini 2.5, use `thinking_budget` instead.
///
/// Variants (de)serialize as their lowercase names (`"minimal"`, `"low"`, `"medium"`,
/// `"high"`). There is no [`Default`] implementation, so a level must be chosen explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum GeminiThinkingLevel {
    /// Minimal reasoning pass.
    Minimal,
    /// Low reasoning depth.
    Low,
    /// Medium reasoning depth.
    Medium,
    /// Full reasoning depth.
    High,
}
86
87pub use zeph_common::ProviderName;
88
/// Serde default for `LlmConfig::response_cache_ttl_secs`: one hour, in seconds.
fn default_response_cache_ttl_secs() -> u64 {
    60 * 60
}

/// Serde default for `LlmConfig::semantic_cache_threshold` (cosine similarity).
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}

/// Serde default for `LlmConfig::semantic_cache_max_candidates`.
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}

/// Serde default for `LlmConfig::router_ema_alpha`.
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}

/// Serde default for `LlmConfig::router_reorder_interval`.
fn default_router_reorder_interval() -> u64 {
    10_u64
}

/// Default embedding model name.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}
112
// Default-value providers for Candle / generation settings.
// NOTE(review): the structs that reference these are not visible in this part of the
// file; presumably they are wired in via `#[serde(default = "...")]`.

/// Default Candle source identifier: `"huggingface"`.
fn default_candle_source() -> String {
    String::from("huggingface")
}

/// Default chat template name: `"chatml"`.
fn default_chat_template() -> String {
    String::from("chatml")
}

/// Default Candle device identifier: `"cpu"`.
fn default_candle_device() -> String {
    String::from("cpu")
}

/// Default sampling temperature.
fn default_temperature() -> f64 {
    0.7_f64
}

/// Default maximum token count.
fn default_max_tokens() -> usize {
    2048_usize
}

/// Default seed value.
fn default_seed() -> u64 {
    42_u64
}

/// Default repeat penalty.
fn default_repeat_penalty() -> f32 {
    1.1_f32
}

/// Default `repeat_last_n` value.
fn default_repeat_last_n() -> usize {
    64_usize
}
144
/// Serde default for `CascadeConfig::quality_threshold`.
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}

/// Serde default for `CascadeConfig::max_escalations`.
fn default_cascade_max_escalations() -> u8 {
    2_u8
}

/// Serde default for `CascadeConfig::window_size`.
fn default_cascade_window_size() -> usize {
    50_usize
}

/// Serde default for `CascadeConfig::judge_timeout_ms`: five seconds, in milliseconds.
fn default_cascade_judge_timeout_ms() -> u64 {
    5 * 1_000
}
160
/// Serde default for `ReputationConfig::decay_factor`.
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}

/// Serde default for `ReputationConfig::weight`.
fn default_reputation_weight() -> f64 {
    0.3_f64
}

/// Serde default for `ReputationConfig::min_observations`.
fn default_reputation_min_observations() -> u64 {
    5_u64
}
172
/// Default STT provider name: the empty string, which selects auto-detection.
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}

/// Default STT transcription language hint: `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
184
/// Returns the default embedding model name used by `[llm] embedding_model`.
///
/// Crate-visible accessor that forwards to the private `default_embedding_model`.
#[must_use]
pub(crate) fn get_default_embedding_model() -> String {
    default_embedding_model()
}

/// Returns the default response cache TTL in seconds.
///
/// Crate-visible accessor that forwards to the private `default_response_cache_ttl_secs`.
#[must_use]
pub(crate) fn get_default_response_cache_ttl_secs() -> u64 {
    default_response_cache_ttl_secs()
}

/// Returns the default EMA alpha for the router latency estimator.
///
/// Crate-visible accessor that forwards to the private `default_router_ema_alpha`.
#[must_use]
pub(crate) fn get_default_router_ema_alpha() -> f64 {
    default_router_ema_alpha()
}

/// Returns the default router reorder interval (turns between provider re-ranking).
///
/// Crate-visible accessor that forwards to the private `default_router_reorder_interval`.
#[must_use]
pub(crate) fn get_default_router_reorder_interval() -> u64 {
    default_router_reorder_interval()
}
208
/// LLM provider backend selector.
///
/// Used in `[[llm.providers]]` entries as the `type` field. Each variant (de)serializes
/// as its lowercase name (for example `OpenAi` ⇄ `"openai"`).
///
/// The enum is `#[non_exhaustive]`: code in other crates must include a wildcard arm
/// when matching on it.
///
/// # Example (TOML)
///
/// ```toml
/// [[llm.providers]]
/// type = "openai"
/// model = "gpt-4o"
/// name = "quality"
/// ```
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
    /// Local Ollama server (default base URL: `http://localhost:11434`).
    Ollama,
    /// Anthropic Claude API.
    Claude,
    /// `OpenAI` API.
    OpenAi,
    /// Google Gemini API.
    Gemini,
    /// Local Candle inference (CPU/GPU, no external server required).
    Candle,
    /// OpenAI-compatible third-party API (e.g. Groq, Together AI, LM Studio).
    Compatible,
    /// Native Gonka blockchain provider.
    Gonka,
    /// Cocoon confidential compute network via localhost sidecar.
    Cocoon,
}
242
impl ProviderKind {
    /// Return the lowercase string identifier for this provider kind.
    ///
    /// The returned strings are the same lowercase names the enum uses for its serde
    /// representation, so they can be compared directly with the `type` value in config.
    ///
    /// # Examples
    ///
    /// ```
    /// use zeph_config::ProviderKind;
    ///
    /// assert_eq!(ProviderKind::Claude.as_str(), "claude");
    /// assert_eq!(ProviderKind::OpenAi.as_str(), "openai");
    /// ```
    #[must_use]
    pub fn as_str(self) -> &'static str {
        // Deliberately exhaustive (no wildcard arm): adding a variant without giving it
        // an identifier is a compile error inside this crate.
        match self {
            Self::Ollama => "ollama",
            Self::Claude => "claude",
            Self::OpenAi => "openai",
            Self::Gemini => "gemini",
            Self::Candle => "candle",
            Self::Compatible => "compatible",
            Self::Gonka => "gonka",
            Self::Cocoon => "cocoon",
        }
    }
}
268
269impl std::fmt::Display for ProviderKind {
270    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
271        f.write_str(self.as_str())
272    }
273}
274
/// LLM configuration, nested under `[llm]` in TOML.
///
/// Declares the provider pool and controls routing, embedding, caching, and STT.
/// All providers are declared in `[[llm.providers]]`; subsystems reference them by
/// the `name` field using a `*_provider` config key.
///
/// # Example (TOML)
///
/// ```toml
/// [[llm.providers]]
/// name = "fast"
/// type = "openai"
/// model = "gpt-4o-mini"
///
/// [[llm.providers]]
/// name = "quality"
/// type = "claude"
/// model = "claude-opus-4-5"
///
/// [llm]
/// routing = "none"
/// embedding_model = "qwen3-embedding"
/// ```
#[derive(Debug, Deserialize, Serialize)]
pub struct LlmConfig {
    /// Provider pool. First entry is default unless one is marked `default = true`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub providers: Vec<ProviderEntry>,

    /// Routing strategy for multi-provider configs.
    #[serde(default, skip_serializing_if = "is_routing_none")]
    pub routing: LlmRoutingStrategy,

    /// Embedding model name. Default: `"qwen3-embedding"`.
    #[serde(default = "default_embedding_model_opt")]
    pub embedding_model: String,
    /// Optional Candle settings; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candle: Option<CandleConfig>,
    /// Speech-to-text settings from `[llm.stt]`; `None` when the section is absent.
    #[serde(default)]
    pub stt: Option<SttConfig>,
    /// Enable the response cache. Default: false.
    #[serde(default)]
    pub response_cache_enabled: bool,
    /// Response cache TTL in seconds. Default: 3600.
    #[serde(default = "default_response_cache_ttl_secs")]
    pub response_cache_ttl_secs: u64,
    /// Enable semantic similarity-based response caching. Requires embedding support.
    #[serde(default)]
    pub semantic_cache_enabled: bool,
    /// Cosine similarity threshold for semantic cache hits (0.0–1.0).
    ///
    /// Only the highest-scoring candidate above this threshold is returned.
    /// Lower values produce more cache hits but risk returning less relevant responses.
    /// Recommended range: 0.92–0.98; default: 0.95.
    #[serde(default = "default_semantic_cache_threshold")]
    pub semantic_cache_threshold: f32,
    /// Maximum cached entries to examine per semantic lookup (SQL `LIMIT` clause in
    /// `ResponseCache::get_semantic()`). Controls the recall-vs-performance tradeoff:
    ///
    /// - **Higher values** (e.g. 50): scan more entries, better chance of finding a
    ///   semantically similar cached response, but slower queries.
    /// - **Lower values** (e.g. 5): faster queries, but may miss relevant cached entries
    ///   when the cache is large.
    /// - **Default (10)**: balanced middle ground for typical workloads.
    ///
    /// Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running
    /// sessions with many cached responses); reduce to 5 for low-latency interactive use.
    /// Env override: `ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES`.
    #[serde(default = "default_semantic_cache_max_candidates")]
    pub semantic_cache_max_candidates: u32,
    /// Enable the router's EMA setting. Default: false.
    // NOTE(review): presumably toggles EMA latency-aware provider ordering — confirm
    // against the router implementation.
    #[serde(default)]
    pub router_ema_enabled: bool,
    /// EMA alpha for the router latency estimator. Default: 0.1.
    #[serde(default = "default_router_ema_alpha")]
    pub router_ema_alpha: f64,
    /// Router reorder interval (turns between provider re-ranking). Default: 10.
    #[serde(default = "default_router_reorder_interval")]
    pub router_reorder_interval: u64,
    /// Routing configuration for Thompson/Cascade strategies.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub router: Option<RouterConfig>,
    /// Provider-specific instruction file to inject into the system prompt.
    /// Merged with `agent.instruction_files` at startup.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub instruction_file: Option<std::path::PathBuf>,
    /// Shorthand model spec for tool-pair summarization and context compaction.
    /// Format: `ollama/<model>`, `claude[/<model>]`, `openai[/<model>]`, `compatible/<name>`, `candle`.
    /// Ignored when `[llm.summary_provider]` is set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_model: Option<String>,
    /// Structured provider config for summarization. Takes precedence over `summary_model`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub summary_provider: Option<ProviderEntry>,

    /// Complexity triage routing configuration. Required when `routing = "triage"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub complexity_routing: Option<ComplexityRoutingConfig>,

    /// Collaborative Entropy (`CoE`) configuration. `None` = `CoE` disabled.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coe: Option<CoeConfig>,
}
372
/// Serde default for `LlmConfig::embedding_model`; forwards to `default_embedding_model`.
fn default_embedding_model_opt() -> String {
    default_embedding_model()
}
376
377#[allow(clippy::trivially_copy_pass_by_ref)]
378fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
379    *s == LlmRoutingStrategy::None
380}
381
382impl LlmConfig {
383    /// Effective provider kind for the primary (first/default) provider in the pool.
384    #[must_use]
385    pub fn effective_provider(&self) -> ProviderKind {
386        self.providers
387            .first()
388            .map_or(ProviderKind::Ollama, |e| e.provider_type)
389    }
390
391    /// Effective base URL for the primary provider.
392    #[must_use]
393    pub fn effective_base_url(&self) -> &str {
394        self.providers
395            .first()
396            .and_then(|e| e.base_url.as_deref())
397            .unwrap_or("http://localhost:11434")
398    }
399
400    /// Effective model for the primary chat-capable provider.
401    ///
402    /// Skips embed-only entries (those with `embed = true`) and returns the model of the
403    /// first provider that can handle chat requests. Falls back to `"qwen3:8b"` when no
404    /// chat-capable provider is configured.
405    #[must_use]
406    pub fn effective_model(&self) -> &str {
407        self.providers
408            .iter()
409            .find(|e| !e.embed)
410            .and_then(|e| e.model.as_deref())
411            .unwrap_or("qwen3:8b")
412    }
413
414    /// Find the provider entry designated for STT.
415    ///
416    /// Resolution priority:
417    /// 1. `[llm.stt].provider` matches `[[llm.providers]].name` and the entry has `stt_model`
418    /// 2. `[llm.stt].provider` is empty — fall through to auto-detect
419    /// 3. First provider with `stt_model` set (auto-detect fallback)
420    /// 4. `None` — STT disabled
421    #[must_use]
422    pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
423        let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
424        if name_hint.is_empty() {
425            self.providers.iter().find(|p| p.stt_model.is_some())
426        } else {
427            self.providers
428                .iter()
429                .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
430        }
431    }
432
433    /// Validate that the config uses the new `[[llm.providers]]` format.
434    ///
435    /// # Errors
436    ///
437    /// Returns `ConfigError::Validation` when no providers are configured.
438    pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
439        Ok(())
440    }
441
442    /// Validate STT config cross-references.
443    ///
444    /// # Errors
445    ///
446    /// Returns `ConfigError::Validation` when the referenced STT provider does not exist.
447    pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
448        use crate::error::ConfigError;
449
450        let Some(stt) = &self.stt else {
451            return Ok(());
452        };
453        if stt.provider.is_empty() {
454            return Ok(());
455        }
456        let found = self
457            .providers
458            .iter()
459            .find(|p| p.effective_name() == stt.provider);
460        match found {
461            None => {
462                return Err(ConfigError::Validation(format!(
463                    "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
464                    stt.provider
465                )));
466            }
467            Some(entry) if entry.stt_model.is_none() => {
468                tracing::warn!(
469                    provider = stt.provider,
470                    "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
471                );
472            }
473            _ => {}
474        }
475        Ok(())
476    }
477
478    /// Resolve `provider_name` to its model string and emit a startup warning when the
479    /// model does not look like a fast-tier model.
480    ///
481    /// **Soft check — never returns an error.** Misconfiguration produces a single
482    /// `tracing::warn!` at startup so operators can fix configs without being blocked.
483    ///
484    /// Rules:
485    /// - Empty `provider_name` → silently OK (caller will use the primary provider).
486    /// - Provider not found in pool → warns `"<label> provider '<name>' not found"`.
487    /// - Model resolved but not in `FAST_TIER_MODEL_HINTS` and not in `extra_allowlist` →
488    ///   warns `"<label> provider '<name>' uses '<model>' which may not be fast-tier"`.
489    /// - Model matches a hint or allowlist entry → silently OK.
490    ///
491    /// # Examples
492    ///
493    /// ```no_run
494    /// use zeph_config::providers::{LlmConfig, ProviderName};
495    ///
496    /// // LlmConfig is constructed via config file; here we illustrate the call shape.
497    /// # let cfg: LlmConfig = unimplemented!();
498    /// // empty provider name is silently ok
499    /// cfg.warn_non_fast_tier_provider(&ProviderName::default(), "memcot.distill_provider", &[]);
500    /// ```
501    pub fn warn_non_fast_tier_provider(
502        &self,
503        provider_name: &ProviderName,
504        feature_label: &str,
505        extra_allowlist: &[String],
506    ) {
507        if provider_name.is_empty() {
508            return;
509        }
510        let name = provider_name.as_str();
511        let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
512            tracing::warn!(
513                provider = name,
514                "{feature_label} provider '{name}' not found in [[llm.providers]]"
515            );
516            return;
517        };
518        let model = entry.model.as_deref().unwrap_or("");
519        if model.is_empty() {
520            return;
521        }
522        let lower = model.to_lowercase();
523        let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
524        let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
525        if !in_hints && !in_extra {
526            tracing::warn!(
527                provider = name,
528                actual = model,
529                "{feature_label} provider '{name}' uses model '{model}' \
530                 which may not be fast-tier; prefer a fast model to bound distillation cost"
531            );
532        }
533    }
534}
535
/// Lowercased substrings that identify commonly accepted fast-tier models.
///
/// Used by [`LlmConfig::warn_non_fast_tier_provider`] for a soft startup check: a model
/// whose lowercased name contains any of these substrings is accepted silently.
/// Updating this list is non-breaking; a fast model that is missing from it only causes a
/// spurious "may not be fast-tier" warning, never an error.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
558
/// Speech-to-text configuration, nested under `[llm.stt]` in TOML.
///
/// When set, Zeph uses the referenced provider for voice transcription.
/// The provider must have an `stt_model` field set in its `[[llm.providers]]` entry.
///
/// Both keys are optional: a missing `provider` becomes the empty string (auto-detect)
/// and a missing `language` becomes `"auto"`.
///
/// # Example (TOML)
///
/// ```toml
/// [llm.stt]
/// provider = "fast"
/// language = "en"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SttConfig {
    /// Provider name from `[[llm.providers]]`. Empty string means auto-detect first provider
    /// with `stt_model` set.
    #[serde(default = "default_stt_provider")]
    pub provider: String,
    /// Language hint for transcription (e.g. `"en"`, `"auto"`).
    #[serde(default = "default_stt_language")]
    pub language: String,
}
581
/// Routing strategy selection for multi-provider routing.
///
/// Variants (de)serialize as their lowercase names (`"ema"`, `"thompson"`, `"cascade"`,
/// `"bandit"`). [`Default`] yields [`RouterStrategyConfig::Ema`].
///
/// The enum is `#[non_exhaustive]`: code in other crates must include a wildcard arm
/// when matching on it.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum RouterStrategyConfig {
    /// Exponential moving average latency-aware ordering.
    #[default]
    Ema,
    /// Thompson Sampling with Beta distributions (persistence-backed).
    Thompson,
    /// Cascade routing: try cheapest provider first, escalate on degenerate output.
    Cascade,
    /// PILOT: `LinUCB` contextual bandit with online learning and cost-aware reward.
    Bandit,
}
597
/// Agent Stability Index (ASI) configuration.
///
/// Tracks per-provider response coherence via a sliding window of response embeddings.
/// When coherence drops below `coherence_threshold`, the provider's routing prior is
/// penalized by `penalty_weight`. Disabled by default; session-only (no persistence).
///
/// Every field has a serde default, so an empty table deserializes to the same values
/// as [`AsiConfig::default`].
///
/// # Known Limitation
///
/// ASI embeddings are computed in a background `tokio::spawn` task after the response is
/// returned to the caller. Under high request rates, the coherence score used for routing
/// may lag 1–2 responses behind due to this fire-and-forget design. With the default
/// `window = 5`, this lag is tolerable — coherence is a slow-moving signal.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AsiConfig {
    /// Enable ASI coherence tracking. Default: false.
    #[serde(default)]
    pub enabled: bool,

    /// Sliding window size for response embeddings per provider. Default: 5.
    #[serde(default = "default_asi_window")]
    pub window: usize,

    /// Coherence score [0.0, 1.0] below which the provider is penalized. Default: 0.7.
    #[serde(default = "default_asi_coherence_threshold")]
    pub coherence_threshold: f32,

    /// Penalty weight applied to Thompson beta / EMA score on low coherence. Default: 0.3.
    ///
    /// For Thompson, this shifts the beta prior: `beta += penalty_weight * (threshold - coherence)`.
    /// For EMA, the score is multiplied by `max(0.5, coherence / threshold)`.
    #[serde(default = "default_asi_penalty_weight")]
    pub penalty_weight: f32,
}
631
/// Serde default for `AsiConfig::window`.
fn default_asi_window() -> usize {
    5_usize
}

/// Serde default for `AsiConfig::coherence_threshold`.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}

/// Serde default for `AsiConfig::penalty_weight`.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
643
644impl Default for AsiConfig {
645    fn default() -> Self {
646        Self {
647            enabled: false,
648            window: default_asi_window(),
649            coherence_threshold: default_asi_coherence_threshold(),
650            penalty_weight: default_asi_penalty_weight(),
651        }
652    }
653}
654
/// Routing configuration for multi-provider setups.
///
/// Every field has a serde default, so all keys are optional in TOML. Strategy-specific
/// sub-tables (`cascade`, `bandit`) are documented as only being consulted when the
/// matching `strategy` is selected.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RouterConfig {
    /// Routing strategy: `"ema"` (default), `"thompson"`, `"cascade"`, or `"bandit"`.
    #[serde(default)]
    pub strategy: RouterStrategyConfig,
    /// Path for persisting Thompson Sampling state. Defaults to `~/.zeph/router_thompson_state.json`.
    ///
    /// # Security
    ///
    /// This path is user-controlled. The application writes and reads a JSON file at
    /// this location. Ensure the path is within a directory that is not world-writable
    /// (e.g., avoid `/tmp`). The file is created with mode `0o600` on Unix.
    // NOTE(review): `ReputationConfig::state_path` documents its default under
    // `~/.config/zeph/` while this one says `~/.zeph/` — confirm which directory the
    // router actually uses.
    #[serde(default)]
    pub thompson_state_path: Option<String>,
    /// Cascade routing configuration. Only used when `strategy = "cascade"`.
    #[serde(default)]
    pub cascade: Option<CascadeConfig>,
    /// Bayesian reputation scoring configuration (RAPS). Disabled by default.
    #[serde(default)]
    pub reputation: Option<ReputationConfig>,
    /// PILOT bandit routing configuration. Only used when `strategy = "bandit"`.
    #[serde(default)]
    pub bandit: Option<BanditConfig>,
    /// Embedding-based quality gate threshold for Thompson/EMA routing. Default: disabled.
    ///
    /// When set, after provider selection, the cosine similarity between the query embedding
    /// and the response embedding is computed. If below this threshold, the next provider in
    /// the ordered list is tried. On exhaustion, the best response seen is returned.
    ///
    /// Only applies to Thompson and EMA strategies. Cascade uses its own quality classifier.
    /// Fail-open: embedding errors disable the gate for that request.
    #[serde(default)]
    pub quality_gate: Option<f32>,
    /// Agent Stability Index configuration. Disabled by default.
    #[serde(default)]
    pub asi: Option<AsiConfig>,
    /// Maximum number of concurrent `embed_batch` calls through the router.
    ///
    /// Limits simultaneous embedding HTTP requests to prevent provider rate-limiting
    /// and memory pressure during indexing or high-frequency recall. Default: 4.
    /// Set to 0 to disable the semaphore (unlimited concurrency).
    #[serde(default = "default_embed_concurrency")]
    pub embed_concurrency: usize,
}
700
/// Serde default for `RouterConfig::embed_concurrency`.
fn default_embed_concurrency() -> usize {
    4_usize
}
704
/// Configuration for Bayesian reputation scoring (RAPS — Reputation-Adjusted Provider Selection).
///
/// When enabled, quality outcomes from tool execution shift the routing scores over time,
/// giving an advantage to providers that consistently produce valid tool arguments.
///
/// Default: disabled. Set `enabled = true` to activate.
///
/// Every field has a serde default, so all keys are optional in TOML.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ReputationConfig {
    /// Enable reputation scoring. Default: false.
    #[serde(default)]
    pub enabled: bool,
    /// Session-level decay factor applied on each load. Range: (0.0, 1.0]. Default: 0.95.
    /// Lower values make reputation forget faster; 1.0 = no decay.
    #[serde(default = "default_reputation_decay_factor")]
    pub decay_factor: f64,
    /// Weight of reputation in routing score blend. Range: [0.0, 1.0]. Default: 0.3.
    ///
    /// **Warning**: values above 0.5 can aggressively suppress low-reputation providers.
    /// At `weight = 1.0` with `rep_factor = 0.0` (all failures), the routing score
    /// drops to zero — the provider becomes unreachable for that session. Stick to
    /// the default (0.3) unless you intentionally want strong reputation gating.
    #[serde(default = "default_reputation_weight")]
    pub weight: f64,
    /// Minimum quality observations before reputation influences routing. Default: 5.
    #[serde(default = "default_reputation_min_observations")]
    pub min_observations: u64,
    /// Path for persisting reputation state. Defaults to `~/.config/zeph/router_reputation_state.json`.
    // NOTE(review): the documented ranges above are not enforced by this type;
    // presumably validation happens elsewhere — confirm.
    #[serde(default)]
    pub state_path: Option<String>,
}
735
/// Configuration for cascade routing (`strategy = "cascade"`).
///
/// Cascade routing tries providers in chain order (cheapest first), escalating to
/// the next provider when the response is classified as degenerate (empty, repetitive,
/// incoherent). Chain order determines cost order: first provider = cheapest.
///
/// Every field has a serde default, so an empty table deserializes to the same values
/// as [`CascadeConfig::default`].
///
/// # Limitations
///
/// The heuristic classifier detects degenerate outputs only, not semantic failures.
/// Use `classifier_mode = "judge"` for semantic quality gating (adds LLM call cost).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Minimum quality score [0.0, 1.0] to accept a response without escalating.
    /// Responses scoring below this threshold trigger escalation. Default: 0.5.
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Maximum number of quality-based escalations per request.
    /// Network/API errors do not count against this budget.
    /// Default: 2 (allows up to 3 providers: cheap → mid → expensive).
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// Quality classifier mode: `"heuristic"` (default) or `"judge"`.
    /// Heuristic is zero-cost but detects only degenerate outputs.
    /// Judge requires a configured `summary_model` and adds one LLM call per evaluation.
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Rolling quality history window size per provider. Default: 50.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Maximum cumulative input+output tokens across all escalation levels.
    /// When exceeded, returns the best-seen response instead of escalating further.
    /// `None` disables the budget (unbounded escalation cost).
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Explicit cost ordering of provider names (cheapest first).
    /// When set, cascade routing sorts providers by their position in this list before
    /// trying them. Providers not in the list are appended after listed ones in their
    /// original chain order. When unset, chain order is used (default behavior).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,

    /// Hard timeout for the judge LLM call (milliseconds).
    /// If the judge does not respond within this budget, the call is treated as a failure
    /// and heuristic scoring is used instead. Default: 5000 (5 s).
    #[serde(default = "default_cascade_judge_timeout_ms")]
    pub judge_timeout_ms: u64,
}
788
789impl Default for CascadeConfig {
790    fn default() -> Self {
791        Self {
792            quality_threshold: default_cascade_quality_threshold(),
793            max_escalations: default_cascade_max_escalations(),
794            classifier_mode: CascadeClassifierMode::default(),
795            window_size: default_cascade_window_size(),
796            max_cascade_tokens: None,
797            cost_tiers: None,
798            judge_timeout_ms: default_cascade_judge_timeout_ms(),
799        }
800    }
801}
802
/// Quality classifier mode for cascade routing.
///
/// Variants (de)serialize as their lowercase names (`"heuristic"`, `"judge"`).
/// [`Default`] yields [`CascadeClassifierMode::Heuristic`].
///
/// The enum is `#[non_exhaustive]`: code in other crates must include a wildcard arm
/// when matching on it.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// Zero-cost heuristic: detects degenerate outputs (empty, repetitive, incoherent).
    /// Does not detect semantic failures (hallucinations, wrong answers).
    #[default]
    Heuristic,
    /// LLM-based judge: more accurate but adds latency. Falls back to heuristic on failure.
    /// Requires `summary_model` to be configured.
    Judge,
}
816
/// Serde default for `BanditConfig::alpha` (`LinUCB` exploration parameter).
fn default_bandit_alpha() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::dim` (feature vector dimension).
fn default_bandit_dim() -> usize {
    32_usize
}

/// Serde default for `BanditConfig::cost_weight`.
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}

/// Serde default for `BanditConfig::decay_factor`; `1.0` means no decay.
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}

/// Default embedding-call timeout for the bandit, in milliseconds.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}

/// Default bandit cache size.
// NOTE(review): the field using this default is outside the visible part of the file;
// the unit (presumably number of entries) should be confirmed there.
fn default_bandit_cache_size() -> usize {
    512_usize
}
840
841/// Configuration for PILOT bandit routing (`strategy = "bandit"`).
842///
843/// PILOT (Provider Intelligence via Learned Online Tuning) uses a `LinUCB` contextual
844/// bandit to learn which provider performs best for a given query context. The feature
845/// vector is derived from the query embedding (first `dim` components, L2-normalised).
846///
847/// **Cold start**: the bandit falls back to Thompson sampling for the first
848/// `10 * num_providers` queries (configurable). After warmup, `LinUCB` takes over.
849///
850/// **Embedding**: an `embedding_provider` must be set for feature vectors. If the embed
851/// call exceeds `embedding_timeout_ms` or fails, the bandit falls back to Thompson/uniform.
852/// Use a local provider (Ollama, Candle) to avoid network latency on the hot path.
853#[derive(Debug, Clone, Deserialize, Serialize)]
854pub struct BanditConfig {
855    /// `LinUCB` exploration parameter. Default: 1.0.
856    /// Higher values increase exploration; lower values favour exploitation.
857    #[serde(default = "default_bandit_alpha")]
858    pub alpha: f32,
859
860    /// Feature vector dimension (first `dim` components of the embedding).
861    ///
862    /// This is simple truncation, not PCA. The first raw embedding dimensions do not
863    /// necessarily capture the most variance. For `OpenAI` `text-embedding-3-*` models,
864    /// consider using the `dimensions` API parameter (Matryoshka embeddings) instead.
865    /// Default: 32.
866    #[serde(default = "default_bandit_dim")]
867    pub dim: usize,
868
869    /// Cost penalty weight in the reward signal: `reward = quality - cost_weight * cost_fraction`.
870    /// Default: 0.1. Increase to penalise expensive providers more aggressively.
871    #[serde(default = "default_bandit_cost_weight")]
872    pub cost_weight: f32,
873
874    /// Session-level decay applied to arm state on startup: `A = I + decay*(A-I)`, `b = decay*b`.
875    /// Values < 1.0 cause re-exploration after provider quality changes. Default: 1.0 (no decay).
876    #[serde(default = "default_bandit_decay_factor")]
877    pub decay_factor: f32,
878
879    /// Provider name from `[[llm.providers]]` used for query embeddings.
880    ///
881    /// SLM recommended: prefer a fast local model (e.g. Ollama `nomic-embed-text`,
882    /// Candle, or `text-embedding-3-small`) — this is called on every bandit request.
883    /// Empty string disables `LinUCB` (bandit always falls back to Thompson/uniform).
884    #[serde(default)]
885    pub embedding_provider: ProviderName,
886
887    /// Hard timeout for the embedding call in milliseconds. Default: 50.
888    /// If exceeded, the request falls back to Thompson/uniform selection.
889    #[serde(default = "default_bandit_embedding_timeout_ms")]
890    pub embedding_timeout_ms: u64,
891
892    /// Maximum cached embeddings (keyed by query text hash). Default: 512.
893    #[serde(default = "default_bandit_cache_size")]
894    pub cache_size: usize,
895
896    /// Path for persisting bandit state. Defaults to `~/.config/zeph/router_bandit_state.json`.
897    ///
898    /// # Security
899    ///
900    /// This path is user-controlled. The file is created with mode `0o600` on Unix.
901    /// Do not place it in world-writable directories.
902    #[serde(default)]
903    pub state_path: Option<String>,
904
905    /// MAR (Memory-Augmented Routing) confidence threshold.
906    ///
907    /// When the top-1 semantic recall score for the current query is >= this value,
908    /// the bandit biases toward cheaper providers (the answer is likely in memory).
909    /// Set to 1.0 to disable MAR. Default: 0.9.
910    #[serde(default = "default_bandit_memory_confidence_threshold")]
911    pub memory_confidence_threshold: f32,
912
913    /// Minimum number of queries before `LinUCB` takes over from Thompson warmup.
914    ///
915    /// When unset or `0`, defaults to `10 × number of providers` (computed at startup).
916    /// Set explicitly to control how long the bandit explores uniformly before
917    /// switching to context-aware routing. Setting `0` preserves the computed default.
918    #[serde(default)]
919    pub warmup_queries: Option<u64>,
920}
921
/// Serde default for `BanditConfig::memory_confidence_threshold`.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
925
926impl Default for BanditConfig {
927    fn default() -> Self {
928        Self {
929            alpha: default_bandit_alpha(),
930            dim: default_bandit_dim(),
931            cost_weight: default_bandit_cost_weight(),
932            decay_factor: default_bandit_decay_factor(),
933            embedding_provider: ProviderName::default(),
934            embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
935            cache_size: default_bandit_cache_size(),
936            state_path: None,
937            memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
938            warmup_queries: None,
939        }
940    }
941}
942
943#[derive(Debug, Deserialize, Serialize)]
944pub struct CandleConfig {
945    #[serde(default = "default_candle_source")]
946    pub source: String,
947    #[serde(default)]
948    pub local_path: String,
949    #[serde(default)]
950    pub filename: Option<String>,
951    #[serde(default = "default_chat_template")]
952    pub chat_template: String,
953    #[serde(default = "default_candle_device")]
954    pub device: String,
955    #[serde(default)]
956    pub embedding_repo: Option<String>,
957    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
958    ///
959    /// Must be the **token value** — resolved by the caller before constructing this config.
960    #[serde(default)]
961    pub hf_token: Option<String>,
962    #[serde(default)]
963    pub generation: GenerationParams,
964    /// Maximum seconds to wait for each half of a single inference request.
965    ///
966    /// The timeout is applied **twice** per `chat()` call: once for the channel send
967    /// (waiting for a free slot) and once for the oneshot reply (waiting for the worker
968    /// to finish). The effective maximum wall-clock wait per request is therefore
969    /// `2 × inference_timeout_secs`. CPU inference can be slow; 120s is a conservative
970    /// default for large models, giving up to 240s total before an error is returned.
971    /// Values of 0 are silently promoted to 1 at bootstrap.
972    #[serde(default = "default_inference_timeout_secs")]
973    pub inference_timeout_secs: u64,
974}
975
/// Serde default for the Candle `inference_timeout_secs` fields, in seconds.
fn default_inference_timeout_secs() -> u64 {
    120_u64
}
979
980/// Sampling / generation parameters for Candle local inference.
981///
982/// Used inside `[llm.candle.generation]` or a `[[llm.providers]]` Candle entry.
983#[derive(Debug, Clone, Deserialize, Serialize)]
984pub struct GenerationParams {
985    /// Sampling temperature. Higher values produce more creative outputs. Default: `0.7`.
986    #[serde(default = "default_temperature")]
987    pub temperature: f64,
988    /// Nucleus sampling threshold. When set, tokens with cumulative probability above
989    /// this value are excluded. Default: `None` (disabled).
990    #[serde(default)]
991    pub top_p: Option<f64>,
992    /// Top-k sampling. When set, only the top-k most probable tokens are considered.
993    /// Default: `None` (disabled).
994    #[serde(default)]
995    pub top_k: Option<usize>,
996    /// Maximum number of tokens to generate per response. Capped at [`MAX_TOKENS_CAP`].
997    /// Default: `2048`.
998    #[serde(default = "default_max_tokens")]
999    pub max_tokens: usize,
1000    /// Random seed for reproducible outputs. Default: `42`.
1001    #[serde(default = "default_seed")]
1002    pub seed: u64,
1003    /// Repetition penalty applied during sampling. Default: `1.1`.
1004    #[serde(default = "default_repeat_penalty")]
1005    pub repeat_penalty: f32,
1006    /// Number of last tokens to consider for the repetition penalty window. Default: `64`.
1007    #[serde(default = "default_repeat_last_n")]
1008    pub repeat_last_n: usize,
1009}
1010
/// Ceiling for `GenerationParams::max_tokens`; guards against unbounded generation.
pub const MAX_TOKENS_CAP: usize = 32_768;
1013
1014impl GenerationParams {
1015    /// Returns `max_tokens` clamped to [`MAX_TOKENS_CAP`].
1016    ///
1017    /// # Examples
1018    ///
1019    /// ```
1020    /// use zeph_config::GenerationParams;
1021    ///
1022    /// let params = GenerationParams::default();
1023    /// assert!(params.capped_max_tokens() <= 32768);
1024    /// ```
1025    #[must_use]
1026    pub fn capped_max_tokens(&self) -> usize {
1027        self.max_tokens.min(MAX_TOKENS_CAP)
1028    }
1029}
1030
1031impl Default for GenerationParams {
1032    fn default() -> Self {
1033        Self {
1034            temperature: default_temperature(),
1035            top_p: None,
1036            top_k: None,
1037            max_tokens: default_max_tokens(),
1038            seed: default_seed(),
1039            repeat_penalty: default_repeat_penalty(),
1040            repeat_last_n: default_repeat_last_n(),
1041        }
1042    }
1043}
1044
1045// ─── Unified config types ─────────────────────────────────────────────────────
1046
1047/// Routing strategy for the `[[llm.providers]]` pool.
1048#[non_exhaustive]
1049#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
1050#[serde(rename_all = "lowercase")]
1051pub enum LlmRoutingStrategy {
1052    /// Single provider or first-in-pool (default).
1053    #[default]
1054    None,
1055    /// Exponential moving average latency-aware ordering.
1056    Ema,
1057    /// Thompson Sampling with Beta distributions.
1058    Thompson,
1059    /// Cascade: try cheapest provider first, escalate on degenerate output.
1060    Cascade,
1061    /// Complexity triage routing: pre-classify each request, delegate to appropriate tier.
1062    Triage,
1063    /// PILOT: `LinUCB` contextual bandit with online learning and budget-aware reward.
1064    Bandit,
1065}
1066
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`, in seconds.
fn default_triage_timeout_secs() -> u64 {
    5_u64
}
1070
/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`.
fn default_max_triage_tokens() -> u32 {
    50_u32
}
1074
/// Serde default helper for boolean fields that are enabled unless configured otherwise.
fn default_true() -> bool {
    !false
}
1078
/// Predicate for `skip_serializing_if`: reports whether the flag is set.
// serde calls `skip_serializing_if` functions with a reference to the field,
// so the by-reference `bool` parameter cannot be changed to pass-by-value.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_true(v: &bool) -> bool {
    let flag = *v;
    flag
}
1083
1084/// Tier-to-provider name mapping for complexity routing.
1085#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1086pub struct TierMapping {
1087    pub simple: Option<String>,
1088    pub medium: Option<String>,
1089    pub complex: Option<String>,
1090    pub expert: Option<String>,
1091}
1092
1093/// Configuration for complexity-based triage routing (`routing = "triage"`).
1094///
1095/// When `[llm] routing = "triage"` is set, a cheap triage model classifies each request
1096/// and routes it to the appropriate tier provider. Requires at least one tier mapping.
1097///
1098/// # Example
1099///
1100/// ```toml
1101/// [llm]
1102/// routing = "triage"
1103///
1104/// [llm.complexity_routing]
1105/// triage_provider = "local-fast"
1106///
1107/// [llm.complexity_routing.tiers]
1108/// simple = "local-fast"
1109/// medium = "haiku"
1110/// complex = "sonnet"
1111/// expert = "opus"
1112/// ```
1113#[derive(Debug, Clone, Deserialize, Serialize)]
1114pub struct ComplexityRoutingConfig {
1115    /// Provider name from `[[llm.providers]]` used for triage classification.
1116    #[serde(default)]
1117    pub triage_provider: Option<ProviderName>,
1118
1119    /// Skip triage when all tiers map to the same provider.
1120    #[serde(default = "default_true")]
1121    pub bypass_single_provider: bool,
1122
1123    /// Tier-to-provider name mapping.
1124    #[serde(default)]
1125    pub tiers: TierMapping,
1126
1127    /// Max output tokens for the triage classification call. Default: 50.
1128    #[serde(default = "default_max_triage_tokens")]
1129    pub max_triage_tokens: u32,
1130
1131    /// Timeout in seconds for the triage classification call. Default: 5.
1132    /// On timeout, falls back to the default (first) tier provider.
1133    #[serde(default = "default_triage_timeout_secs")]
1134    pub triage_timeout_secs: u64,
1135
1136    /// Optional fallback strategy when triage misclassifies.
1137    /// Only `"cascade"` is currently supported (Phase 4).
1138    #[serde(default)]
1139    pub fallback_strategy: Option<String>,
1140}
1141
1142impl Default for ComplexityRoutingConfig {
1143    fn default() -> Self {
1144        Self {
1145            triage_provider: None,
1146            bypass_single_provider: true,
1147            tiers: TierMapping::default(),
1148            max_triage_tokens: default_max_triage_tokens(),
1149            triage_timeout_secs: default_triage_timeout_secs(),
1150            fallback_strategy: None,
1151        }
1152    }
1153}
1154
1155/// Configuration for the Collaborative Entropy (`CoE`) subsystem (`[llm.coe]` TOML section).
1156///
1157/// `CoE` detects uncertain responses from the primary provider and escalates to a
1158/// secondary provider when either the intra-entropy or inter-divergence signal crosses
1159/// its threshold. Only active for `RouterStrategy::Ema` and `RouterStrategy::Thompson`.
1160///
1161/// # Example
1162///
1163/// ```toml
1164/// [llm.coe]
1165/// enabled = true
1166/// intra_threshold = 0.8
1167/// inter_threshold = 0.20
1168/// shadow_sample_rate = 0.1
1169/// secondary_provider = "quality"
1170/// embed_provider = ""
1171/// ```
1172#[derive(Debug, Clone, Deserialize, Serialize)]
1173#[serde(default)]
1174pub struct CoeConfig {
1175    /// Enable `CoE`. When `false`, the struct is ignored.
1176    pub enabled: bool,
1177    /// Mean negative log-prob threshold; responses above this trigger intra escalation.
1178    pub intra_threshold: f64,
1179    /// Divergence threshold in `[0.0, 1.0]`.
1180    pub inter_threshold: f64,
1181    /// Baseline rate at which secondary is called even when intra is low.
1182    pub shadow_sample_rate: f64,
1183    /// Provider name from `[[llm.providers]]` used as the escalation target.
1184    pub secondary_provider: ProviderName,
1185    /// Provider name for inter-divergence embeddings. Empty → inherit bandit's embed provider.
1186    pub embed_provider: ProviderName,
1187}
1188
1189impl Default for CoeConfig {
1190    fn default() -> Self {
1191        Self {
1192            enabled: false,
1193            intra_threshold: 0.8,
1194            inter_threshold: 0.20,
1195            shadow_sample_rate: 0.1,
1196            secondary_provider: ProviderName::default(),
1197            embed_provider: ProviderName::default(),
1198        }
1199    }
1200}
1201
1202/// A single Gonka network node endpoint.
1203///
1204/// Used in `[[llm.providers]]` entries with `type = "gonka"` to declare
1205/// the node pool for blockchain inference routing.
1206#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
1207pub struct GonkaNode {
1208    /// HTTP(S) URL of the Gonka node (e.g. `"https://node1.gonka.ai"`).
1209    pub url: String,
1210    /// On-chain bech32 address of this node (e.g. `"gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"`).
1211    ///
1212    /// Required for signature construction: every signed request binds to the target node's
1213    /// on-chain address, making signatures non-replayable across different nodes.
1214    pub address: String,
1215    /// Optional human-readable label for `zeph gonka doctor` output.
1216    #[serde(default, skip_serializing_if = "Option::is_none")]
1217    pub name: Option<String>,
1218}
1219
1220/// Inline candle config for use inside `ProviderEntry`.
1221/// Re-uses the generation params from `CandleConfig`.
1222#[derive(Debug, Clone, Deserialize, Serialize)]
1223pub struct CandleInlineConfig {
1224    #[serde(default = "default_candle_source")]
1225    pub source: String,
1226    #[serde(default)]
1227    pub local_path: String,
1228    #[serde(default)]
1229    pub filename: Option<String>,
1230    #[serde(default = "default_chat_template")]
1231    pub chat_template: String,
1232    #[serde(default = "default_candle_device")]
1233    pub device: String,
1234    #[serde(default)]
1235    pub embedding_repo: Option<String>,
1236    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
1237    #[serde(default)]
1238    pub hf_token: Option<String>,
1239    #[serde(default)]
1240    pub generation: GenerationParams,
1241    /// Maximum wall-clock seconds to wait for a single inference request.
1242    ///
1243    /// Effective timeout is `2 × inference_timeout_secs` (send + recv each have this budget).
1244    /// CPU inference can be slow; 120s is a conservative default. Floored at 1s.
1245    #[serde(default = "default_inference_timeout_secs")]
1246    pub inference_timeout_secs: u64,
1247}
1248
1249impl Default for CandleInlineConfig {
1250    fn default() -> Self {
1251        Self {
1252            source: default_candle_source(),
1253            local_path: String::new(),
1254            filename: None,
1255            chat_template: default_chat_template(),
1256            device: default_candle_device(),
1257            embedding_repo: None,
1258            hf_token: None,
1259            generation: GenerationParams::default(),
1260            inference_timeout_secs: default_inference_timeout_secs(),
1261        }
1262    }
1263}
1264
1265/// Per-1K-token pricing for a Cocoon provider, in cents.
1266///
1267/// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
1268/// When this struct is present in a provider entry, its values are registered with
1269/// `CostTracker` at startup so that token costs are tracked accurately.
1270///
1271/// Reasoning tokens (when the model uses chain-of-thought) are folded into
1272/// `completion_tokens` by the Cocoon sidecar and counted at the completion price.
1273#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
1274pub struct CocoonPricing {
1275    /// Prompt (input) token price in cents per 1K tokens.
1276    #[serde(default)]
1277    pub prompt_cents_per_1k: f64,
1278    /// Completion (output) token price in cents per 1K tokens.
1279    /// Reasoning tokens are counted here since the sidecar folds them into completion tokens.
1280    #[serde(default)]
1281    pub completion_cents_per_1k: f64,
1282}
1283
1284/// Unified provider entry: one struct replaces `CloudLlmConfig`, `OpenAiConfig`,
1285/// `GeminiConfig`, `OllamaConfig`, `CompatibleConfig`, and `OrchestratorProviderConfig`.
1286///
1287/// Provider-specific fields use `#[serde(default)]` and are ignored by backends
1288/// that do not use them (flat-union pattern).
1289#[derive(Debug, Clone, Deserialize, Serialize)]
1290#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1291pub struct ProviderEntry {
1292    /// Required: provider backend type.
1293    #[serde(rename = "type")]
1294    pub provider_type: ProviderKind,
1295
1296    /// Optional name for multi-provider configs. Auto-generated from type if absent.
1297    #[serde(default)]
1298    pub name: Option<String>,
1299
1300    /// Model identifier. Required for most types.
1301    #[serde(default)]
1302    pub model: Option<String>,
1303
1304    /// API base URL. Each type has its own default.
1305    #[serde(default)]
1306    pub base_url: Option<String>,
1307
1308    /// Max output tokens.
1309    #[serde(default)]
1310    pub max_tokens: Option<u32>,
1311
1312    /// Embedding model. When set, this provider supports `embed()` calls.
1313    #[serde(default)]
1314    pub embedding_model: Option<String>,
1315
1316    /// STT model. When set, this provider supports speech-to-text via the Whisper API or
1317    /// Candle-local inference.
1318    #[serde(default)]
1319    pub stt_model: Option<String>,
1320
1321    /// Mark this entry as the embedding provider (handles `embed()` calls).
1322    #[serde(default)]
1323    pub embed: bool,
1324
1325    /// Mark this entry as the default chat provider (overrides position-based default).
1326    #[serde(default)]
1327    pub default: bool,
1328
1329    // --- Claude-specific ---
1330    #[serde(default)]
1331    pub thinking: Option<ThinkingConfig>,
1332    #[serde(default)]
1333    pub server_compaction: bool,
1334    #[serde(default)]
1335    pub enable_extended_context: bool,
1336    /// Prompt cache TTL variant. `None` keeps the default ~5-minute ephemeral TTL.
1337    /// Set to `"1h"` to enable the extended 1-hour TTL (beta, ~2× write cost).
1338    #[serde(default)]
1339    pub prompt_cache_ttl: Option<CacheTtl>,
1340
1341    // --- OpenAI-specific ---
1342    #[serde(default)]
1343    pub reasoning_effort: Option<String>,
1344
1345    // --- Gemini-specific ---
1346    #[serde(default)]
1347    pub thinking_level: Option<GeminiThinkingLevel>,
1348    #[serde(default)]
1349    pub thinking_budget: Option<i32>,
1350    #[serde(default)]
1351    pub include_thoughts: Option<bool>,
1352
1353    // --- Compatible-specific: optional inline api_key ---
1354    #[serde(default)]
1355    pub api_key: Option<String>,
1356
1357    // --- Candle-specific ---
1358    #[serde(default)]
1359    pub candle: Option<CandleInlineConfig>,
1360
1361    // --- Vision ---
1362    #[serde(default)]
1363    pub vision_model: Option<String>,
1364
1365    // --- Gonka-specific ---
1366    /// Gonka network node pool. Required (non-empty) when `type = "gonka"`.
1367    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1368    pub gonka_nodes: Vec<GonkaNode>,
1369    /// bech32 chain prefix for address encoding. Defaults to `"gonka"` when omitted.
1370    #[serde(default, skip_serializing_if = "Option::is_none")]
1371    pub gonka_chain_prefix: Option<String>,
1372
1373    // --- Cocoon-specific ---
1374    /// Cocoon sidecar HTTP URL. Defaults to `"http://localhost:10000"` when absent.
1375    #[serde(default, skip_serializing_if = "Option::is_none")]
1376    pub cocoon_client_url: Option<String>,
1377    /// Sentinel field for access hash. Leave empty in config; actual value
1378    /// is resolved from the age vault as `ZEPH_COCOON_ACCESS_HASH`.
1379    #[serde(default, skip_serializing_if = "Option::is_none")]
1380    pub cocoon_access_hash: Option<String>,
1381    /// Whether to perform a health check against `/stats` at provider construction time.
1382    #[serde(default = "default_true", skip_serializing_if = "is_true")]
1383    pub cocoon_health_check: bool,
1384    /// Manual per-1K-token pricing for this Cocoon provider.
1385    ///
1386    /// Cocoon model names (e.g. `Qwen/Qwen3-0.6B`) are not in the built-in pricing table.
1387    /// When this section is present, the values are registered with `CostTracker` at startup
1388    /// so that token costs are tracked accurately.
1389    ///
1390    /// Example TOML:
1391    /// ```toml
1392    /// [llm.providers.cocoon_pricing]
1393    /// prompt_cents_per_1k = 0.01
1394    /// completion_cents_per_1k = 0.03
1395    /// ```
1396    #[serde(default, skip_serializing_if = "Option::is_none")]
1397    pub cocoon_pricing: Option<CocoonPricing>,
1398
1399    /// Provider-specific instruction file.
1400    #[serde(default)]
1401    pub instruction_file: Option<std::path::PathBuf>,
1402
1403    /// Maximum concurrent LLM calls from orchestrated sub-agents to this provider.
1404    ///
1405    /// When set, `DagScheduler` acquires a semaphore permit before dispatching a
1406    /// sub-agent that targets this provider. Dispatch is deferred (using the existing
1407    /// `deferral_backoff` mechanism) when the semaphore is saturated.
1408    ///
1409    /// `None` (default) = unlimited — no admission control applied.
1410    ///
1411    /// # Example (TOML)
1412    ///
1413    /// ```toml
1414    /// [[llm.providers]]
1415    /// name = "quality"
1416    /// type = "openai"
1417    /// model = "gpt-5"
1418    /// max_concurrent = 3
1419    /// ```
1420    #[serde(default, skip_serializing_if = "Option::is_none")]
1421    pub max_concurrent: Option<u32>,
1422}
1423
1424impl Default for ProviderEntry {
1425    fn default() -> Self {
1426        Self {
1427            provider_type: ProviderKind::Ollama,
1428            name: None,
1429            model: None,
1430            base_url: None,
1431            max_tokens: None,
1432            embedding_model: None,
1433            stt_model: None,
1434            embed: false,
1435            default: false,
1436            thinking: None,
1437            server_compaction: false,
1438            enable_extended_context: false,
1439            prompt_cache_ttl: None,
1440            reasoning_effort: None,
1441            thinking_level: None,
1442            thinking_budget: None,
1443            include_thoughts: None,
1444            api_key: None,
1445            candle: None,
1446            vision_model: None,
1447            gonka_nodes: Vec::new(),
1448            gonka_chain_prefix: None,
1449            cocoon_client_url: None,
1450            cocoon_access_hash: None,
1451            cocoon_health_check: true,
1452            cocoon_pricing: None,
1453            instruction_file: None,
1454            max_concurrent: None,
1455        }
1456    }
1457}
1458
1459impl ProviderEntry {
1460    /// Resolve the effective name: explicit `name` field or type string.
1461    #[must_use]
1462    pub fn effective_name(&self) -> String {
1463        self.name
1464            .clone()
1465            .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1466    }
1467
1468    /// Resolve the effective model: explicit `model` field or the provider-type default.
1469    ///
1470    /// Defaults mirror those used in `build_provider_from_entry` so that `runtime.model_name`
1471    /// always reflects the actual model being used rather than the provider type string.
1472    #[must_use]
1473    pub fn effective_model(&self) -> String {
1474        if let Some(ref m) = self.model {
1475            return m.clone();
1476        }
1477        match self.provider_type {
1478            ProviderKind::Ollama => "qwen3:8b".to_owned(),
1479            ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1480            ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1481            ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1482            // Compatible/Candle return empty because the model is resolved elsewhere.
1483            // Gonka returns empty because it is a blockchain provider, not an LLM — there is no model concept.
1484            ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1485            ProviderKind::Cocoon => "Qwen/Qwen3-0.6B".to_owned(),
1486        }
1487    }
1488
1489    /// Validate this entry for cross-field consistency.
1490    ///
1491    /// # Errors
1492    ///
1493    /// Returns `ConfigError` when a fatal invariant is violated (e.g. compatible provider
1494    /// without a name).
1495    pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1496        use crate::error::ConfigError;
1497
1498        // B2: compatible provider MUST have name set.
1499        if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1500            return Err(ConfigError::Validation(
1501                "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1502            ));
1503        }
1504
1505        // B3: gonka provider MUST have name and valid gonka_nodes.
1506        if self.provider_type == ProviderKind::Gonka {
1507            if self.name.is_none() {
1508                return Err(ConfigError::Validation(
1509                    "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1510                ));
1511            }
1512            self.validate_gonka_nodes()?;
1513        }
1514
1515        // B4: cocoon provider MUST have a name.
1516        if self.provider_type == ProviderKind::Cocoon
1517            && self.name.as_ref().is_none_or(String::is_empty)
1518        {
1519            return Err(ConfigError::Validation(
1520                "[[llm.providers]] entry with type=\"cocoon\" must set `name`".into(),
1521            ));
1522        }
1523
1524        // B5: cocoon URL must be valid http/https; cocoon model must not be empty.
1525        if self.provider_type == ProviderKind::Cocoon {
1526            let name = self.effective_name();
1527            if let Some(ref url_str) = self.cocoon_client_url {
1528                match url::Url::parse(url_str) {
1529                    Err(_) => {
1530                        return Err(ConfigError::Validation(format!(
1531                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1532                             '{url_str}' is not a valid URL; expected format: \
1533                             http://localhost:10000"
1534                        )));
1535                    }
1536                    Ok(u) if !matches!(u.host_str(), Some("localhost" | "127.0.0.1" | "::1")) => {
1537                        return Err(ConfigError::Validation(format!(
1538                            "[[llm.providers]] entry '{name}': cocoon_client_url host must be \
1539                             localhost or 127.0.0.1, got '{}'",
1540                            u.host_str().unwrap_or("<none>")
1541                        )));
1542                    }
1543                    Ok(u) if u.scheme() != "http" && u.scheme() != "https" => {
1544                        return Err(ConfigError::Validation(format!(
1545                            "[[llm.providers]] entry '{name}': cocoon_client_url \
1546                             scheme must be http or https, got '{}'",
1547                            u.scheme()
1548                        )));
1549                    }
1550                    _ => {}
1551                }
1552            }
1553            if self.model.as_deref().is_some_and(|m| m.trim().is_empty()) {
1554                return Err(ConfigError::Validation(format!(
1555                    "[[llm.providers]] entry '{name}': model must not be empty \
1556                     for cocoon provider"
1557                )));
1558            }
1559            if let Some(ref p) = self.cocoon_pricing {
1560                if !p.prompt_cents_per_1k.is_finite() || p.prompt_cents_per_1k < 0.0 {
1561                    return Err(ConfigError::Validation(format!(
1562                        "[[llm.providers]] entry '{name}': cocoon_pricing.prompt_cents_per_1k \
1563                         must be a finite non-negative number"
1564                    )));
1565                }
1566                if !p.completion_cents_per_1k.is_finite() || p.completion_cents_per_1k < 0.0 {
1567                    return Err(ConfigError::Validation(format!(
1568                        "[[llm.providers]] entry '{name}': \
1569                         cocoon_pricing.completion_cents_per_1k \
1570                         must be a finite non-negative number"
1571                    )));
1572                }
1573            }
1574        }
1575
1576        // B1: warn on irrelevant fields.
1577        self.warn_irrelevant_fields();
1578
1579        // W6: Candle STT-only provider (stt_model set, no model) is valid — no warning needed.
1580        // Warn if Ollama has stt_model set (Ollama does not support Whisper API).
1581        if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1582            tracing::warn!(
1583                provider = self.effective_name(),
1584                "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1585                 Whisper STT API — use OpenAI, compatible, or candle instead"
1586            );
1587        }
1588
1589        Ok(())
1590    }
1591
1592    /// Resolve the effective Gonka chain prefix: explicit value or `"gonka"` default.
1593    #[must_use]
1594    pub fn effective_gonka_chain_prefix(&self) -> &str {
1595        self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1596    }
1597
1598    fn warn_irrelevant_fields(&self) {
1599        let name = self.effective_name();
1600        match self.provider_type {
1601            ProviderKind::Ollama => {
1602                if self.thinking.is_some() {
1603                    tracing::warn!(
1604                        provider = name,
1605                        "field `thinking` is only used by Claude providers"
1606                    );
1607                }
1608                if self.reasoning_effort.is_some() {
1609                    tracing::warn!(
1610                        provider = name,
1611                        "field `reasoning_effort` is only used by OpenAI providers"
1612                    );
1613                }
1614                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1615                    tracing::warn!(
1616                        provider = name,
1617                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1618                    );
1619                }
1620            }
1621            ProviderKind::Claude => {
1622                if self.reasoning_effort.is_some() {
1623                    tracing::warn!(
1624                        provider = name,
1625                        "field `reasoning_effort` is only used by OpenAI providers"
1626                    );
1627                }
1628                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1629                    tracing::warn!(
1630                        provider = name,
1631                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1632                    );
1633                }
1634            }
1635            ProviderKind::OpenAi => {
1636                if self.thinking.is_some() {
1637                    tracing::warn!(
1638                        provider = name,
1639                        "field `thinking` is only used by Claude providers"
1640                    );
1641                }
1642                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1643                    tracing::warn!(
1644                        provider = name,
1645                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1646                    );
1647                }
1648            }
1649            ProviderKind::Gemini => {
1650                if self.thinking.is_some() {
1651                    tracing::warn!(
1652                        provider = name,
1653                        "field `thinking` is only used by Claude providers"
1654                    );
1655                }
1656                if self.reasoning_effort.is_some() {
1657                    tracing::warn!(
1658                        provider = name,
1659                        "field `reasoning_effort` is only used by OpenAI providers"
1660                    );
1661                }
1662            }
1663            ProviderKind::Gonka => {
1664                if self.thinking.is_some() {
1665                    tracing::warn!(
1666                        provider = name,
1667                        "field `thinking` is only used by Claude providers"
1668                    );
1669                }
1670                if self.reasoning_effort.is_some() {
1671                    tracing::warn!(
1672                        provider = name,
1673                        "field `reasoning_effort` is only used by OpenAI providers"
1674                    );
1675                }
1676                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1677                    tracing::warn!(
1678                        provider = name,
1679                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1680                    );
1681                }
1682            }
1683            ProviderKind::Compatible | ProviderKind::Candle => {}
1684            ProviderKind::Cocoon => {
1685                if self.base_url.is_some() {
1686                    tracing::warn!(
1687                        provider = name,
1688                        "field `base_url` is ignored for cocoon providers; use `cocoon_client_url` instead"
1689                    );
1690                }
1691            }
1692        }
1693    }
1694
1695    fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1696        use crate::error::ConfigError;
1697        if self.gonka_nodes.is_empty() {
1698            return Err(ConfigError::Validation(format!(
1699                "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1700                self.effective_name()
1701            )));
1702        }
1703        for (i, node) in self.gonka_nodes.iter().enumerate() {
1704            if node.url.is_empty() {
1705                return Err(ConfigError::Validation(format!(
1706                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1707                    self.effective_name()
1708                )));
1709            }
1710            if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1711                return Err(ConfigError::Validation(format!(
1712                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1713                    self.effective_name()
1714                )));
1715            }
1716        }
1717        Ok(())
1718    }
1719}
1720
1721/// Validate a pool of `ProviderEntry` items.
1722///
1723/// # Errors
1724///
1725/// Returns `ConfigError` for fatal validation failures:
1726/// - Empty pool
1727/// - Duplicate names
1728/// - Multiple entries marked `default = true`
1729/// - Individual entry validation errors
1730pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1731    use crate::error::ConfigError;
1732    use std::collections::HashSet;
1733
1734    if entries.is_empty() {
1735        return Err(ConfigError::Validation(
1736            "at least one LLM provider must be configured in [[llm.providers]]".into(),
1737        ));
1738    }
1739
1740    let default_count = entries.iter().filter(|e| e.default).count();
1741    if default_count > 1 {
1742        return Err(ConfigError::Validation(
1743            "only one [[llm.providers]] entry can be marked `default = true`".into(),
1744        ));
1745    }
1746
1747    let mut seen_names: HashSet<String> = HashSet::new();
1748    for entry in entries {
1749        let name = entry.effective_name();
1750        if !seen_names.insert(name.clone()) {
1751            return Err(ConfigError::Validation(format!(
1752                "duplicate provider name \"{name}\" in [[llm.providers]]"
1753            )));
1754        }
1755        entry.validate()?;
1756    }
1757
1758    Ok(())
1759}
1760
1761#[cfg(test)]
1762mod tests {
1763    use super::*;
1764
1765    fn ollama_entry() -> ProviderEntry {
1766        ProviderEntry {
1767            provider_type: ProviderKind::Ollama,
1768            name: Some("ollama".into()),
1769            model: Some("qwen3:8b".into()),
1770            ..Default::default()
1771        }
1772    }
1773
1774    fn claude_entry() -> ProviderEntry {
1775        ProviderEntry {
1776            provider_type: ProviderKind::Claude,
1777            name: Some("claude".into()),
1778            model: Some("claude-sonnet-4-6".into()),
1779            max_tokens: Some(8192),
1780            ..Default::default()
1781        }
1782    }
1783
1784    // ─── ProviderEntry::validate ─────────────────────────────────────────────
1785
1786    #[test]
1787    fn validate_ollama_valid() {
1788        assert!(ollama_entry().validate().is_ok());
1789    }
1790
1791    #[test]
1792    fn validate_claude_valid() {
1793        assert!(claude_entry().validate().is_ok());
1794    }
1795
1796    #[test]
1797    fn validate_compatible_without_name_errors() {
1798        let entry = ProviderEntry {
1799            provider_type: ProviderKind::Compatible,
1800            name: None,
1801            ..Default::default()
1802        };
1803        let err = entry.validate().unwrap_err();
1804        assert!(
1805            err.to_string().contains("compatible"),
1806            "error should mention compatible: {err}"
1807        );
1808    }
1809
1810    #[test]
1811    fn validate_compatible_with_name_ok() {
1812        let entry = ProviderEntry {
1813            provider_type: ProviderKind::Compatible,
1814            name: Some("my-proxy".into()),
1815            base_url: Some("http://localhost:8080".into()),
1816            model: Some("gpt-4o".into()),
1817            max_tokens: Some(4096),
1818            ..Default::default()
1819        };
1820        assert!(entry.validate().is_ok());
1821    }
1822
1823    #[test]
1824    fn validate_openai_valid() {
1825        let entry = ProviderEntry {
1826            provider_type: ProviderKind::OpenAi,
1827            name: Some("openai".into()),
1828            model: Some("gpt-4o".into()),
1829            max_tokens: Some(4096),
1830            ..Default::default()
1831        };
1832        assert!(entry.validate().is_ok());
1833    }
1834
1835    #[test]
1836    fn validate_gemini_valid() {
1837        let entry = ProviderEntry {
1838            provider_type: ProviderKind::Gemini,
1839            name: Some("gemini".into()),
1840            model: Some("gemini-2.0-flash".into()),
1841            ..Default::default()
1842        };
1843        assert!(entry.validate().is_ok());
1844    }
1845
1846    // ─── validate_pool ───────────────────────────────────────────────────────
1847
1848    #[test]
1849    fn validate_pool_empty_errors() {
1850        let err = validate_pool(&[]).unwrap_err();
1851        assert!(err.to_string().contains("at least one"), "{err}");
1852    }
1853
1854    #[test]
1855    fn validate_pool_single_entry_ok() {
1856        assert!(validate_pool(&[ollama_entry()]).is_ok());
1857    }
1858
1859    #[test]
1860    fn validate_pool_duplicate_names_errors() {
1861        let a = ollama_entry();
1862        let b = ollama_entry(); // same effective name "ollama"
1863        let err = validate_pool(&[a, b]).unwrap_err();
1864        assert!(err.to_string().contains("duplicate"), "{err}");
1865    }
1866
1867    #[test]
1868    fn validate_pool_multiple_defaults_errors() {
1869        let mut a = ollama_entry();
1870        let mut b = claude_entry();
1871        a.default = true;
1872        b.default = true;
1873        let err = validate_pool(&[a, b]).unwrap_err();
1874        assert!(err.to_string().contains("default"), "{err}");
1875    }
1876
1877    #[test]
1878    fn validate_pool_two_different_providers_ok() {
1879        assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
1880    }
1881
1882    #[test]
1883    fn validate_pool_propagates_entry_error() {
1884        let bad = ProviderEntry {
1885            provider_type: ProviderKind::Compatible,
1886            name: None, // invalid: compatible without name
1887            ..Default::default()
1888        };
1889        assert!(validate_pool(&[bad]).is_err());
1890    }
1891
1892    // ─── ProviderEntry::effective_model ──────────────────────────────────────
1893
1894    #[test]
1895    fn effective_model_returns_explicit_when_set() {
1896        let entry = ProviderEntry {
1897            provider_type: ProviderKind::Claude,
1898            model: Some("claude-sonnet-4-6".into()),
1899            ..Default::default()
1900        };
1901        assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
1902    }
1903
1904    #[test]
1905    fn effective_model_ollama_default_when_none() {
1906        let entry = ProviderEntry {
1907            provider_type: ProviderKind::Ollama,
1908            model: None,
1909            ..Default::default()
1910        };
1911        assert_eq!(entry.effective_model(), "qwen3:8b");
1912    }
1913
1914    #[test]
1915    fn effective_model_claude_default_when_none() {
1916        let entry = ProviderEntry {
1917            provider_type: ProviderKind::Claude,
1918            model: None,
1919            ..Default::default()
1920        };
1921        assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
1922    }
1923
1924    #[test]
1925    fn effective_model_openai_default_when_none() {
1926        let entry = ProviderEntry {
1927            provider_type: ProviderKind::OpenAi,
1928            model: None,
1929            ..Default::default()
1930        };
1931        assert_eq!(entry.effective_model(), "gpt-4o-mini");
1932    }
1933
1934    #[test]
1935    fn effective_model_gemini_default_when_none() {
1936        let entry = ProviderEntry {
1937            provider_type: ProviderKind::Gemini,
1938            model: None,
1939            ..Default::default()
1940        };
1941        assert_eq!(entry.effective_model(), "gemini-2.0-flash");
1942    }
1943
1944    // ─── LlmConfig::check_legacy_format ──────────────────────────────────────
1945
1946    // Parse a complete TOML snippet that includes the [llm] header.
1947    fn parse_llm(toml: &str) -> LlmConfig {
1948        #[derive(serde::Deserialize)]
1949        struct Wrapper {
1950            llm: LlmConfig,
1951        }
1952        toml::from_str::<Wrapper>(toml).unwrap().llm
1953    }
1954
1955    #[test]
1956    fn check_legacy_format_new_format_ok() {
1957        let cfg = parse_llm(
1958            r#"
1959[llm]
1960
1961[[llm.providers]]
1962type = "ollama"
1963model = "qwen3:8b"
1964"#,
1965        );
1966        assert!(cfg.check_legacy_format().is_ok());
1967    }
1968
1969    #[test]
1970    fn check_legacy_format_empty_providers_no_legacy_ok() {
1971        // No providers, no legacy fields — passes (empty [llm] is acceptable here)
1972        let cfg = parse_llm("[llm]\n");
1973        assert!(cfg.check_legacy_format().is_ok());
1974    }
1975
1976    // ─── LlmConfig::effective_* helpers ──────────────────────────────────────
1977
1978    #[test]
1979    fn effective_provider_falls_back_to_ollama_when_no_providers() {
1980        let cfg = parse_llm("[llm]\n");
1981        assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
1982    }
1983
1984    #[test]
1985    fn effective_provider_reads_from_providers_first() {
1986        let cfg = parse_llm(
1987            r#"
1988[llm]
1989
1990[[llm.providers]]
1991type = "claude"
1992model = "claude-sonnet-4-6"
1993"#,
1994        );
1995        assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
1996    }
1997
1998    #[test]
1999    fn effective_model_reads_from_providers_first() {
2000        let cfg = parse_llm(
2001            r#"
2002[llm]
2003
2004[[llm.providers]]
2005type = "ollama"
2006model = "qwen3:8b"
2007"#,
2008        );
2009        assert_eq!(cfg.effective_model(), "qwen3:8b");
2010    }
2011
2012    #[test]
2013    fn effective_model_skips_embed_only_provider() {
2014        let cfg = parse_llm(
2015            r#"
2016[llm]
2017
2018[[llm.providers]]
2019type = "ollama"
2020model = "gemma4:26b"
2021embed = true
2022
2023[[llm.providers]]
2024type = "openai"
2025model = "gpt-4o-mini"
2026"#,
2027        );
2028        assert_eq!(cfg.effective_model(), "gpt-4o-mini");
2029    }
2030
2031    #[test]
2032    fn effective_base_url_default_when_absent() {
2033        let cfg = parse_llm("[llm]\n");
2034        assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
2035    }
2036
2037    #[test]
2038    fn effective_base_url_from_providers_entry() {
2039        let cfg = parse_llm(
2040            r#"
2041[llm]
2042
2043[[llm.providers]]
2044type = "ollama"
2045base_url = "http://myhost:11434"
2046"#,
2047        );
2048        assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
2049    }
2050
2051    // ─── ComplexityRoutingConfig / LlmRoutingStrategy::Triage TOML parsing ──
2052
2053    #[test]
2054    fn complexity_routing_defaults() {
2055        let cr = ComplexityRoutingConfig::default();
2056        assert!(
2057            cr.bypass_single_provider,
2058            "bypass_single_provider must default to true"
2059        );
2060        assert_eq!(cr.triage_timeout_secs, 5);
2061        assert_eq!(cr.max_triage_tokens, 50);
2062        assert!(cr.triage_provider.is_none());
2063        assert!(cr.tiers.simple.is_none());
2064    }
2065
2066    #[test]
2067    fn complexity_routing_toml_round_trip() {
2068        let cfg = parse_llm(
2069            r#"
2070[llm]
2071routing = "triage"
2072
2073[llm.complexity_routing]
2074triage_provider = "fast"
2075bypass_single_provider = false
2076triage_timeout_secs = 10
2077max_triage_tokens = 100
2078
2079[llm.complexity_routing.tiers]
2080simple = "fast"
2081medium = "medium"
2082complex = "large"
2083expert = "opus"
2084"#,
2085        );
2086        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2087        let cr = cfg
2088            .complexity_routing
2089            .expect("complexity_routing must be present");
2090        assert_eq!(
2091            cr.triage_provider.as_ref().map(ProviderName::as_str),
2092            Some("fast")
2093        );
2094        assert!(!cr.bypass_single_provider);
2095        assert_eq!(cr.triage_timeout_secs, 10);
2096        assert_eq!(cr.max_triage_tokens, 100);
2097        assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
2098        assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
2099        assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
2100        assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
2101    }
2102
2103    #[test]
2104    fn complexity_routing_partial_tiers_toml() {
2105        // Only simple + complex configured; medium and expert are None.
2106        let cfg = parse_llm(
2107            r#"
2108[llm]
2109routing = "triage"
2110
2111[llm.complexity_routing.tiers]
2112simple = "haiku"
2113complex = "sonnet"
2114"#,
2115        );
2116        let cr = cfg
2117            .complexity_routing
2118            .expect("complexity_routing must be present");
2119        assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
2120        assert!(cr.tiers.medium.is_none());
2121        assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
2122        assert!(cr.tiers.expert.is_none());
2123        // Defaults still applied.
2124        assert!(cr.bypass_single_provider);
2125        assert_eq!(cr.triage_timeout_secs, 5);
2126    }
2127
2128    #[test]
2129    fn routing_strategy_triage_deserialized() {
2130        let cfg = parse_llm(
2131            r#"
2132[llm]
2133routing = "triage"
2134"#,
2135        );
2136        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
2137    }
2138
2139    // ─── stt_provider_entry ───────────────────────────────────────────────────
2140
2141    #[test]
2142    fn stt_provider_entry_by_name_match() {
2143        let cfg = parse_llm(
2144            r#"
2145[llm]
2146
2147[[llm.providers]]
2148type = "openai"
2149name = "quality"
2150model = "gpt-5.4"
2151stt_model = "gpt-4o-mini-transcribe"
2152
2153[llm.stt]
2154provider = "quality"
2155"#,
2156        );
2157        let entry = cfg.stt_provider_entry().expect("should find stt provider");
2158        assert_eq!(entry.effective_name(), "quality");
2159        assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
2160    }
2161
2162    #[test]
2163    fn stt_provider_entry_auto_detect_when_provider_empty() {
2164        let cfg = parse_llm(
2165            r#"
2166[llm]
2167
2168[[llm.providers]]
2169type = "openai"
2170name = "openai-stt"
2171stt_model = "whisper-1"
2172
2173[llm.stt]
2174provider = ""
2175"#,
2176        );
2177        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2178        assert_eq!(entry.effective_name(), "openai-stt");
2179    }
2180
2181    #[test]
2182    fn stt_provider_entry_auto_detect_no_stt_section() {
2183        let cfg = parse_llm(
2184            r#"
2185[llm]
2186
2187[[llm.providers]]
2188type = "openai"
2189name = "openai-stt"
2190stt_model = "whisper-1"
2191"#,
2192        );
2193        // No [llm.stt] section — should still find first provider with stt_model.
2194        let entry = cfg.stt_provider_entry().expect("should auto-detect");
2195        assert_eq!(entry.effective_name(), "openai-stt");
2196    }
2197
2198    #[test]
2199    fn stt_provider_entry_none_when_no_stt_model() {
2200        let cfg = parse_llm(
2201            r#"
2202[llm]
2203
2204[[llm.providers]]
2205type = "openai"
2206name = "quality"
2207model = "gpt-5.4"
2208"#,
2209        );
2210        assert!(cfg.stt_provider_entry().is_none());
2211    }
2212
2213    #[test]
2214    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
2215        // Named provider exists but has no stt_model; another unnamed has stt_model.
2216        let cfg = parse_llm(
2217            r#"
2218[llm]
2219
2220[[llm.providers]]
2221type = "openai"
2222name = "quality"
2223model = "gpt-5.4"
2224
2225[[llm.providers]]
2226type = "openai"
2227name = "openai-stt"
2228stt_model = "whisper-1"
2229
2230[llm.stt]
2231provider = "quality"
2232"#,
2233        );
2234        // "quality" has no stt_model — returns None for name-based lookup.
2235        assert!(cfg.stt_provider_entry().is_none());
2236    }
2237
2238    #[test]
2239    fn stt_config_deserializes_new_slim_format() {
2240        let cfg = parse_llm(
2241            r#"
2242[llm]
2243
2244[[llm.providers]]
2245type = "openai"
2246name = "quality"
2247stt_model = "whisper-1"
2248
2249[llm.stt]
2250provider = "quality"
2251language = "en"
2252"#,
2253        );
2254        let stt = cfg.stt.as_ref().expect("stt section present");
2255        assert_eq!(stt.provider, "quality");
2256        assert_eq!(stt.language, "en");
2257    }
2258
2259    #[test]
2260    fn stt_config_default_provider_is_empty() {
2261        // Verify that W4 fix: default_stt_provider() returns "" not "whisper".
2262        assert_eq!(default_stt_provider(), "");
2263    }
2264
2265    #[test]
2266    fn validate_stt_missing_provider_ok() {
2267        let cfg = parse_llm("[llm]\n");
2268        assert!(cfg.validate_stt().is_ok());
2269    }
2270
2271    #[test]
2272    fn validate_stt_valid_reference() {
2273        let cfg = parse_llm(
2274            r#"
2275[llm]
2276
2277[[llm.providers]]
2278type = "openai"
2279name = "quality"
2280stt_model = "whisper-1"
2281
2282[llm.stt]
2283provider = "quality"
2284"#,
2285        );
2286        assert!(cfg.validate_stt().is_ok());
2287    }
2288
2289    #[test]
2290    fn validate_stt_nonexistent_provider_errors() {
2291        let cfg = parse_llm(
2292            r#"
2293[llm]
2294
2295[[llm.providers]]
2296type = "openai"
2297name = "quality"
2298model = "gpt-5.4"
2299
2300[llm.stt]
2301provider = "nonexistent"
2302"#,
2303        );
2304        assert!(cfg.validate_stt().is_err());
2305    }
2306
2307    #[test]
2308    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
2309        // MEDIUM: provider is found but has no stt_model — should return Ok (warn path, not error).
2310        let cfg = parse_llm(
2311            r#"
2312[llm]
2313
2314[[llm.providers]]
2315type = "openai"
2316name = "quality"
2317model = "gpt-5.4"
2318
2319[llm.stt]
2320provider = "quality"
2321"#,
2322        );
2323        // validate_stt must succeed (only a tracing::warn is emitted — not an error).
2324        assert!(cfg.validate_stt().is_ok());
2325        // stt_provider_entry must return None because no stt_model is set.
2326        assert!(
2327            cfg.stt_provider_entry().is_none(),
2328            "stt_provider_entry must be None when provider has no stt_model"
2329        );
2330    }
2331
2332    // ─── BanditConfig::warmup_queries deserialization ─────────────────────────
2333
2334    #[test]
2335    fn bandit_warmup_queries_explicit_value_is_deserialized() {
2336        let cfg = parse_llm(
2337            r#"
2338[llm]
2339
2340[llm.router]
2341strategy = "bandit"
2342
2343[llm.router.bandit]
2344warmup_queries = 50
2345"#,
2346        );
2347        let bandit = cfg
2348            .router
2349            .expect("router section must be present")
2350            .bandit
2351            .expect("bandit section must be present");
2352        assert_eq!(
2353            bandit.warmup_queries,
2354            Some(50),
2355            "warmup_queries = 50 must deserialize to Some(50)"
2356        );
2357    }
2358
2359    #[test]
2360    fn bandit_warmup_queries_explicit_null_is_none() {
2361        // Explicitly writing the field as absent: field simply not present is
2362        // equivalent due to #[serde(default)]. Test that an explicit 0 is Some(0).
2363        let cfg = parse_llm(
2364            r#"
2365[llm]
2366
2367[llm.router]
2368strategy = "bandit"
2369
2370[llm.router.bandit]
2371warmup_queries = 0
2372"#,
2373        );
2374        let bandit = cfg
2375            .router
2376            .expect("router section must be present")
2377            .bandit
2378            .expect("bandit section must be present");
2379        // 0 is a valid explicit value — it means "preserve computed default".
2380        assert_eq!(
2381            bandit.warmup_queries,
2382            Some(0),
2383            "warmup_queries = 0 must deserialize to Some(0)"
2384        );
2385    }
2386
2387    #[test]
2388    fn bandit_warmup_queries_missing_field_defaults_to_none() {
2389        // When warmup_queries is omitted entirely, #[serde(default)] must produce None.
2390        let cfg = parse_llm(
2391            r#"
2392[llm]
2393
2394[llm.router]
2395strategy = "bandit"
2396
2397[llm.router.bandit]
2398alpha = 1.5
2399"#,
2400        );
2401        let bandit = cfg
2402            .router
2403            .expect("router section must be present")
2404            .bandit
2405            .expect("bandit section must be present");
2406        assert_eq!(
2407            bandit.warmup_queries, None,
2408            "omitted warmup_queries must default to None"
2409        );
2410    }
2411
2412    #[test]
2413    fn provider_name_new_and_as_str() {
2414        let n = ProviderName::new("fast");
2415        assert_eq!(n.as_str(), "fast");
2416        assert!(!n.is_empty());
2417    }
2418
2419    #[test]
2420    fn provider_name_default_is_empty() {
2421        let n = ProviderName::default();
2422        assert!(n.is_empty());
2423        assert_eq!(n.as_str(), "");
2424    }
2425
2426    #[test]
2427    fn provider_name_partial_eq_str() {
2428        let n = ProviderName::new("fast");
2429        assert_eq!(n, "fast");
2430        assert_ne!(n, "slow");
2431    }
2432
2433    #[test]
2434    fn provider_name_serde_roundtrip() {
2435        let n = ProviderName::new("my-provider");
2436        let json = serde_json::to_string(&n).expect("serialize");
2437        assert_eq!(json, "\"my-provider\"");
2438        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2439        assert_eq!(back, n);
2440    }
2441
2442    #[test]
2443    fn provider_name_serde_empty_roundtrip() {
2444        let n = ProviderName::default();
2445        let json = serde_json::to_string(&n).expect("serialize");
2446        assert_eq!(json, "\"\"");
2447        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2448        assert_eq!(back, n);
2449        assert!(back.is_empty());
2450    }
2451
2452    // ─── GonkaNode / ProviderKind::Gonka ─────────────────────────────────────
2453
2454    fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
2455        ProviderEntry {
2456            provider_type: ProviderKind::Gonka,
2457            name: Some("my-gonka".into()),
2458            gonka_nodes: nodes,
2459            ..Default::default()
2460        }
2461    }
2462
2463    fn valid_gonka_nodes() -> Vec<GonkaNode> {
2464        vec![
2465            GonkaNode {
2466                url: "https://node1.gonka.ai".into(),
2467                address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
2468                name: Some("node1".into()),
2469            },
2470            GonkaNode {
2471                url: "https://node2.gonka.ai".into(),
2472                address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
2473                name: Some("node2".into()),
2474            },
2475            GonkaNode {
2476                url: "http://node3.internal".into(),
2477                address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
2478                name: None,
2479            },
2480        ]
2481    }
2482
2483    #[test]
2484    fn validate_gonka_valid() {
2485        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2486        assert!(entry.validate().is_ok());
2487    }
2488
2489    #[test]
2490    fn validate_gonka_empty_nodes_errors() {
2491        let entry = gonka_entry_with_nodes(vec![]);
2492        let err = entry.validate().unwrap_err();
2493        assert!(
2494            err.to_string().contains("gonka_nodes"),
2495            "error should mention gonka_nodes: {err}"
2496        );
2497    }
2498
2499    #[test]
2500    fn validate_gonka_node_empty_url_errors() {
2501        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2502            url: String::new(),
2503            address: "gonka1test".into(),
2504            name: None,
2505        }]);
2506        let err = entry.validate().unwrap_err();
2507        assert!(err.to_string().contains("url"), "{err}");
2508    }
2509
2510    #[test]
2511    fn validate_gonka_node_invalid_scheme_errors() {
2512        let entry = gonka_entry_with_nodes(vec![GonkaNode {
2513            url: "ftp://node.gonka.ai".into(),
2514            address: "gonka1test".into(),
2515            name: None,
2516        }]);
2517        let err = entry.validate().unwrap_err();
2518        assert!(err.to_string().contains("http"), "{err}");
2519    }
2520
2521    #[test]
2522    fn validate_gonka_without_name_errors() {
2523        let entry = ProviderEntry {
2524            provider_type: ProviderKind::Gonka,
2525            name: None,
2526            gonka_nodes: valid_gonka_nodes(),
2527            ..Default::default()
2528        };
2529        let err = entry.validate().unwrap_err();
2530        assert!(err.to_string().contains("gonka"), "{err}");
2531    }
2532
2533    #[test]
2534    fn gonka_toml_round_trip() {
2535        let toml = r#"
2536[llm]
2537
2538[[llm.providers]]
2539type = "gonka"
2540name = "my-gonka"
2541gonka_chain_prefix = "custom-chain"
2542
2543[[llm.providers.gonka_nodes]]
2544url = "https://node1.gonka.ai"
2545address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2546name = "node1"
2547
2548[[llm.providers.gonka_nodes]]
2549url = "https://node2.gonka.ai"
2550address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
2551name = "node2"
2552
2553[[llm.providers.gonka_nodes]]
2554url = "https://node3.gonka.ai"
2555address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
2556"#;
2557        let cfg = parse_llm(toml);
2558        assert_eq!(cfg.providers.len(), 1);
2559        let entry = &cfg.providers[0];
2560        assert_eq!(entry.provider_type, ProviderKind::Gonka);
2561        assert_eq!(entry.name.as_deref(), Some("my-gonka"));
2562        let nodes = &entry.gonka_nodes;
2563        assert_eq!(nodes.len(), 3);
2564        assert_eq!(nodes[0].url, "https://node1.gonka.ai");
2565        assert_eq!(
2566            nodes[0].address,
2567            "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
2568        );
2569        assert_eq!(nodes[0].name.as_deref(), Some("node1"));
2570        assert_eq!(nodes[2].name, None);
2571        assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
2572    }
2573
2574    #[test]
2575    fn gonka_default_chain_prefix() {
2576        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
2577        assert_eq!(entry.effective_gonka_chain_prefix(), "gonka");
2578    }
2579
2580    #[test]
2581    fn gonka_explicit_chain_prefix() {
2582        let entry = ProviderEntry {
2583            provider_type: ProviderKind::Gonka,
2584            name: Some("my-gonka".into()),
2585            gonka_nodes: valid_gonka_nodes(),
2586            gonka_chain_prefix: Some("my-chain".into()),
2587            ..Default::default()
2588        };
2589        assert_eq!(entry.effective_gonka_chain_prefix(), "my-chain");
2590    }
2591
2592    #[test]
2593    fn effective_model_gonka_is_empty() {
2594        let entry = ProviderEntry {
2595            provider_type: ProviderKind::Gonka,
2596            model: None,
2597            ..Default::default()
2598        };
2599        assert_eq!(entry.effective_model(), "");
2600    }
2601
2602    #[test]
2603    fn existing_configs_still_parse() {
2604        let toml = r#"
2605[llm]
2606
2607[[llm.providers]]
2608type = "ollama"
2609model = "qwen3:8b"
2610
2611[[llm.providers]]
2612type = "claude"
2613name = "claude"
2614model = "claude-sonnet-4-6"
2615"#;
2616        let cfg = parse_llm(toml);
2617        assert_eq!(cfg.providers.len(), 2);
2618        assert_eq!(cfg.providers[0].provider_type, ProviderKind::Ollama);
2619        assert_eq!(cfg.providers[1].provider_type, ProviderKind::Claude);
2620    }
2621
2622    // ── ProviderEntry::validate — Cocoon URL and model validation ─────────────
2623
2624    fn cocoon_entry(url: Option<&str>, model: Option<&str>) -> ProviderEntry {
2625        ProviderEntry {
2626            provider_type: ProviderKind::Cocoon,
2627            name: Some("cocoon".into()),
2628            cocoon_client_url: url.map(str::to_owned),
2629            model: model.map(str::to_owned),
2630            ..Default::default()
2631        }
2632    }
2633
2634    #[test]
2635    fn test_cocoon_url_validation_accepts_http() {
2636        assert!(
2637            cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2638                .validate()
2639                .is_ok()
2640        );
2641    }
2642
2643    #[test]
2644    fn test_cocoon_url_validation_accepts_https_localhost() {
2645        assert!(
2646            cocoon_entry(Some("https://localhost:10000"), Some("Qwen/Qwen3-0.6B"))
2647                .validate()
2648                .is_ok()
2649        );
2650    }
2651
2652    #[test]
2653    fn test_cocoon_url_validation_rejects_non_localhost() {
2654        let err = cocoon_entry(Some("http://192.168.1.10:10000"), Some("Qwen/Qwen3-0.6B"))
2655            .validate()
2656            .unwrap_err();
2657        assert!(
2658            err.to_string().contains("localhost"),
2659            "error should mention localhost restriction: {err}"
2660        );
2661    }
2662
2663    #[test]
2664    fn test_cocoon_url_validation_rejects_non_http_scheme() {
2665        let err = cocoon_entry(Some("ftp://localhost"), Some("Qwen/Qwen3-0.6B"))
2666            .validate()
2667            .unwrap_err();
2668        assert!(
2669            err.to_string().contains("ftp"),
2670            "error should mention the bad scheme: {err}"
2671        );
2672    }
2673
2674    #[test]
2675    fn test_cocoon_url_validation_rejects_invalid_url() {
2676        let err = cocoon_entry(Some("not-a-url"), Some("Qwen/Qwen3-0.6B"))
2677            .validate()
2678            .unwrap_err();
2679        assert!(
2680            err.to_string().contains("not-a-url"),
2681            "error should mention the bad value: {err}"
2682        );
2683    }
2684
2685    #[test]
2686    fn test_cocoon_url_none_passes() {
2687        assert!(
2688            cocoon_entry(None, Some("Qwen/Qwen3-0.6B"))
2689                .validate()
2690                .is_ok()
2691        );
2692    }
2693
2694    #[test]
2695    fn test_cocoon_model_empty_rejected() {
2696        let err = cocoon_entry(Some("http://localhost:10000"), Some(""))
2697            .validate()
2698            .unwrap_err();
2699        assert!(
2700            err.to_string().contains("empty"),
2701            "error should mention 'empty': {err}"
2702        );
2703    }
2704
2705    #[test]
2706    fn test_cocoon_model_none_passes() {
2707        assert!(
2708            cocoon_entry(Some("http://localhost:10000"), None)
2709                .validate()
2710                .is_ok()
2711        );
2712    }
2713
2714    #[test]
2715    fn validate_cocoon_pricing_negative_prompt_errors() {
2716        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2717        e.cocoon_pricing = Some(CocoonPricing {
2718            prompt_cents_per_1k: -1.0,
2719            completion_cents_per_1k: 0.03,
2720        });
2721        assert!(e.validate().is_err());
2722    }
2723
2724    #[test]
2725    fn validate_cocoon_pricing_negative_completion_errors() {
2726        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2727        e.cocoon_pricing = Some(CocoonPricing {
2728            prompt_cents_per_1k: 0.01,
2729            completion_cents_per_1k: -0.5,
2730        });
2731        assert!(e.validate().is_err());
2732    }
2733
2734    #[test]
2735    fn validate_cocoon_pricing_valid_passes() {
2736        let mut e = cocoon_entry(Some("http://localhost:10000"), Some("Qwen/Qwen3-0.6B"));
2737        e.cocoon_pricing = Some(CocoonPricing {
2738            prompt_cents_per_1k: 0.01,
2739            completion_cents_per_1k: 0.03,
2740        });
2741        assert!(e.validate().is_ok());
2742    }
2743}