zeph_config/providers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use serde::{Deserialize, Serialize};
7
8// ── LLM provider config types (moved from zeph-llm) ─────────────────────────
9
10/// Extended or adaptive thinking mode for Claude.
11///
12/// Serializes with `mode` as tag:
13/// `{ "mode": "extended", "budget_tokens": 10000 }` or `{ "mode": "adaptive" }`.
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
15#[serde(tag = "mode", rename_all = "snake_case")]
16pub enum ThinkingConfig {
17    /// Extended thinking with an explicit token budget.
18    Extended {
19        /// Maximum thinking tokens to allocate.
20        budget_tokens: u32,
21    },
22    /// Adaptive thinking that selects effort automatically.
23    Adaptive {
24        /// Explicit effort hint when provided; model-chosen when `None`.
25        #[serde(default, skip_serializing_if = "Option::is_none")]
26        effort: Option<ThinkingEffort>,
27    },
28}
29
30/// Effort level for adaptive thinking.
31#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
32#[serde(rename_all = "lowercase")]
33pub enum ThinkingEffort {
34    /// Minimal thinking; fastest responses.
35    Low,
36    /// Balanced thinking depth. This is the default.
37    #[default]
38    Medium,
39    /// Maximum thinking depth; slowest responses.
40    High,
41}
42
43/// Prompt-cache TTL variant for the Anthropic API.
44///
45/// When used as a TOML config value, the accepted strings are `"ephemeral"` and `"1h"`.
46/// On the wire (Anthropic API), `OneHour` serializes as `"1h"` inside the `cache_control.ttl`
47/// field.
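///
/// # Example (TOML)
///
/// An illustrative provider entry opting into the extended TTL via the
/// `prompt_cache_ttl` field (model name is a placeholder):
///
/// ```toml
/// [[llm.providers]]
/// type = "claude"
/// model = "claude-sonnet-4-6"
/// prompt_cache_ttl = "1h"
/// ```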
48#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
49#[serde(rename_all = "snake_case")]
50pub enum CacheTtl {
51    /// Default ephemeral TTL (~5 minutes). No beta header required.
52    #[default]
53    Ephemeral,
54    /// Extended 1-hour TTL. Requires the `extended-cache-ttl-2025-04-25` beta header.
55    /// Cache writes cost approximately 2× more than `Ephemeral`.
56    #[serde(rename = "1h")]
57    OneHour,
58}
59
60impl CacheTtl {
61    /// Returns `true` when this TTL variant requires the `extended-cache-ttl-2025-04-25` beta
62    /// header to be sent with each request.
63    #[must_use]
64    pub fn requires_beta(self) -> bool {
65        match self {
66            Self::OneHour => true,
67            Self::Ephemeral => false,
68        }
69    }
70}
71
72/// Thinking level for Gemini models that support extended reasoning.
73///
74/// Maps to `generationConfig.thinkingConfig.thinkingLevel` in the Gemini API.
75/// Valid for Gemini 3+ models. For Gemini 2.5, use `thinking_budget` instead.
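///
/// # Example (TOML)
///
/// An illustrative Gemini provider entry (model name is a placeholder):
///
/// ```toml
/// [[llm.providers]]
/// type = "gemini"
/// model = "gemini-2.0-flash"
/// thinking_level = "high"
/// ```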
76#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
77#[serde(rename_all = "lowercase")]
78pub enum GeminiThinkingLevel {
79    /// Minimal reasoning pass.
80    Minimal,
81    /// Low reasoning depth.
82    Low,
83    /// Medium reasoning depth.
84    Medium,
85    /// Full reasoning depth.
86    High,
87}
88
89/// Newtype wrapper for a provider name referencing an entry in `[[llm.providers]]`.
90///
91/// Using a dedicated type instead of bare `String` makes provider cross-references
92/// explicit in the type system and enables validation at config load time.
93///
94/// # Note
95///
96/// `zeph-common` now defines a canonical `ProviderName(Arc<str>)` newtype. This
97/// config-local type uses `String` and exists for backward compat within `zeph-config`.
98///
99/// TODO(critic): migrate to `zeph_common::ProviderName` once `zeph-config` → `zeph-common`
100/// dependency inversion (A-1) lands.
101#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
102#[serde(transparent)]
103pub struct ProviderName(String);
104
105impl ProviderName {
106    /// Create a new `ProviderName` from any string-like value.
107    ///
108    /// An empty string is a sentinel meaning "use the primary provider" and is the
109    /// default value. Check [`is_empty`](Self::is_empty) before using in routing.
110    ///
111    /// # Examples
112    ///
113    /// ```
114    /// use zeph_config::providers::ProviderName;
115    ///
116    /// let name = ProviderName::new("fast");
117    /// assert_eq!(name.as_str(), "fast");
118    /// ```
119    #[must_use]
120    pub fn new(name: impl Into<String>) -> Self {
121        Self(name.into())
122    }
123
124    /// Return `true` when this is the empty sentinel (use primary provider).
125    ///
126    /// # Examples
127    ///
128    /// ```
129    /// use zeph_config::providers::ProviderName;
130    ///
131    /// assert!(ProviderName::default().is_empty());
132    /// assert!(!ProviderName::new("fast").is_empty());
133    /// ```
134    #[must_use]
135    pub fn is_empty(&self) -> bool {
136        self.0.is_empty()
137    }
138
139    /// Return the inner string slice.
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use zeph_config::providers::ProviderName;
145    ///
146    /// let name = ProviderName::new("quality");
147    /// assert_eq!(name.as_str(), "quality");
148    /// ```
149    #[must_use]
150    pub fn as_str(&self) -> &str {
151        &self.0
152    }
153
154    /// Return `Some(&str)` when non-empty, `None` for the empty sentinel.
155    ///
156    /// Bridges `Option<ProviderName>` fields and the legacy
157    /// `.as_deref().filter(|s| !s.is_empty())` pattern.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use zeph_config::providers::ProviderName;
163    ///
164    /// assert_eq!(ProviderName::default().as_non_empty(), None);
165    /// assert_eq!(ProviderName::new("fast").as_non_empty(), Some("fast"));
166    /// ```
167    #[must_use]
168    pub fn as_non_empty(&self) -> Option<&str> {
169        if self.0.is_empty() {
170            None
171        } else {
172            Some(&self.0)
173        }
174    }
175}
176
177impl fmt::Display for ProviderName {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        self.0.fmt(f)
180    }
181}
182
183impl AsRef<str> for ProviderName {
184    fn as_ref(&self) -> &str {
185        &self.0
186    }
187}
188
189impl std::ops::Deref for ProviderName {
190    type Target = str;
191
192    fn deref(&self) -> &str {
193        &self.0
194    }
195}
196
197impl PartialEq<str> for ProviderName {
198    fn eq(&self, other: &str) -> bool {
199        self.0 == other
200    }
201}
202
203impl PartialEq<&str> for ProviderName {
204    fn eq(&self, other: &&str) -> bool {
205        self.0 == *other
206    }
207}
208
209fn default_response_cache_ttl_secs() -> u64 {
210    3600
211}
212
213fn default_semantic_cache_threshold() -> f32 {
214    0.95
215}
216
217fn default_semantic_cache_max_candidates() -> u32 {
218    10
219}
220
221fn default_router_ema_alpha() -> f64 {
222    0.1
223}
224
225fn default_router_reorder_interval() -> u64 {
226    10
227}
228
229fn default_embedding_model() -> String {
230    "qwen3-embedding".into()
231}
232
233fn default_candle_source() -> String {
234    "huggingface".into()
235}
236
237fn default_chat_template() -> String {
238    "chatml".into()
239}
240
241fn default_candle_device() -> String {
242    "cpu".into()
243}
244
245fn default_temperature() -> f64 {
246    0.7
247}
248
249fn default_max_tokens() -> usize {
250    2048
251}
252
253fn default_seed() -> u64 {
254    42
255}
256
257fn default_repeat_penalty() -> f32 {
258    1.1
259}
260
261fn default_repeat_last_n() -> usize {
262    64
263}
264
265fn default_cascade_quality_threshold() -> f64 {
266    0.5
267}
268
269fn default_cascade_max_escalations() -> u8 {
270    2
271}
272
273fn default_cascade_window_size() -> usize {
274    50
275}
276
277fn default_reputation_decay_factor() -> f64 {
278    0.95
279}
280
281fn default_reputation_weight() -> f64 {
282    0.3
283}
284
285fn default_reputation_min_observations() -> u64 {
286    5
287}
288
289/// Returns the default STT provider name (empty string — auto-detect).
290#[must_use]
291pub fn default_stt_provider() -> String {
292    String::new()
293}
294
295/// Returns the default STT transcription language hint (`"auto"`).
296#[must_use]
297pub fn default_stt_language() -> String {
298    "auto".into()
299}
300
301/// Returns the default embedding model name used by `[llm] embedding_model`.
302#[must_use]
303pub fn get_default_embedding_model() -> String {
304    default_embedding_model()
305}
306
307/// Returns the default response cache TTL in seconds.
308#[must_use]
309pub fn get_default_response_cache_ttl_secs() -> u64 {
310    default_response_cache_ttl_secs()
311}
312
313/// Returns the default EMA alpha for the router latency estimator.
314#[must_use]
315pub fn get_default_router_ema_alpha() -> f64 {
316    default_router_ema_alpha()
317}
318
319/// Returns the default router reorder interval (turns between provider re-ranking).
320#[must_use]
321pub fn get_default_router_reorder_interval() -> u64 {
322    default_router_reorder_interval()
323}
324
325/// LLM provider backend selector.
326///
327/// Used in `[[llm.providers]]` entries as the `type` field.
328///
329/// # Example (TOML)
330///
331/// ```toml
332/// [[llm.providers]]
333/// type = "openai"
334/// model = "gpt-4o"
335/// name = "quality"
336/// ```
337#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
338#[serde(rename_all = "lowercase")]
339pub enum ProviderKind {
340    /// Local Ollama server (default base URL: `http://localhost:11434`).
341    Ollama,
342    /// Anthropic Claude API.
343    Claude,
344    /// `OpenAI` API.
345    OpenAi,
346    /// Google Gemini API.
347    Gemini,
348    /// Local Candle inference (CPU/GPU, no external server required).
349    Candle,
350    /// OpenAI-compatible third-party API (e.g. Groq, Together AI, LM Studio).
351    Compatible,
352}
353
354impl ProviderKind {
355    /// Return the lowercase string identifier for this provider kind.
356    ///
357    /// # Examples
358    ///
359    /// ```
360    /// use zeph_config::ProviderKind;
361    ///
362    /// assert_eq!(ProviderKind::Claude.as_str(), "claude");
363    /// assert_eq!(ProviderKind::OpenAi.as_str(), "openai");
364    /// ```
365    #[must_use]
366    pub fn as_str(self) -> &'static str {
367        match self {
368            Self::Ollama => "ollama",
369            Self::Claude => "claude",
370            Self::OpenAi => "openai",
371            Self::Gemini => "gemini",
372            Self::Candle => "candle",
373            Self::Compatible => "compatible",
374        }
375    }
376}
377
378impl std::fmt::Display for ProviderKind {
379    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
380        f.write_str(self.as_str())
381    }
382}
383
384/// LLM configuration, nested under `[llm]` in TOML.
385///
386/// Declares the provider pool and controls routing, embedding, caching, and STT.
387/// All providers are declared in `[[llm.providers]]`; subsystems reference them by
388/// the `name` field using a `*_provider` config key.
389///
390/// # Example (TOML)
391///
392/// ```toml
393/// [[llm.providers]]
394/// name = "fast"
395/// type = "openai"
396/// model = "gpt-4o-mini"
397///
398/// [[llm.providers]]
399/// name = "quality"
400/// type = "claude"
401/// model = "claude-opus-4-5"
402///
403/// [llm]
404/// routing = "none"
405/// embedding_model = "qwen3-embedding"
406/// ```
407#[derive(Debug, Deserialize, Serialize)]
408pub struct LlmConfig {
409    /// Provider pool. First entry is default unless one is marked `default = true`.
410    #[serde(default, skip_serializing_if = "Vec::is_empty")]
411    pub providers: Vec<ProviderEntry>,
412
413    /// Routing strategy for multi-provider configs.
414    #[serde(default, skip_serializing_if = "is_routing_none")]
415    pub routing: LlmRoutingStrategy,
416
417    #[serde(default = "default_embedding_model_opt")]
418    pub embedding_model: String,
419    #[serde(default, skip_serializing_if = "Option::is_none")]
420    pub candle: Option<CandleConfig>,
421    #[serde(default)]
422    pub stt: Option<SttConfig>,
423    #[serde(default)]
424    pub response_cache_enabled: bool,
425    #[serde(default = "default_response_cache_ttl_secs")]
426    pub response_cache_ttl_secs: u64,
427    /// Enable semantic similarity-based response caching. Requires embedding support.
428    #[serde(default)]
429    pub semantic_cache_enabled: bool,
430    /// Cosine similarity threshold for semantic cache hits (0.0–1.0).
431    ///
432    /// Only the highest-scoring candidate above this threshold is returned.
433    /// Lower values produce more cache hits but risk returning less relevant responses.
434    /// Recommended range: 0.92–0.98; default: 0.95.
435    #[serde(default = "default_semantic_cache_threshold")]
436    pub semantic_cache_threshold: f32,
437    /// Maximum cached entries to examine per semantic lookup (SQL `LIMIT` clause in
438    /// `ResponseCache::get_semantic()`). Controls the recall-vs-performance tradeoff:
439    ///
440    /// - **Higher values** (e.g. 50): scan more entries, better chance of finding a
441    ///   semantically similar cached response, but slower queries.
442    /// - **Lower values** (e.g. 5): faster queries, but may miss relevant cached entries
443    ///   when the cache is large.
444    /// - **Default (10)**: balanced middle ground for typical workloads.
445    ///
446    /// Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running
447    /// sessions with many cached responses); reduce to 5 for low-latency interactive use.
448    /// Env override: `ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES`.
449    #[serde(default = "default_semantic_cache_max_candidates")]
450    pub semantic_cache_max_candidates: u32,
451    #[serde(default)]
452    pub router_ema_enabled: bool,
453    #[serde(default = "default_router_ema_alpha")]
454    pub router_ema_alpha: f64,
455    #[serde(default = "default_router_reorder_interval")]
456    pub router_reorder_interval: u64,
457    /// Routing configuration for Thompson/Cascade strategies.
458    #[serde(default, skip_serializing_if = "Option::is_none")]
459    pub router: Option<RouterConfig>,
460    /// Provider-specific instruction file to inject into the system prompt.
461    /// Merged with `agent.instruction_files` at startup.
462    #[serde(default, skip_serializing_if = "Option::is_none")]
463    pub instruction_file: Option<std::path::PathBuf>,
464    /// Shorthand model spec for tool-pair summarization and context compaction.
465    /// Format: `ollama/<model>`, `claude[/<model>]`, `openai[/<model>]`, `compatible/<name>`, `candle`.
466    /// Ignored when `[llm.summary_provider]` is set.
467    #[serde(default, skip_serializing_if = "Option::is_none")]
468    pub summary_model: Option<String>,
469    /// Structured provider config for summarization. Takes precedence over `summary_model`.
470    #[serde(default, skip_serializing_if = "Option::is_none")]
471    pub summary_provider: Option<ProviderEntry>,
472
473    /// Complexity triage routing configuration. Required when `routing = "triage"`.
474    #[serde(default, skip_serializing_if = "Option::is_none")]
475    pub complexity_routing: Option<ComplexityRoutingConfig>,
476
477    /// Collaborative Entropy (`CoE`) configuration. `None` = `CoE` disabled.
478    #[serde(default, skip_serializing_if = "Option::is_none")]
479    pub coe: Option<CoeConfig>,
480}
481
482fn default_embedding_model_opt() -> String {
483    default_embedding_model()
484}
485
486#[allow(clippy::trivially_copy_pass_by_ref)]
487fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
488    *s == LlmRoutingStrategy::None
489}
490
491impl LlmConfig {
492    /// Effective provider kind for the primary (first/default) provider in the pool.
493    #[must_use]
494    pub fn effective_provider(&self) -> ProviderKind {
495        self.providers
496            .first()
497            .map_or(ProviderKind::Ollama, |e| e.provider_type)
498    }
499
500    /// Effective base URL for the primary provider.
501    #[must_use]
502    pub fn effective_base_url(&self) -> &str {
503        self.providers
504            .first()
505            .and_then(|e| e.base_url.as_deref())
506            .unwrap_or("http://localhost:11434")
507    }
508
509    /// Effective model for the primary chat-capable provider.
510    ///
511    /// Skips embed-only entries (those with `embed = true`) and returns the model of the
512    /// first provider that can handle chat requests. Falls back to `"qwen3:8b"` when no
513    /// chat-capable provider is configured.
514    #[must_use]
515    pub fn effective_model(&self) -> &str {
516        self.providers
517            .iter()
518            .find(|e| !e.embed)
519            .and_then(|e| e.model.as_deref())
520            .unwrap_or("qwen3:8b")
521    }
522
523    /// Find the provider entry designated for STT.
524    ///
525    /// Resolution priority:
526    /// 1. `[llm.stt].provider` matches `[[llm.providers]].name` and the entry has `stt_model`
527    /// 2. `[llm.stt].provider` is empty — fall through to auto-detect
528    /// 3. First provider with `stt_model` set (auto-detect fallback)
529    /// 4. `None` — STT disabled
530    #[must_use]
531    pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
532        let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
533        if name_hint.is_empty() {
534            self.providers.iter().find(|p| p.stt_model.is_some())
535        } else {
536            self.providers
537                .iter()
538                .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
539        }
540    }
541
542    /// Validate that the config uses the new `[[llm.providers]]` format.
543    ///
544    /// # Errors
545    ///
546    /// Currently always returns `Ok(())`; the empty-pool check is handled by [`validate_pool`].
547    pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
548        Ok(())
549    }
550
551    /// Validate STT config cross-references.
552    ///
553    /// # Errors
554    ///
555    /// Returns `ConfigError::Validation` when the referenced STT provider does not exist.
556    pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
557        use crate::error::ConfigError;
558
559        let Some(stt) = &self.stt else {
560            return Ok(());
561        };
562        if stt.provider.is_empty() {
563            return Ok(());
564        }
565        let found = self
566            .providers
567            .iter()
568            .find(|p| p.effective_name() == stt.provider);
569        match found {
570            None => {
571                return Err(ConfigError::Validation(format!(
572                    "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
573                    stt.provider
574                )));
575            }
576            Some(entry) if entry.stt_model.is_none() => {
577                tracing::warn!(
578                    provider = stt.provider,
579                    "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
580                );
581            }
582            _ => {}
583        }
584        Ok(())
585    }
586}
587
588/// Speech-to-text configuration, nested under `[llm.stt]` in TOML.
589///
590/// When set, Zeph uses the referenced provider for voice transcription.
591/// The provider must have an `stt_model` field set in its `[[llm.providers]]` entry.
592///
593/// # Example (TOML)
594///
595/// ```toml
596/// [llm.stt]
597/// provider = "fast"
598/// language = "en"
599/// ```
600#[derive(Debug, Clone, Deserialize, Serialize)]
601pub struct SttConfig {
602    /// Provider name from `[[llm.providers]]`. Empty string means auto-detect first provider
603    /// with `stt_model` set.
604    #[serde(default = "default_stt_provider")]
605    pub provider: String,
606    /// Language hint for transcription (e.g. `"en"`, `"auto"`).
607    #[serde(default = "default_stt_language")]
608    pub language: String,
609}
610
611/// Routing strategy selection for multi-provider routing.
612#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
613#[serde(rename_all = "lowercase")]
614pub enum RouterStrategyConfig {
615    /// Exponential moving average latency-aware ordering.
616    #[default]
617    Ema,
618    /// Thompson Sampling with Beta distributions (persistence-backed).
619    Thompson,
620    /// Cascade routing: try cheapest provider first, escalate on degenerate output.
621    Cascade,
622    /// PILOT: `LinUCB` contextual bandit with online learning and cost-aware reward.
623    Bandit,
624}
625
626/// Agent Stability Index (ASI) configuration.
627///
628/// Tracks per-provider response coherence via a sliding window of response embeddings.
629/// When coherence drops below `coherence_threshold`, the provider's routing prior is
630/// penalized by `penalty_weight`. Disabled by default; session-only (no persistence).
631///
632/// # Known Limitation
633///
634/// ASI embeddings are computed in a background `tokio::spawn` task after the response is
635/// returned to the caller. Under high request rates, the coherence score used for routing
636/// may lag 1–2 responses behind due to this fire-and-forget design. With the default
637/// `window = 5`, this lag is tolerable — coherence is a slow-moving signal.
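///
/// # Example (TOML)
///
/// An illustrative `[llm.router.asi]` section enabling ASI with the documented
/// default tuning values:
///
/// ```toml
/// [llm.router.asi]
/// enabled = true
/// window = 5
/// coherence_threshold = 0.7
/// penalty_weight = 0.3
/// ```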
638#[derive(Debug, Clone, Deserialize, Serialize)]
639pub struct AsiConfig {
640    /// Enable ASI coherence tracking. Default: false.
641    #[serde(default)]
642    pub enabled: bool,
643
644    /// Sliding window size for response embeddings per provider. Default: 5.
645    #[serde(default = "default_asi_window")]
646    pub window: usize,
647
648    /// Coherence score [0.0, 1.0] below which the provider is penalized. Default: 0.7.
649    #[serde(default = "default_asi_coherence_threshold")]
650    pub coherence_threshold: f32,
651
652    /// Penalty weight applied to Thompson beta / EMA score on low coherence. Default: 0.3.
653    ///
654    /// For Thompson, this shifts the beta prior: `beta += penalty_weight * (threshold - coherence)`.
655    /// For EMA, the score is multiplied by `max(0.5, coherence / threshold)`.
656    #[serde(default = "default_asi_penalty_weight")]
657    pub penalty_weight: f32,
658}
659
660fn default_asi_window() -> usize {
661    5
662}
663
664fn default_asi_coherence_threshold() -> f32 {
665    0.7
666}
667
668fn default_asi_penalty_weight() -> f32 {
669    0.3
670}
671
672impl Default for AsiConfig {
673    fn default() -> Self {
674        Self {
675            enabled: false,
676            window: default_asi_window(),
677            coherence_threshold: default_asi_coherence_threshold(),
678            penalty_weight: default_asi_penalty_weight(),
679        }
680    }
681}
682
683/// Routing configuration for multi-provider setups.
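///
/// # Example (TOML)
///
/// An illustrative `[llm.router]` section selecting Thompson Sampling with the
/// embedding quality gate enabled (the threshold value is a placeholder):
///
/// ```toml
/// [llm.router]
/// strategy = "thompson"
/// quality_gate = 0.75
/// embed_concurrency = 4
/// ```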
684#[derive(Debug, Clone, Deserialize, Serialize)]
685pub struct RouterConfig {
686    /// Routing strategy: `"ema"` (default), `"thompson"`, `"cascade"`, or `"bandit"`.
687    #[serde(default)]
688    pub strategy: RouterStrategyConfig,
689    /// Path for persisting Thompson Sampling state. Defaults to `~/.zeph/router_thompson_state.json`.
690    ///
691    /// # Security
692    ///
693    /// This path is user-controlled. The application writes and reads a JSON file at
694    /// this location. Ensure the path is within a directory that is not world-writable
695    /// (e.g., avoid `/tmp`). The file is created with mode `0o600` on Unix.
696    #[serde(default)]
697    pub thompson_state_path: Option<String>,
698    /// Cascade routing configuration. Only used when `strategy = "cascade"`.
699    #[serde(default)]
700    pub cascade: Option<CascadeConfig>,
701    /// Bayesian reputation scoring configuration (RAPS). Disabled by default.
702    #[serde(default)]
703    pub reputation: Option<ReputationConfig>,
704    /// PILOT bandit routing configuration. Only used when `strategy = "bandit"`.
705    #[serde(default)]
706    pub bandit: Option<BanditConfig>,
707    /// Embedding-based quality gate threshold for Thompson/EMA routing. Default: disabled.
708    ///
709    /// When set, after provider selection, the cosine similarity between the query embedding
710    /// and the response embedding is computed. If below this threshold, the next provider in
711    /// the ordered list is tried. On exhaustion, the best response seen is returned.
712    ///
713    /// Only applies to Thompson and EMA strategies. Cascade uses its own quality classifier.
714    /// Fail-open: embedding errors disable the gate for that request.
715    #[serde(default)]
716    pub quality_gate: Option<f32>,
717    /// Agent Stability Index configuration. Disabled by default.
718    #[serde(default)]
719    pub asi: Option<AsiConfig>,
720    /// Maximum number of concurrent `embed_batch` calls through the router.
721    ///
722    /// Limits simultaneous embedding HTTP requests to prevent provider rate-limiting
723    /// and memory pressure during indexing or high-frequency recall. Default: 4.
724    /// Set to 0 to disable the semaphore (unlimited concurrency).
725    #[serde(default = "default_embed_concurrency")]
726    pub embed_concurrency: usize,
727}
728
729fn default_embed_concurrency() -> usize {
730    4
731}
732
733/// Configuration for Bayesian reputation scoring (RAPS — Reputation-Adjusted Provider Selection).
734///
735/// When enabled, quality outcomes from tool execution shift the routing scores over time,
736/// giving an advantage to providers that consistently produce valid tool arguments.
737///
738/// Default: disabled. Set `enabled = true` to activate.
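///
/// # Example (TOML)
///
/// An illustrative `[llm.router.reputation]` section enabling reputation scoring
/// with the documented defaults:
///
/// ```toml
/// [llm.router.reputation]
/// enabled = true
/// decay_factor = 0.95
/// weight = 0.3
/// min_observations = 5
/// ```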
739#[derive(Debug, Clone, Deserialize, Serialize)]
740pub struct ReputationConfig {
741    /// Enable reputation scoring. Default: false.
742    #[serde(default)]
743    pub enabled: bool,
744    /// Session-level decay factor applied on each load. Range: (0.0, 1.0]. Default: 0.95.
745    /// Lower values make reputation forget faster; 1.0 = no decay.
746    #[serde(default = "default_reputation_decay_factor")]
747    pub decay_factor: f64,
748    /// Weight of reputation in routing score blend. Range: [0.0, 1.0]. Default: 0.3.
749    ///
750    /// **Warning**: values above 0.5 can aggressively suppress low-reputation providers.
751    /// At `weight = 1.0` with `rep_factor = 0.0` (all failures), the routing score
752    /// drops to zero — the provider becomes unreachable for that session. Stick to
753    /// the default (0.3) unless you intentionally want strong reputation gating.
754    #[serde(default = "default_reputation_weight")]
755    pub weight: f64,
756    /// Minimum quality observations before reputation influences routing. Default: 5.
757    #[serde(default = "default_reputation_min_observations")]
758    pub min_observations: u64,
759    /// Path for persisting reputation state. Defaults to `~/.config/zeph/router_reputation_state.json`.
760    #[serde(default)]
761    pub state_path: Option<String>,
762}
763
764/// Configuration for cascade routing (`strategy = "cascade"`).
765///
766/// Cascade routing tries providers in chain order (cheapest first), escalating to
767/// the next provider when the response is classified as degenerate (empty, repetitive,
768/// incoherent). Chain order determines cost order: first provider = cheapest.
769///
770/// # Limitations
771///
772/// The heuristic classifier detects degenerate outputs only, not semantic failures.
773/// Use `classifier_mode = "judge"` for semantic quality gating (adds LLM call cost).
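///
/// # Example (TOML)
///
/// An illustrative cascade setup; the provider names in `cost_tiers` are placeholders
/// for entries defined in `[[llm.providers]]`:
///
/// ```toml
/// [llm.router]
/// strategy = "cascade"
///
/// [llm.router.cascade]
/// quality_threshold = 0.5
/// max_escalations = 2
/// classifier_mode = "heuristic"
/// cost_tiers = ["local-fast", "haiku", "opus"]
/// ```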
774#[derive(Debug, Clone, Deserialize, Serialize)]
775pub struct CascadeConfig {
776    /// Minimum quality score [0.0, 1.0] to accept a response without escalating.
777    /// Responses scoring below this threshold trigger escalation.
778    #[serde(default = "default_cascade_quality_threshold")]
779    pub quality_threshold: f64,
780
781    /// Maximum number of quality-based escalations per request.
782    /// Network/API errors do not count against this budget.
783    /// Default: 2 (allows up to 3 providers: cheap → mid → expensive).
784    #[serde(default = "default_cascade_max_escalations")]
785    pub max_escalations: u8,
786
787    /// Quality classifier mode: `"heuristic"` (default) or `"judge"`.
788    /// Heuristic is zero-cost but detects only degenerate outputs.
789    /// Judge requires a configured `summary_model` and adds one LLM call per evaluation.
790    #[serde(default)]
791    pub classifier_mode: CascadeClassifierMode,
792
793    /// Rolling quality history window size per provider. Default: 50.
794    #[serde(default = "default_cascade_window_size")]
795    pub window_size: usize,
796
797    /// Maximum cumulative input+output tokens across all escalation levels.
798    /// When exceeded, returns the best-seen response instead of escalating further.
799    /// `None` disables the budget (unbounded escalation cost).
800    #[serde(default)]
801    pub max_cascade_tokens: Option<u32>,
802
803    /// Explicit cost ordering of provider names (cheapest first).
804    /// When set, cascade routing sorts providers by their position in this list before
805    /// trying them. Providers not in the list are appended after listed ones in their
806    /// original chain order. When unset, chain order is used (default behavior).
807    #[serde(default, skip_serializing_if = "Option::is_none")]
808    pub cost_tiers: Option<Vec<String>>,
809}
810
811impl Default for CascadeConfig {
812    fn default() -> Self {
813        Self {
814            quality_threshold: default_cascade_quality_threshold(),
815            max_escalations: default_cascade_max_escalations(),
816            classifier_mode: CascadeClassifierMode::default(),
817            window_size: default_cascade_window_size(),
818            max_cascade_tokens: None,
819            cost_tiers: None,
820        }
821    }
822}
823
824/// Quality classifier mode for cascade routing.
825#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
826#[serde(rename_all = "lowercase")]
827pub enum CascadeClassifierMode {
828    /// Zero-cost heuristic: detects degenerate outputs (empty, repetitive, incoherent).
829    /// Does not detect semantic failures (hallucinations, wrong answers).
830    #[default]
831    Heuristic,
832    /// LLM-based judge: more accurate but adds latency. Falls back to heuristic on failure.
833    /// Requires `summary_model` to be configured.
834    Judge,
835}
836
837fn default_bandit_alpha() -> f32 {
838    1.0
839}
840
841fn default_bandit_dim() -> usize {
842    32
843}
844
845fn default_bandit_cost_weight() -> f32 {
846    0.1
847}
848
849fn default_bandit_decay_factor() -> f32 {
850    1.0
851}
852
853fn default_bandit_embedding_timeout_ms() -> u64 {
854    50
855}
856
857fn default_bandit_cache_size() -> usize {
858    512
859}
860
861/// Configuration for PILOT bandit routing (`strategy = "bandit"`).
862///
863/// PILOT (Provider Intelligence via Learned Online Tuning) uses a `LinUCB` contextual
864/// bandit to learn which provider performs best for a given query context. The feature
865/// vector is derived from the query embedding (first `dim` components, L2-normalised).
866///
867/// **Cold start**: the bandit falls back to Thompson sampling for the first
868/// `10 * num_providers` queries (configurable). After warmup, `LinUCB` takes over.
869///
870/// **Embedding**: an `embedding_provider` must be set for feature vectors. If the embed
871/// call exceeds `embedding_timeout_ms` or fails, the bandit falls back to Thompson/uniform.
872/// Use a local provider (Ollama, Candle) to avoid network latency on the hot path.
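///
/// # Example (TOML)
///
/// An illustrative bandit setup using the documented defaults; `embedding_provider`
/// must name an embedding-capable `[[llm.providers]]` entry (the name here is a
/// placeholder):
///
/// ```toml
/// [llm.router]
/// strategy = "bandit"
///
/// [llm.router.bandit]
/// embedding_provider = "local-fast"
/// dim = 32
/// alpha = 1.0
/// cost_weight = 0.1
/// embedding_timeout_ms = 50
/// ```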
873#[derive(Debug, Clone, Deserialize, Serialize)]
874pub struct BanditConfig {
875    /// `LinUCB` exploration parameter. Default: 1.0.
876    /// Higher values increase exploration; lower values favour exploitation.
877    #[serde(default = "default_bandit_alpha")]
878    pub alpha: f32,
879
880    /// Feature vector dimension (first `dim` components of the embedding).
881    ///
882    /// This is simple truncation, not PCA. The first raw embedding dimensions do not
883    /// necessarily capture the most variance. For `OpenAI` `text-embedding-3-*` models,
884    /// consider using the `dimensions` API parameter (Matryoshka embeddings) instead.
885    /// Default: 32.
886    #[serde(default = "default_bandit_dim")]
887    pub dim: usize,
888
889    /// Cost penalty weight in the reward signal: `reward = quality - cost_weight * cost_fraction`.
890    /// Default: 0.1. Increase to penalise expensive providers more aggressively.
891    #[serde(default = "default_bandit_cost_weight")]
892    pub cost_weight: f32,
893
894    /// Session-level decay applied to arm state on startup: `A = I + decay*(A-I)`, `b = decay*b`.
895    /// Values < 1.0 cause re-exploration after provider quality changes. Default: 1.0 (no decay).
896    #[serde(default = "default_bandit_decay_factor")]
897    pub decay_factor: f32,
898
899    /// Provider name from `[[llm.providers]]` used for query embeddings.
900    ///
901    /// Prefer a fast, cheap embedding model: ideally local (Ollama `nomic-embed-text`,
902    /// Candle), otherwise a small hosted one (`text-embedding-3-small`). This is called on every bandit request.
903    /// Empty string disables `LinUCB` (bandit always falls back to Thompson/uniform).
904    #[serde(default)]
905    pub embedding_provider: ProviderName,
906
907    /// Hard timeout for the embedding call in milliseconds. Default: 50.
908    /// If exceeded, the request falls back to Thompson/uniform selection.
909    #[serde(default = "default_bandit_embedding_timeout_ms")]
910    pub embedding_timeout_ms: u64,
911
912    /// Maximum cached embeddings (keyed by query text hash). Default: 512.
913    #[serde(default = "default_bandit_cache_size")]
914    pub cache_size: usize,
915
916    /// Path for persisting bandit state. Defaults to `~/.config/zeph/router_bandit_state.json`.
917    ///
918    /// # Security
919    ///
920    /// This path is user-controlled. The file is created with mode `0o600` on Unix.
921    /// Do not place it in world-writable directories.
922    #[serde(default)]
923    pub state_path: Option<String>,
924
925    /// MAR (Memory-Augmented Routing) confidence threshold.
926    ///
927    /// When the top-1 semantic recall score for the current query is >= this value,
928    /// the bandit biases toward cheaper providers (the answer is likely in memory).
929    /// Set to 1.0 to disable MAR. Default: 0.9.
930    #[serde(default = "default_bandit_memory_confidence_threshold")]
931    pub memory_confidence_threshold: f32,
932
933    /// Minimum number of queries before `LinUCB` takes over from Thompson warmup.
934    ///
935    /// When unset or `0`, defaults to `10 × number of providers` (computed at startup).
936    /// Set explicitly to control how long the bandit explores uniformly before
937    /// switching to context-aware routing. Setting `0` preserves the computed default.
938    #[serde(default)]
939    pub warmup_queries: Option<u64>,
940}
941
942fn default_bandit_memory_confidence_threshold() -> f32 {
943    0.9
944}
945
946impl Default for BanditConfig {
947    fn default() -> Self {
948        Self {
949            alpha: default_bandit_alpha(),
950            dim: default_bandit_dim(),
951            cost_weight: default_bandit_cost_weight(),
952            decay_factor: default_bandit_decay_factor(),
953            embedding_provider: ProviderName::default(),
954            embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
955            cache_size: default_bandit_cache_size(),
956            state_path: None,
957            memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
958            warmup_queries: None,
959        }
960    }
961}
962
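/// Candle local-inference configuration, nested under `[llm.candle]` in TOML.
///
/// # Example (TOML)
///
/// A minimal illustrative section; the values shown are the documented defaults,
/// spelled out explicitly for clarity:
///
/// ```toml
/// [llm.candle]
/// source = "huggingface"
/// chat_template = "chatml"
/// device = "cpu"
/// inference_timeout_secs = 120
/// ```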
963#[derive(Debug, Deserialize, Serialize)]
964pub struct CandleConfig {
965    #[serde(default = "default_candle_source")]
966    pub source: String,
967    #[serde(default)]
968    pub local_path: String,
969    #[serde(default)]
970    pub filename: Option<String>,
971    #[serde(default = "default_chat_template")]
972    pub chat_template: String,
973    #[serde(default = "default_candle_device")]
974    pub device: String,
975    #[serde(default)]
976    pub embedding_repo: Option<String>,
977    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
978    ///
979    /// Must be the **token value** — resolved by the caller before constructing this config.
980    #[serde(default)]
981    pub hf_token: Option<String>,
982    #[serde(default)]
983    pub generation: GenerationParams,
984    /// Maximum seconds to wait for each half of a single inference request.
985    ///
986    /// The timeout is applied **twice** per `chat()` call: once for the channel send
987    /// (waiting for a free slot) and once for the oneshot reply (waiting for the worker
988    /// to finish). The effective maximum wall-clock wait per request is therefore
989    /// `2 × inference_timeout_secs`. CPU inference can be slow; 120s is a conservative
990    /// default for large models, giving up to 240s total before an error is returned.
991    /// Values of 0 are silently promoted to 1 at bootstrap.
992    #[serde(default = "default_inference_timeout_secs")]
993    pub inference_timeout_secs: u64,
994}
995
996fn default_inference_timeout_secs() -> u64 {
997    120
998}
999
1000/// Sampling / generation parameters for Candle local inference.
1001///
1002/// Used inside `[llm.candle.generation]` or a `[[llm.providers]]` Candle entry.
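///
/// # Example (TOML)
///
/// An illustrative `[llm.candle.generation]` section; values match the documented
/// defaults except `top_p`, which is shown here to enable nucleus sampling:
///
/// ```toml
/// [llm.candle.generation]
/// temperature = 0.7
/// top_p = 0.9
/// max_tokens = 2048
/// repeat_penalty = 1.1
/// ```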
1003#[derive(Debug, Clone, Deserialize, Serialize)]
1004pub struct GenerationParams {
1005    /// Sampling temperature. Higher values produce more creative outputs. Default: `0.7`.
1006    #[serde(default = "default_temperature")]
1007    pub temperature: f64,
1008    /// Nucleus sampling threshold. When set, sampling considers only the smallest set of tokens
1009    /// whose cumulative probability exceeds this value. Default: `None` (disabled).
1010    #[serde(default)]
1011    pub top_p: Option<f64>,
1012    /// Top-k sampling. When set, only the top-k most probable tokens are considered.
1013    /// Default: `None` (disabled).
1014    #[serde(default)]
1015    pub top_k: Option<usize>,
1016    /// Maximum number of tokens to generate per response. Capped at [`MAX_TOKENS_CAP`].
1017    /// Default: `2048`.
1018    #[serde(default = "default_max_tokens")]
1019    pub max_tokens: usize,
1020    /// Random seed for reproducible outputs. Default: `42`.
1021    #[serde(default = "default_seed")]
1022    pub seed: u64,
1023    /// Repetition penalty applied during sampling. Default: `1.1`.
1024    #[serde(default = "default_repeat_penalty")]
1025    pub repeat_penalty: f32,
1026    /// Number of last tokens to consider for the repetition penalty window. Default: `64`.
1027    #[serde(default = "default_repeat_last_n")]
1028    pub repeat_last_n: usize,
1029}
1030
1031/// Hard upper bound on `GenerationParams::max_tokens` to prevent unbounded generation.
1032pub const MAX_TOKENS_CAP: usize = 32768;
1033
1034impl GenerationParams {
1035    /// Returns `max_tokens` clamped to [`MAX_TOKENS_CAP`].
1036    ///
1037    /// # Examples
1038    ///
1039    /// ```
1040    /// use zeph_config::GenerationParams;
1041    ///
1042    /// let params = GenerationParams::default();
1043    /// assert!(params.capped_max_tokens() <= 32768);
1044    /// ```
1045    #[must_use]
1046    pub fn capped_max_tokens(&self) -> usize {
1047        self.max_tokens.min(MAX_TOKENS_CAP)
1048    }
1049}
1050
1051impl Default for GenerationParams {
1052    fn default() -> Self {
1053        Self {
1054            temperature: default_temperature(),
1055            top_p: None,
1056            top_k: None,
1057            max_tokens: default_max_tokens(),
1058            seed: default_seed(),
1059            repeat_penalty: default_repeat_penalty(),
1060            repeat_last_n: default_repeat_last_n(),
1061        }
1062    }
1063}
1064
1065// ─── Unified config types ─────────────────────────────────────────────────────
1066
1067/// Routing strategy for the `[[llm.providers]]` pool.
1068#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
1069#[serde(rename_all = "lowercase")]
1070pub enum LlmRoutingStrategy {
1071    /// Single provider or first-in-pool (default).
1072    #[default]
1073    None,
1074    /// Exponential moving average latency-aware ordering.
1075    Ema,
1076    /// Thompson Sampling with Beta distributions.
1077    Thompson,
1078    /// Cascade: try cheapest provider first, escalate on degenerate output.
1079    Cascade,
1080    /// Complexity triage routing: pre-classify each request, delegate to appropriate tier.
1081    Triage,
1082    /// PILOT: `LinUCB` contextual bandit with online learning and budget-aware reward.
1083    Bandit,
1084}
1085
1086fn default_triage_timeout_secs() -> u64 {
1087    5
1088}
1089
1090fn default_max_triage_tokens() -> u32 {
1091    50
1092}
1093
1094fn default_true() -> bool {
1095    true
1096}
1097
1098/// Tier-to-provider name mapping for complexity routing.
1099#[derive(Debug, Clone, Default, Deserialize, Serialize)]
1100pub struct TierMapping {
1101    pub simple: Option<String>,
1102    pub medium: Option<String>,
1103    pub complex: Option<String>,
1104    pub expert: Option<String>,
1105}
1106
1107/// Configuration for complexity-based triage routing (`routing = "triage"`).
1108///
1109/// When `[llm] routing = "triage"` is set, a cheap triage model classifies each request
1110/// and routes it to the appropriate tier provider. Requires at least one tier mapping.
1111///
1112/// # Example
1113///
1114/// ```toml
1115/// [llm]
1116/// routing = "triage"
1117///
1118/// [llm.complexity_routing]
1119/// triage_provider = "local-fast"
1120///
1121/// [llm.complexity_routing.tiers]
1122/// simple = "local-fast"
1123/// medium = "haiku"
1124/// complex = "sonnet"
1125/// expert = "opus"
1126/// ```
1127#[derive(Debug, Clone, Deserialize, Serialize)]
1128pub struct ComplexityRoutingConfig {
1129    /// Provider name from `[[llm.providers]]` used for triage classification.
1130    #[serde(default)]
1131    pub triage_provider: Option<ProviderName>,
1132
1133    /// Skip triage when all tiers map to the same provider.
1134    #[serde(default = "default_true")]
1135    pub bypass_single_provider: bool,
1136
1137    /// Tier-to-provider name mapping.
1138    #[serde(default)]
1139    pub tiers: TierMapping,
1140
1141    /// Max output tokens for the triage classification call. Default: 50.
1142    #[serde(default = "default_max_triage_tokens")]
1143    pub max_triage_tokens: u32,
1144
1145    /// Timeout in seconds for the triage classification call. Default: 5.
1146    /// On timeout, falls back to the default (first) tier provider.
1147    #[serde(default = "default_triage_timeout_secs")]
1148    pub triage_timeout_secs: u64,
1149
1150    /// Optional fallback strategy when triage misclassifies.
1151    /// Only `"cascade"` is currently supported (Phase 4).
1152    #[serde(default)]
1153    pub fallback_strategy: Option<String>,
1154}
1155
1156impl Default for ComplexityRoutingConfig {
1157    fn default() -> Self {
1158        Self {
1159            triage_provider: None,
1160            bypass_single_provider: true,
1161            tiers: TierMapping::default(),
1162            max_triage_tokens: default_max_triage_tokens(),
1163            triage_timeout_secs: default_triage_timeout_secs(),
1164            fallback_strategy: None,
1165        }
1166    }
1167}
1168
1169/// Configuration for the Collaborative Entropy (`CoE`) subsystem (`[llm.coe]` TOML section).
1170///
1171/// `CoE` detects uncertain responses from the primary provider and escalates to a
1172/// secondary provider when either the intra-entropy or inter-divergence signal crosses
1173/// its threshold. Only active for `RouterStrategy::Ema` and `RouterStrategy::Thompson`.
1174///
1175/// # Example
1176///
1177/// ```toml
1178/// [llm.coe]
1179/// enabled = true
1180/// intra_threshold = 0.8
1181/// inter_threshold = 0.20
1182/// shadow_sample_rate = 0.1
1183/// secondary_provider = "quality"
1184/// embed_provider = ""
1185/// ```
1186#[derive(Debug, Clone, Deserialize, Serialize)]
1187#[serde(default)]
1188pub struct CoeConfig {
1189    /// Enable `CoE`. When `false`, the struct is ignored.
1190    pub enabled: bool,
1191    /// Mean negative log-prob threshold; responses above this trigger intra escalation.
1192    pub intra_threshold: f64,
1193    /// Divergence threshold in `[0.0, 1.0]`.
1194    pub inter_threshold: f64,
1195    /// Baseline rate at which secondary is called even when intra is low.
1196    pub shadow_sample_rate: f64,
1197    /// Provider name from `[[llm.providers]]` used as the escalation target.
1198    pub secondary_provider: ProviderName,
1199    /// Provider name for inter-divergence embeddings. Empty → inherit bandit's embed provider.
1200    pub embed_provider: ProviderName,
1201}
1202
1203impl Default for CoeConfig {
1204    fn default() -> Self {
1205        Self {
1206            enabled: false,
1207            intra_threshold: 0.8,
1208            inter_threshold: 0.20,
1209            shadow_sample_rate: 0.1,
1210            secondary_provider: ProviderName::default(),
1211            embed_provider: ProviderName::default(),
1212        }
1213    }
1214}
1215
1216/// Inline candle config for use inside `ProviderEntry`.
1217/// Re-uses the generation params from `CandleConfig`.
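///
/// # Example (TOML)
///
/// An illustrative Candle entry in the provider pool; the inline config lives under the
/// entry's `candle` key (in TOML the nested table addresses the most recent
/// `[[llm.providers]]` element, and the `name` is a placeholder):
///
/// ```toml
/// [[llm.providers]]
/// type = "candle"
/// name = "local-candle"
///
/// [llm.providers.candle]
/// device = "cpu"
/// chat_template = "chatml"
/// ```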
1218#[derive(Debug, Clone, Deserialize, Serialize)]
1219pub struct CandleInlineConfig {
1220    #[serde(default = "default_candle_source")]
1221    pub source: String,
1222    #[serde(default)]
1223    pub local_path: String,
1224    #[serde(default)]
1225    pub filename: Option<String>,
1226    #[serde(default = "default_chat_template")]
1227    pub chat_template: String,
1228    #[serde(default = "default_candle_device")]
1229    pub device: String,
1230    #[serde(default)]
1231    pub embedding_repo: Option<String>,
1232    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
1233    #[serde(default)]
1234    pub hf_token: Option<String>,
1235    #[serde(default)]
1236    pub generation: GenerationParams,
1237    /// Maximum wall-clock seconds to wait for a single inference request.
1238    ///
1239    /// Effective timeout is `2 × inference_timeout_secs` (send + recv each have this budget).
1240    /// CPU inference can be slow; 120s is a conservative default. Floored at 1s.
1241    #[serde(default = "default_inference_timeout_secs")]
1242    pub inference_timeout_secs: u64,
1243}
1244
1245impl Default for CandleInlineConfig {
1246    fn default() -> Self {
1247        Self {
1248            source: default_candle_source(),
1249            local_path: String::new(),
1250            filename: None,
1251            chat_template: default_chat_template(),
1252            device: default_candle_device(),
1253            embedding_repo: None,
1254            hf_token: None,
1255            generation: GenerationParams::default(),
1256            inference_timeout_secs: default_inference_timeout_secs(),
1257        }
1258    }
1259}
1260
1261/// Unified provider entry: one struct replaces `CloudLlmConfig`, `OpenAiConfig`,
1262/// `GeminiConfig`, `OllamaConfig`, `CompatibleConfig`, and `OrchestratorProviderConfig`.
1263///
1264/// Provider-specific fields use `#[serde(default)]` and are ignored by backends
1265/// that do not use them (flat-union pattern).
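///
/// # Example (TOML)
///
/// Two illustrative entries showing the flat-union pattern: each backend reads only
/// the fields it understands (names, models, and effort values are placeholders):
///
/// ```toml
/// [[llm.providers]]
/// name = "fast"
/// type = "openai"
/// model = "gpt-4o-mini"
/// reasoning_effort = "low"
///
/// [[llm.providers]]
/// name = "quality"
/// type = "claude"
/// model = "claude-sonnet-4-6"
/// max_tokens = 8192
/// thinking = { mode = "adaptive" }
/// ```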
1266#[derive(Debug, Clone, Deserialize, Serialize)]
1267#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
1268pub struct ProviderEntry {
1269    /// Required: provider backend type.
1270    #[serde(rename = "type")]
1271    pub provider_type: ProviderKind,
1272
1273    /// Optional name for multi-provider configs. Auto-generated from type if absent.
1274    #[serde(default)]
1275    pub name: Option<String>,
1276
1277    /// Model identifier. Required for most types.
1278    #[serde(default)]
1279    pub model: Option<String>,
1280
1281    /// API base URL. Each type has its own default.
1282    #[serde(default)]
1283    pub base_url: Option<String>,
1284
1285    /// Max output tokens.
1286    #[serde(default)]
1287    pub max_tokens: Option<u32>,
1288
1289    /// Embedding model. When set, this provider supports `embed()` calls.
1290    #[serde(default)]
1291    pub embedding_model: Option<String>,
1292
1293    /// STT model. When set, this provider supports speech-to-text via the Whisper API or
1294    /// Candle-local inference.
1295    #[serde(default)]
1296    pub stt_model: Option<String>,
1297
1298    /// Mark this entry as the embedding provider (handles `embed()` calls).
1299    #[serde(default)]
1300    pub embed: bool,
1301
1302    /// Mark this entry as the default chat provider (overrides position-based default).
1303    #[serde(default)]
1304    pub default: bool,
1305
1306    // --- Claude-specific ---
1307    #[serde(default)]
1308    pub thinking: Option<ThinkingConfig>,
1309    #[serde(default)]
1310    pub server_compaction: bool,
1311    #[serde(default)]
1312    pub enable_extended_context: bool,
1313    /// Prompt cache TTL variant. `None` keeps the default ~5-minute ephemeral TTL.
1314    /// Set to `"1h"` to enable the extended 1-hour TTL (beta, ~2× write cost).
1315    #[serde(default)]
1316    pub prompt_cache_ttl: Option<CacheTtl>,
1317
1318    // --- OpenAI-specific ---
1319    #[serde(default)]
1320    pub reasoning_effort: Option<String>,
1321
1322    // --- Gemini-specific ---
1323    #[serde(default)]
1324    pub thinking_level: Option<GeminiThinkingLevel>,
1325    #[serde(default)]
1326    pub thinking_budget: Option<i32>,
1327    #[serde(default)]
1328    pub include_thoughts: Option<bool>,
1329
1330    // --- Compatible-specific: optional inline api_key ---
1331    #[serde(default)]
1332    pub api_key: Option<String>,
1333
1334    // --- Candle-specific ---
1335    #[serde(default)]
1336    pub candle: Option<CandleInlineConfig>,
1337
1338    // --- Vision ---
1339    #[serde(default)]
1340    pub vision_model: Option<String>,
1341
1342    /// Provider-specific instruction file.
1343    #[serde(default)]
1344    pub instruction_file: Option<std::path::PathBuf>,
1345}
1346
1347impl Default for ProviderEntry {
1348    fn default() -> Self {
1349        Self {
1350            provider_type: ProviderKind::Ollama,
1351            name: None,
1352            model: None,
1353            base_url: None,
1354            max_tokens: None,
1355            embedding_model: None,
1356            stt_model: None,
1357            embed: false,
1358            default: false,
1359            thinking: None,
1360            server_compaction: false,
1361            enable_extended_context: false,
1362            prompt_cache_ttl: None,
1363            reasoning_effort: None,
1364            thinking_level: None,
1365            thinking_budget: None,
1366            include_thoughts: None,
1367            api_key: None,
1368            candle: None,
1369            vision_model: None,
1370            instruction_file: None,
1371        }
1372    }
1373}
1374
1375impl ProviderEntry {
1376    /// Resolve the effective name: explicit `name` field or type string.
1377    #[must_use]
1378    pub fn effective_name(&self) -> String {
1379        self.name
1380            .clone()
1381            .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1382    }
1383
1384    /// Resolve the effective model: explicit `model` field or the provider-type default.
1385    ///
1386    /// Defaults mirror those used in `build_provider_from_entry` so that `runtime.model_name`
1387    /// always reflects the actual model being used rather than the provider type string.
1388    #[must_use]
1389    pub fn effective_model(&self) -> String {
1390        if let Some(ref m) = self.model {
1391            return m.clone();
1392        }
1393        match self.provider_type {
1394            ProviderKind::Ollama => "qwen3:8b".to_owned(),
1395            ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1396            ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1397            ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1398            ProviderKind::Compatible | ProviderKind::Candle => String::new(),
1399        }
1400    }
1401
1402    /// Validate this entry for cross-field consistency.
1403    ///
1404    /// # Errors
1405    ///
1406    /// Returns `ConfigError` when a fatal invariant is violated (e.g. compatible provider
1407    /// without a name).
1408    pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1409        use crate::error::ConfigError;
1410
1411        // B2: compatible provider MUST have name set.
1412        if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1413            return Err(ConfigError::Validation(
1414                "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1415            ));
1416        }
1417
1418        // B1: warn on irrelevant fields.
1419        match self.provider_type {
1420            ProviderKind::Ollama => {
1421                if self.thinking.is_some() {
1422                    tracing::warn!(
1423                        provider = self.effective_name(),
1424                        "field `thinking` is only used by Claude providers"
1425                    );
1426                }
1427                if self.reasoning_effort.is_some() {
1428                    tracing::warn!(
1429                        provider = self.effective_name(),
1430                        "field `reasoning_effort` is only used by OpenAI providers"
1431                    );
1432                }
1433                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1434                    tracing::warn!(
1435                        provider = self.effective_name(),
1436                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1437                    );
1438                }
1439            }
1440            ProviderKind::Claude => {
1441                if self.reasoning_effort.is_some() {
1442                    tracing::warn!(
1443                        provider = self.effective_name(),
1444                        "field `reasoning_effort` is only used by OpenAI providers"
1445                    );
1446                }
1447                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1448                    tracing::warn!(
1449                        provider = self.effective_name(),
1450                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1451                    );
1452                }
1453            }
1454            ProviderKind::OpenAi => {
1455                if self.thinking.is_some() {
1456                    tracing::warn!(
1457                        provider = self.effective_name(),
1458                        "field `thinking` is only used by Claude providers"
1459                    );
1460                }
1461                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1462                    tracing::warn!(
1463                        provider = self.effective_name(),
1464                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1465                    );
1466                }
1467            }
1468            ProviderKind::Gemini => {
1469                if self.thinking.is_some() {
1470                    tracing::warn!(
1471                        provider = self.effective_name(),
1472                        "field `thinking` is only used by Claude providers"
1473                    );
1474                }
1475                if self.reasoning_effort.is_some() {
1476                    tracing::warn!(
1477                        provider = self.effective_name(),
1478                        "field `reasoning_effort` is only used by OpenAI providers"
1479                    );
1480                }
1481            }
1482            _ => {}
1483        }
1484
1485        // W6: Candle STT-only provider (stt_model set, no model) is valid — no warning needed.
1486        // Warn if Ollama has stt_model set (Ollama does not support Whisper API).
1487        if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1488            tracing::warn!(
1489                provider = self.effective_name(),
1490                "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1491                 Whisper STT API — use OpenAI, compatible, or candle instead"
1492            );
1493        }
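        // For reference, a config sketch (schema as exercised in the tests below) that
        // would trigger this warning: an Ollama entry that also carries an STT model.
        //
        //   [[llm.providers]]
        //   type = "ollama"
        //   model = "qwen3:8b"
        //   stt_model = "whisper-1"   # warns; route STT to openai/compatible/candle instead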
1494
1495        Ok(())
1496    }
1497}
1498
1499/// Validate a pool of `ProviderEntry` items.
1500///
1501/// # Errors
1502///
1503/// Returns `ConfigError` for fatal validation failures:
1504/// - Empty pool
1505/// - Duplicate names
1506/// - Multiple entries marked `default = true`
1507/// - Individual entry validation errors
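///
/// # Example
///
/// A minimal sketch of the happy path (one valid entry; the tests below exercise the
/// error cases):
///
/// ```ignore
/// let entry = ProviderEntry {
///     provider_type: ProviderKind::Ollama,
///     name: Some("ollama".into()),
///     model: Some("qwen3:8b".into()),
///     ..Default::default()
/// };
/// assert!(validate_pool(&[entry]).is_ok());
/// ```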
1508pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1509    use crate::error::ConfigError;
1510    use std::collections::HashSet;
1511
1512    if entries.is_empty() {
1513        return Err(ConfigError::Validation(
1514            "at least one LLM provider must be configured in [[llm.providers]]".into(),
1515        ));
1516    }
1517
1518    let default_count = entries.iter().filter(|e| e.default).count();
1519    if default_count > 1 {
1520        return Err(ConfigError::Validation(
1521            "only one [[llm.providers]] entry can be marked `default = true`".into(),
1522        ));
1523    }
1524
1525    let mut seen_names: HashSet<String> = HashSet::new();
1526    for entry in entries {
1527        let name = entry.effective_name();
1528        if !seen_names.insert(name.clone()) {
1529            return Err(ConfigError::Validation(format!(
1530                "duplicate provider name \"{name}\" in [[llm.providers]]"
1531            )));
1532        }
1533        entry.validate()?;
1534    }
1535
1536    Ok(())
1537}
1538
1539#[cfg(test)]
1540mod tests {
1541    use super::*;
1542
1543    fn ollama_entry() -> ProviderEntry {
1544        ProviderEntry {
1545            provider_type: ProviderKind::Ollama,
1546            name: Some("ollama".into()),
1547            model: Some("qwen3:8b".into()),
1548            ..Default::default()
1549        }
1550    }
1551
1552    fn claude_entry() -> ProviderEntry {
1553        ProviderEntry {
1554            provider_type: ProviderKind::Claude,
1555            name: Some("claude".into()),
1556            model: Some("claude-sonnet-4-6".into()),
1557            max_tokens: Some(8192),
1558            ..Default::default()
1559        }
1560    }
1561
1562    // ─── ProviderEntry::validate ─────────────────────────────────────────────
1563
1564    #[test]
1565    fn validate_ollama_valid() {
1566        assert!(ollama_entry().validate().is_ok());
1567    }
1568
1569    #[test]
1570    fn validate_claude_valid() {
1571        assert!(claude_entry().validate().is_ok());
1572    }
1573
1574    #[test]
1575    fn validate_compatible_without_name_errors() {
1576        let entry = ProviderEntry {
1577            provider_type: ProviderKind::Compatible,
1578            name: None,
1579            ..Default::default()
1580        };
1581        let err = entry.validate().unwrap_err();
1582        assert!(
1583            err.to_string().contains("compatible"),
1584            "error should mention compatible: {err}"
1585        );
1586    }
1587
1588    #[test]
1589    fn validate_compatible_with_name_ok() {
1590        let entry = ProviderEntry {
1591            provider_type: ProviderKind::Compatible,
1592            name: Some("my-proxy".into()),
1593            base_url: Some("http://localhost:8080".into()),
1594            model: Some("gpt-4o".into()),
1595            max_tokens: Some(4096),
1596            ..Default::default()
1597        };
1598        assert!(entry.validate().is_ok());
1599    }
1600
1601    #[test]
1602    fn validate_openai_valid() {
1603        let entry = ProviderEntry {
1604            provider_type: ProviderKind::OpenAi,
1605            name: Some("openai".into()),
1606            model: Some("gpt-4o".into()),
1607            max_tokens: Some(4096),
1608            ..Default::default()
1609        };
1610        assert!(entry.validate().is_ok());
1611    }
1612
1613    #[test]
1614    fn validate_gemini_valid() {
1615        let entry = ProviderEntry {
1616            provider_type: ProviderKind::Gemini,
1617            name: Some("gemini".into()),
1618            model: Some("gemini-2.0-flash".into()),
1619            ..Default::default()
1620        };
1621        assert!(entry.validate().is_ok());
1622    }
1623
1624    // ─── validate_pool ───────────────────────────────────────────────────────
1625
1626    #[test]
1627    fn validate_pool_empty_errors() {
1628        let err = validate_pool(&[]).unwrap_err();
1629        assert!(err.to_string().contains("at least one"), "{err}");
1630    }
1631
1632    #[test]
1633    fn validate_pool_single_entry_ok() {
1634        assert!(validate_pool(&[ollama_entry()]).is_ok());
1635    }
1636
1637    #[test]
1638    fn validate_pool_duplicate_names_errors() {
1639        let a = ollama_entry();
1640        let b = ollama_entry(); // same effective name "ollama"
1641        let err = validate_pool(&[a, b]).unwrap_err();
1642        assert!(err.to_string().contains("duplicate"), "{err}");
1643    }
1644
1645    #[test]
1646    fn validate_pool_multiple_defaults_errors() {
1647        let mut a = ollama_entry();
1648        let mut b = claude_entry();
1649        a.default = true;
1650        b.default = true;
1651        let err = validate_pool(&[a, b]).unwrap_err();
1652        assert!(err.to_string().contains("default"), "{err}");
1653    }
1654
1655    #[test]
1656    fn validate_pool_two_different_providers_ok() {
1657        assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
1658    }
1659
1660    #[test]
1661    fn validate_pool_propagates_entry_error() {
1662        let bad = ProviderEntry {
1663            provider_type: ProviderKind::Compatible,
1664            name: None, // invalid: compatible without name
1665            ..Default::default()
1666        };
1667        assert!(validate_pool(&[bad]).is_err());
1668    }
1669
1670    // ─── ProviderEntry::effective_model ──────────────────────────────────────
1671
1672    #[test]
1673    fn effective_model_returns_explicit_when_set() {
1674        let entry = ProviderEntry {
1675            provider_type: ProviderKind::Claude,
1676            model: Some("claude-sonnet-4-6".into()),
1677            ..Default::default()
1678        };
1679        assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
1680    }
1681
1682    #[test]
1683    fn effective_model_ollama_default_when_none() {
1684        let entry = ProviderEntry {
1685            provider_type: ProviderKind::Ollama,
1686            model: None,
1687            ..Default::default()
1688        };
1689        assert_eq!(entry.effective_model(), "qwen3:8b");
1690    }
1691
1692    #[test]
1693    fn effective_model_claude_default_when_none() {
1694        let entry = ProviderEntry {
1695            provider_type: ProviderKind::Claude,
1696            model: None,
1697            ..Default::default()
1698        };
1699        assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
1700    }
1701
1702    #[test]
1703    fn effective_model_openai_default_when_none() {
1704        let entry = ProviderEntry {
1705            provider_type: ProviderKind::OpenAi,
1706            model: None,
1707            ..Default::default()
1708        };
1709        assert_eq!(entry.effective_model(), "gpt-4o-mini");
1710    }
1711
1712    #[test]
1713    fn effective_model_gemini_default_when_none() {
1714        let entry = ProviderEntry {
1715            provider_type: ProviderKind::Gemini,
1716            model: None,
1717            ..Default::default()
1718        };
1719        assert_eq!(entry.effective_model(), "gemini-2.0-flash");
1720    }
1721
1722    // ─── LlmConfig::check_legacy_format ──────────────────────────────────────
1723
1724    // Parse a complete TOML snippet that includes the [llm] header.
1725    fn parse_llm(toml: &str) -> LlmConfig {
1726        #[derive(serde::Deserialize)]
1727        struct Wrapper {
1728            llm: LlmConfig,
1729        }
1730        toml::from_str::<Wrapper>(toml).unwrap().llm
1731    }
1732
1733    #[test]
1734    fn check_legacy_format_new_format_ok() {
1735        let cfg = parse_llm(
1736            r#"
1737[llm]
1738
1739[[llm.providers]]
1740type = "ollama"
1741model = "qwen3:8b"
1742"#,
1743        );
1744        assert!(cfg.check_legacy_format().is_ok());
1745    }
1746
1747    #[test]
1748    fn check_legacy_format_empty_providers_no_legacy_ok() {
1749        // No providers, no legacy fields — passes (empty [llm] is acceptable here)
1750        let cfg = parse_llm("[llm]\n");
1751        assert!(cfg.check_legacy_format().is_ok());
1752    }
1753
1754    // ─── LlmConfig::effective_* helpers ──────────────────────────────────────
1755
1756    #[test]
1757    fn effective_provider_falls_back_to_ollama_when_no_providers() {
1758        let cfg = parse_llm("[llm]\n");
1759        assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
1760    }
1761
1762    #[test]
1763    fn effective_provider_reads_from_providers_first() {
1764        let cfg = parse_llm(
1765            r#"
1766[llm]
1767
1768[[llm.providers]]
1769type = "claude"
1770model = "claude-sonnet-4-6"
1771"#,
1772        );
1773        assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
1774    }
1775
1776    #[test]
1777    fn effective_model_reads_from_providers_first() {
1778        let cfg = parse_llm(
1779            r#"
1780[llm]
1781
1782[[llm.providers]]
1783type = "ollama"
1784model = "qwen3:8b"
1785"#,
1786        );
1787        assert_eq!(cfg.effective_model(), "qwen3:8b");
1788    }
1789
1790    #[test]
1791    fn effective_model_skips_embed_only_provider() {
1792        let cfg = parse_llm(
1793            r#"
1794[llm]
1795
1796[[llm.providers]]
1797type = "ollama"
1798model = "gemma4:26b"
1799embed = true
1800
1801[[llm.providers]]
1802type = "openai"
1803model = "gpt-4o-mini"
1804"#,
1805        );
1806        assert_eq!(cfg.effective_model(), "gpt-4o-mini");
1807    }
1808
1809    #[test]
1810    fn effective_base_url_default_when_absent() {
1811        let cfg = parse_llm("[llm]\n");
1812        assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
1813    }
1814
1815    #[test]
1816    fn effective_base_url_from_providers_entry() {
1817        let cfg = parse_llm(
1818            r#"
1819[llm]
1820
1821[[llm.providers]]
1822type = "ollama"
1823base_url = "http://myhost:11434"
1824"#,
1825        );
1826        assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
1827    }
1828
1829    // ─── ComplexityRoutingConfig / LlmRoutingStrategy::Triage TOML parsing ──
1830
1831    #[test]
1832    fn complexity_routing_defaults() {
1833        let cr = ComplexityRoutingConfig::default();
1834        assert!(
1835            cr.bypass_single_provider,
1836            "bypass_single_provider must default to true"
1837        );
1838        assert_eq!(cr.triage_timeout_secs, 5);
1839        assert_eq!(cr.max_triage_tokens, 50);
1840        assert!(cr.triage_provider.is_none());
1841        assert!(cr.tiers.simple.is_none());
1842    }
1843
1844    #[test]
1845    fn complexity_routing_toml_round_trip() {
1846        let cfg = parse_llm(
1847            r#"
1848[llm]
1849routing = "triage"
1850
1851[llm.complexity_routing]
1852triage_provider = "fast"
1853bypass_single_provider = false
1854triage_timeout_secs = 10
1855max_triage_tokens = 100
1856
1857[llm.complexity_routing.tiers]
1858simple = "fast"
1859medium = "medium"
1860complex = "large"
1861expert = "opus"
1862"#,
1863        );
1864        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
1865        let cr = cfg
1866            .complexity_routing
1867            .expect("complexity_routing must be present");
1868        assert_eq!(cr.triage_provider.as_deref(), Some("fast"));
1869        assert!(!cr.bypass_single_provider);
1870        assert_eq!(cr.triage_timeout_secs, 10);
1871        assert_eq!(cr.max_triage_tokens, 100);
1872        assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
1873        assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
1874        assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
1875        assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
1876    }
1877
1878    #[test]
1879    fn complexity_routing_partial_tiers_toml() {
1880        // Only simple + complex configured; medium and expert are None.
1881        let cfg = parse_llm(
1882            r#"
1883[llm]
1884routing = "triage"
1885
1886[llm.complexity_routing.tiers]
1887simple = "haiku"
1888complex = "sonnet"
1889"#,
1890        );
1891        let cr = cfg
1892            .complexity_routing
1893            .expect("complexity_routing must be present");
1894        assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
1895        assert!(cr.tiers.medium.is_none());
1896        assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
1897        assert!(cr.tiers.expert.is_none());
1898        // Defaults still applied.
1899        assert!(cr.bypass_single_provider);
1900        assert_eq!(cr.triage_timeout_secs, 5);
1901    }
1902
1903    #[test]
1904    fn routing_strategy_triage_deserialized() {
1905        let cfg = parse_llm(
1906            r#"
1907[llm]
1908routing = "triage"
1909"#,
1910        );
1911        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
1912    }
1913
1914    // ─── stt_provider_entry ───────────────────────────────────────────────────
1915
1916    #[test]
1917    fn stt_provider_entry_by_name_match() {
1918        let cfg = parse_llm(
1919            r#"
1920[llm]
1921
1922[[llm.providers]]
1923type = "openai"
1924name = "quality"
1925model = "gpt-5.4"
1926stt_model = "gpt-4o-mini-transcribe"
1927
1928[llm.stt]
1929provider = "quality"
1930"#,
1931        );
1932        let entry = cfg.stt_provider_entry().expect("should find stt provider");
1933        assert_eq!(entry.effective_name(), "quality");
1934        assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
1935    }
1936
1937    #[test]
1938    fn stt_provider_entry_auto_detect_when_provider_empty() {
1939        let cfg = parse_llm(
1940            r#"
1941[llm]
1942
1943[[llm.providers]]
1944type = "openai"
1945name = "openai-stt"
1946stt_model = "whisper-1"
1947
1948[llm.stt]
1949provider = ""
1950"#,
1951        );
1952        let entry = cfg.stt_provider_entry().expect("should auto-detect");
1953        assert_eq!(entry.effective_name(), "openai-stt");
1954    }
1955
1956    #[test]
1957    fn stt_provider_entry_auto_detect_no_stt_section() {
1958        let cfg = parse_llm(
1959            r#"
1960[llm]
1961
1962[[llm.providers]]
1963type = "openai"
1964name = "openai-stt"
1965stt_model = "whisper-1"
1966"#,
1967        );
1968        // No [llm.stt] section — should still find first provider with stt_model.
1969        let entry = cfg.stt_provider_entry().expect("should auto-detect");
1970        assert_eq!(entry.effective_name(), "openai-stt");
1971    }
1972
1973    #[test]
1974    fn stt_provider_entry_none_when_no_stt_model() {
1975        let cfg = parse_llm(
1976            r#"
1977[llm]
1978
1979[[llm.providers]]
1980type = "openai"
1981name = "quality"
1982model = "gpt-5.4"
1983"#,
1984        );
1985        assert!(cfg.stt_provider_entry().is_none());
1986    }
1987
1988    #[test]
1989    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
1990        // The referenced provider has no stt_model; a second, differently named provider does.
1991        let cfg = parse_llm(
1992            r#"
1993[llm]
1994
1995[[llm.providers]]
1996type = "openai"
1997name = "quality"
1998model = "gpt-5.4"
1999
2000[[llm.providers]]
2001type = "openai"
2002name = "openai-stt"
2003stt_model = "whisper-1"
2004
2005[llm.stt]
2006provider = "quality"
2007"#,
2008        );
2009        // "quality" has no stt_model — returns None for name-based lookup.
2010        assert!(cfg.stt_provider_entry().is_none());
2011    }
2012
2013    #[test]
2014    fn stt_config_deserializes_new_slim_format() {
2015        let cfg = parse_llm(
2016            r#"
2017[llm]
2018
2019[[llm.providers]]
2020type = "openai"
2021name = "quality"
2022stt_model = "whisper-1"
2023
2024[llm.stt]
2025provider = "quality"
2026language = "en"
2027"#,
2028        );
2029        let stt = cfg.stt.as_ref().expect("stt section present");
2030        assert_eq!(stt.provider, "quality");
2031        assert_eq!(stt.language, "en");
2032    }
2033
2034    #[test]
2035    fn stt_config_default_provider_is_empty() {
2036        // Verify the W4 fix: default_stt_provider() returns "" rather than "whisper".
2037        assert_eq!(default_stt_provider(), "");
2038    }
2039
2040    #[test]
2041    fn validate_stt_missing_provider_ok() {
2042        let cfg = parse_llm("[llm]\n");
2043        assert!(cfg.validate_stt().is_ok());
2044    }
2045
2046    #[test]
2047    fn validate_stt_valid_reference() {
2048        let cfg = parse_llm(
2049            r#"
2050[llm]
2051
2052[[llm.providers]]
2053type = "openai"
2054name = "quality"
2055stt_model = "whisper-1"
2056
2057[llm.stt]
2058provider = "quality"
2059"#,
2060        );
2061        assert!(cfg.validate_stt().is_ok());
2062    }
2063
2064    #[test]
2065    fn validate_stt_nonexistent_provider_errors() {
2066        let cfg = parse_llm(
2067            r#"
2068[llm]
2069
2070[[llm.providers]]
2071type = "openai"
2072name = "quality"
2073model = "gpt-5.4"
2074
2075[llm.stt]
2076provider = "nonexistent"
2077"#,
2078        );
2079        assert!(cfg.validate_stt().is_err());
2080    }
2081
2082    #[test]
2083    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
2084        // MEDIUM: provider is found but has no stt_model — should return Ok (warn path, not error).
2085        let cfg = parse_llm(
2086            r#"
2087[llm]
2088
2089[[llm.providers]]
2090type = "openai"
2091name = "quality"
2092model = "gpt-5.4"
2093
2094[llm.stt]
2095provider = "quality"
2096"#,
2097        );
2098        // validate_stt must succeed (only a tracing::warn is emitted — not an error).
2099        assert!(cfg.validate_stt().is_ok());
2100        // stt_provider_entry must return None because no stt_model is set.
2101        assert!(
2102            cfg.stt_provider_entry().is_none(),
2103            "stt_provider_entry must be None when provider has no stt_model"
2104        );
2105    }
2106
2107    // ─── BanditConfig::warmup_queries deserialization ─────────────────────────
2108
2109    #[test]
2110    fn bandit_warmup_queries_explicit_value_is_deserialized() {
2111        let cfg = parse_llm(
2112            r#"
2113[llm]
2114
2115[llm.router]
2116strategy = "bandit"
2117
2118[llm.router.bandit]
2119warmup_queries = 50
2120"#,
2121        );
2122        let bandit = cfg
2123            .router
2124            .expect("router section must be present")
2125            .bandit
2126            .expect("bandit section must be present");
2127        assert_eq!(
2128            bandit.warmup_queries,
2129            Some(50),
2130            "warmup_queries = 50 must deserialize to Some(50)"
2131        );
2132    }
2133
2134    #[test]
2135    fn bandit_warmup_queries_explicit_zero_is_some_zero() {
2136        // TOML has no explicit null; the omitted-field case is covered by a test below
2137        // via #[serde(default)]. Here we check that an explicit 0 deserializes to Some(0).
2138        let cfg = parse_llm(
2139            r#"
2140[llm]
2141
2142[llm.router]
2143strategy = "bandit"
2144
2145[llm.router.bandit]
2146warmup_queries = 0
2147"#,
2148        );
2149        let bandit = cfg
2150            .router
2151            .expect("router section must be present")
2152            .bandit
2153            .expect("bandit section must be present");
2154        // 0 is a valid explicit value — it means "preserve computed default".
2155        assert_eq!(
2156            bandit.warmup_queries,
2157            Some(0),
2158            "warmup_queries = 0 must deserialize to Some(0)"
2159        );
2160    }
2161
2162    #[test]
2163    fn bandit_warmup_queries_missing_field_defaults_to_none() {
2164        // When warmup_queries is omitted entirely, #[serde(default)] must produce None.
2165        let cfg = parse_llm(
2166            r#"
2167[llm]
2168
2169[llm.router]
2170strategy = "bandit"
2171
2172[llm.router.bandit]
2173alpha = 1.5
2174"#,
2175        );
2176        let bandit = cfg
2177            .router
2178            .expect("router section must be present")
2179            .bandit
2180            .expect("bandit section must be present");
2181        assert_eq!(
2182            bandit.warmup_queries, None,
2183            "omitted warmup_queries must default to None"
2184        );
2185    }
2186
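    // ─── ProviderName ─────────────────────────────────────────────────────────
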
2187    #[test]
2188    fn provider_name_new_and_as_str() {
2189        let n = ProviderName::new("fast");
2190        assert_eq!(n.as_str(), "fast");
2191        assert!(!n.is_empty());
2192    }
2193
2194    #[test]
2195    fn provider_name_default_is_empty() {
2196        let n = ProviderName::default();
2197        assert!(n.is_empty());
2198        assert_eq!(n.as_str(), "");
2199    }
2200
2201    #[test]
2202    fn provider_name_deref_to_str() {
2203        let n = ProviderName::new("quality");
2204        let s: &str = &n;
2205        assert_eq!(s, "quality");
2206    }
2207
2208    #[test]
2209    fn provider_name_partial_eq_str() {
2210        let n = ProviderName::new("fast");
2211        assert_eq!(n, "fast");
2212        assert_ne!(n, "slow");
2213    }
2214
2215    #[test]
2216    fn provider_name_serde_roundtrip() {
2217        let n = ProviderName::new("my-provider");
2218        let json = serde_json::to_string(&n).expect("serialize");
2219        assert_eq!(json, "\"my-provider\"");
2220        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2221        assert_eq!(back, n);
2222    }
2223
2224    #[test]
2225    fn provider_name_serde_empty_roundtrip() {
2226        let n = ProviderName::default();
2227        let json = serde_json::to_string(&n).expect("serialize");
2228        assert_eq!(json, "\"\"");
2229        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
2230        assert_eq!(back, n);
2231        assert!(back.is_empty());
2232    }
2233}