zeph_config/
providers.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::fmt;
5
6use serde::{Deserialize, Serialize};
7
8// ── LLM provider config types (moved from zeph-llm) ─────────────────────────
9
10/// Extended or adaptive thinking mode for Claude.
11///
12/// Serializes with `mode` as tag:
13/// `{ "mode": "extended", "budget_tokens": 10000 }` or `{ "mode": "adaptive" }`.
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
15#[serde(tag = "mode", rename_all = "snake_case")]
16pub enum ThinkingConfig {
17    /// Extended thinking with an explicit token budget.
18    Extended {
19        /// Maximum thinking tokens to allocate.
20        budget_tokens: u32,
21    },
22    /// Adaptive thinking that selects effort automatically.
23    Adaptive {
24        /// Explicit effort hint when provided; model-chosen when `None`.
25        #[serde(default, skip_serializing_if = "Option::is_none")]
26        effort: Option<ThinkingEffort>,
27    },
28}
29
30/// Effort level for adaptive thinking.
31#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
32#[serde(rename_all = "lowercase")]
33pub enum ThinkingEffort {
34    /// Minimal thinking; fastest responses.
35    Low,
36    /// Balanced thinking depth. This is the default.
37    #[default]
38    Medium,
39    /// Maximum thinking depth; slowest responses.
40    High,
41}
42
43/// Prompt-cache TTL variant for the Anthropic API.
44///
45/// When used as a TOML config value the accepted strings are `"ephemeral"` and `"1h"`.
46/// On the wire (Anthropic API), `OneHour` serializes as `"1h"` inside the `cache_control.ttl`
47/// field.
48#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Default)]
49#[serde(rename_all = "snake_case")]
50pub enum CacheTtl {
51    /// Default ephemeral TTL (~5 minutes). No beta header required.
52    #[default]
53    Ephemeral,
54    /// Extended 1-hour TTL. Requires the `extended-cache-ttl-2025-04-25` beta header.
55    /// Cache writes cost approximately 2× more than `Ephemeral`.
56    #[serde(rename = "1h")]
57    OneHour,
58}
59
60impl CacheTtl {
61    /// Returns `true` when this TTL variant requires the `extended-cache-ttl-2025-04-25` beta
62    /// header to be sent with each request.
63    #[must_use]
64    pub fn requires_beta(self) -> bool {
65        match self {
66            Self::OneHour => true,
67            Self::Ephemeral => false,
68        }
69    }
70}
71
72/// Thinking level for Gemini models that support extended reasoning.
73///
74/// Maps to `generationConfig.thinkingConfig.thinkingLevel` in the Gemini API.
75/// Valid for Gemini 3+ models. For Gemini 2.5, use `thinking_budget` instead.
76#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
77#[serde(rename_all = "lowercase")]
78pub enum GeminiThinkingLevel {
79    /// Minimal reasoning pass.
80    Minimal,
81    /// Low reasoning depth.
82    Low,
83    /// Medium reasoning depth.
84    Medium,
85    /// Full reasoning depth.
86    High,
87}
88
89/// Newtype wrapper for a provider name referencing an entry in `[[llm.providers]]`.
90///
91/// Using a dedicated type instead of bare `String` makes provider cross-references
92/// explicit in the type system and enables validation at config load time.
93///
94/// # Note
95///
96/// `zeph-common` now defines a canonical `ProviderName(Arc<str>)` newtype. This
97/// config-local type uses `String` and exists for backward compat within `zeph-config`.
98///
99/// TODO(critic): migrate to `zeph_common::ProviderName` once `zeph-config` → `zeph-common`
100/// dependency inversion (A-1) lands.
101#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
102#[serde(transparent)]
103pub struct ProviderName(String);
104
105impl ProviderName {
106    /// Create a new `ProviderName` from any string-like value.
107    ///
108    /// An empty string is a sentinel meaning "use the primary provider" and is the
109    /// default value. Check [`is_empty`](Self::is_empty) before using in routing.
110    ///
111    /// # Examples
112    ///
113    /// ```
114    /// use zeph_config::providers::ProviderName;
115    ///
116    /// let name = ProviderName::new("fast");
117    /// assert_eq!(name.as_str(), "fast");
118    /// ```
119    #[must_use]
120    pub fn new(name: impl Into<String>) -> Self {
121        Self(name.into())
122    }
123
124    /// Return `true` when this is the empty sentinel (use primary provider).
125    ///
126    /// # Examples
127    ///
128    /// ```
129    /// use zeph_config::providers::ProviderName;
130    ///
131    /// assert!(ProviderName::default().is_empty());
132    /// assert!(!ProviderName::new("fast").is_empty());
133    /// ```
134    #[must_use]
135    pub fn is_empty(&self) -> bool {
136        self.0.is_empty()
137    }
138
139    /// Return the inner string slice.
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use zeph_config::providers::ProviderName;
145    ///
146    /// let name = ProviderName::new("quality");
147    /// assert_eq!(name.as_str(), "quality");
148    /// ```
149    #[must_use]
150    pub fn as_str(&self) -> &str {
151        &self.0
152    }
153
154    /// Return `Some(&str)` when non-empty, `None` for the empty sentinel.
155    ///
156    /// Bridges `Option<ProviderName>` fields and the legacy
157    /// `.as_deref().filter(|s| !s.is_empty())` pattern.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use zeph_config::providers::ProviderName;
163    ///
164    /// assert_eq!(ProviderName::default().as_non_empty(), None);
165    /// assert_eq!(ProviderName::new("fast").as_non_empty(), Some("fast"));
166    /// ```
167    #[must_use]
168    pub fn as_non_empty(&self) -> Option<&str> {
169        if self.0.is_empty() {
170            None
171        } else {
172            Some(&self.0)
173        }
174    }
175}
176
177impl fmt::Display for ProviderName {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        self.0.fmt(f)
180    }
181}
182
183impl AsRef<str> for ProviderName {
184    fn as_ref(&self) -> &str {
185        &self.0
186    }
187}
188
189impl std::ops::Deref for ProviderName {
190    type Target = str;
191
192    fn deref(&self) -> &str {
193        &self.0
194    }
195}
196
197impl PartialEq<str> for ProviderName {
198    fn eq(&self, other: &str) -> bool {
199        self.0 == other
200    }
201}
202
203impl PartialEq<&str> for ProviderName {
204    fn eq(&self, other: &&str) -> bool {
205        self.0 == *other
206    }
207}
208
/// Serde default for `response_cache_ttl_secs`: 3600 seconds.
fn default_response_cache_ttl_secs() -> u64 {
    3_600
}

/// Serde default for `semantic_cache_threshold`: 0.95.
fn default_semantic_cache_threshold() -> f32 {
    0.95_f32
}

/// Serde default for `semantic_cache_max_candidates`: 10.
fn default_semantic_cache_max_candidates() -> u32 {
    10_u32
}

/// Serde default for `router_ema_alpha`: 0.1.
fn default_router_ema_alpha() -> f64 {
    0.1_f64
}

/// Serde default for `router_reorder_interval`: 10.
fn default_router_reorder_interval() -> u64 {
    10_u64
}
228
/// Default embedding model name: `"qwen3-embedding"`.
fn default_embedding_model() -> String {
    String::from("qwen3-embedding")
}

/// Default Candle source: `"huggingface"`.
fn default_candle_source() -> String {
    String::from("huggingface")
}

/// Default chat template name: `"chatml"`.
fn default_chat_template() -> String {
    String::from("chatml")
}

/// Default Candle device: `"cpu"`.
fn default_candle_device() -> String {
    String::from("cpu")
}
244
/// Default temperature: 0.7.
fn default_temperature() -> f64 {
    0.7_f64
}

/// Default maximum token count: 2048.
fn default_max_tokens() -> usize {
    2_048
}

/// Default seed: 42.
fn default_seed() -> u64 {
    42_u64
}

/// Default repeat penalty: 1.1.
fn default_repeat_penalty() -> f32 {
    1.1_f32
}

/// Default `repeat_last_n` value: 64.
fn default_repeat_last_n() -> usize {
    64_usize
}
264
/// Serde default for `CascadeConfig::quality_threshold`: 0.5.
fn default_cascade_quality_threshold() -> f64 {
    0.5_f64
}

/// Serde default for `CascadeConfig::max_escalations`: 2.
fn default_cascade_max_escalations() -> u8 {
    2_u8
}

/// Serde default for `CascadeConfig::window_size`: 50.
fn default_cascade_window_size() -> usize {
    50_usize
}

/// Serde default for `ReputationConfig::decay_factor`: 0.95.
fn default_reputation_decay_factor() -> f64 {
    0.95_f64
}

/// Serde default for `ReputationConfig::weight`: 0.3.
fn default_reputation_weight() -> f64 {
    0.3_f64
}

/// Serde default for `ReputationConfig::min_observations`: 5.
fn default_reputation_min_observations() -> u64 {
    5_u64
}
288
/// Default STT provider name: the empty string, which selects auto-detection.
#[must_use]
pub fn default_stt_provider() -> String {
    String::default()
}

/// Default STT transcription language hint: `"auto"`.
#[must_use]
pub fn default_stt_language() -> String {
    String::from("auto")
}
300
301/// Returns the default embedding model name used by `[llm] embedding_model`.
302#[must_use]
303pub fn get_default_embedding_model() -> String {
304    default_embedding_model()
305}
306
307/// Returns the default response cache TTL in seconds.
308#[must_use]
309pub fn get_default_response_cache_ttl_secs() -> u64 {
310    default_response_cache_ttl_secs()
311}
312
313/// Returns the default EMA alpha for the router latency estimator.
314#[must_use]
315pub fn get_default_router_ema_alpha() -> f64 {
316    default_router_ema_alpha()
317}
318
319/// Returns the default router reorder interval (turns between provider re-ranking).
320#[must_use]
321pub fn get_default_router_reorder_interval() -> u64 {
322    default_router_reorder_interval()
323}
324
325/// LLM provider backend selector.
326///
327/// Used in `[[llm.providers]]` entries as the `type` field.
328///
329/// # Example (TOML)
330///
331/// ```toml
332/// [[llm.providers]]
333/// type = "openai"
334/// model = "gpt-4o"
335/// name = "quality"
336/// ```
337#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
338#[serde(rename_all = "lowercase")]
339pub enum ProviderKind {
340    /// Local Ollama server (default base URL: `http://localhost:11434`).
341    Ollama,
342    /// Anthropic Claude API.
343    Claude,
344    /// `OpenAI` API.
345    OpenAi,
346    /// Google Gemini API.
347    Gemini,
348    /// Local Candle inference (CPU/GPU, no external server required).
349    Candle,
350    /// OpenAI-compatible third-party API (e.g. Groq, Together AI, LM Studio).
351    Compatible,
352    /// Native Gonka blockchain provider.
353    Gonka,
354}
355
356impl ProviderKind {
357    /// Return the lowercase string identifier for this provider kind.
358    ///
359    /// # Examples
360    ///
361    /// ```
362    /// use zeph_config::ProviderKind;
363    ///
364    /// assert_eq!(ProviderKind::Claude.as_str(), "claude");
365    /// assert_eq!(ProviderKind::OpenAi.as_str(), "openai");
366    /// ```
367    #[must_use]
368    pub fn as_str(self) -> &'static str {
369        match self {
370            Self::Ollama => "ollama",
371            Self::Claude => "claude",
372            Self::OpenAi => "openai",
373            Self::Gemini => "gemini",
374            Self::Candle => "candle",
375            Self::Compatible => "compatible",
376            Self::Gonka => "gonka",
377        }
378    }
379}
380
381impl std::fmt::Display for ProviderKind {
382    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
383        f.write_str(self.as_str())
384    }
385}
386
387/// LLM configuration, nested under `[llm]` in TOML.
388///
389/// Declares the provider pool and controls routing, embedding, caching, and STT.
390/// All providers are declared in `[[llm.providers]]`; subsystems reference them by
391/// the `name` field using a `*_provider` config key.
392///
393/// # Example (TOML)
394///
395/// ```toml
396/// [[llm.providers]]
397/// name = "fast"
398/// type = "openai"
399/// model = "gpt-4o-mini"
400///
401/// [[llm.providers]]
402/// name = "quality"
403/// type = "claude"
404/// model = "claude-opus-4-5"
405///
406/// [llm]
407/// routing = "none"
408/// embedding_model = "qwen3-embedding"
409/// ```
410#[derive(Debug, Deserialize, Serialize)]
411pub struct LlmConfig {
412    /// Provider pool. First entry is default unless one is marked `default = true`.
413    #[serde(default, skip_serializing_if = "Vec::is_empty")]
414    pub providers: Vec<ProviderEntry>,
415
416    /// Routing strategy for multi-provider configs.
417    #[serde(default, skip_serializing_if = "is_routing_none")]
418    pub routing: LlmRoutingStrategy,
419
420    #[serde(default = "default_embedding_model_opt")]
421    pub embedding_model: String,
422    #[serde(default, skip_serializing_if = "Option::is_none")]
423    pub candle: Option<CandleConfig>,
424    #[serde(default)]
425    pub stt: Option<SttConfig>,
426    #[serde(default)]
427    pub response_cache_enabled: bool,
428    #[serde(default = "default_response_cache_ttl_secs")]
429    pub response_cache_ttl_secs: u64,
430    /// Enable semantic similarity-based response caching. Requires embedding support.
431    #[serde(default)]
432    pub semantic_cache_enabled: bool,
433    /// Cosine similarity threshold for semantic cache hits (0.0–1.0).
434    ///
435    /// Only the highest-scoring candidate above this threshold is returned.
436    /// Lower values produce more cache hits but risk returning less relevant responses.
437    /// Recommended range: 0.92–0.98; default: 0.95.
438    #[serde(default = "default_semantic_cache_threshold")]
439    pub semantic_cache_threshold: f32,
440    /// Maximum cached entries to examine per semantic lookup (SQL `LIMIT` clause in
441    /// `ResponseCache::get_semantic()`). Controls the recall-vs-performance tradeoff:
442    ///
443    /// - **Higher values** (e.g. 50): scan more entries, better chance of finding a
444    ///   semantically similar cached response, but slower queries.
445    /// - **Lower values** (e.g. 5): faster queries, but may miss relevant cached entries
446    ///   when the cache is large.
447    /// - **Default (10)**: balanced middle ground for typical workloads.
448    ///
449    /// Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running
450    /// sessions with many cached responses); reduce to 5 for low-latency interactive use.
451    /// Env override: `ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES`.
452    #[serde(default = "default_semantic_cache_max_candidates")]
453    pub semantic_cache_max_candidates: u32,
454    #[serde(default)]
455    pub router_ema_enabled: bool,
456    #[serde(default = "default_router_ema_alpha")]
457    pub router_ema_alpha: f64,
458    #[serde(default = "default_router_reorder_interval")]
459    pub router_reorder_interval: u64,
460    /// Routing configuration for Thompson/Cascade strategies.
461    #[serde(default, skip_serializing_if = "Option::is_none")]
462    pub router: Option<RouterConfig>,
463    /// Provider-specific instruction file to inject into the system prompt.
464    /// Merged with `agent.instruction_files` at startup.
465    #[serde(default, skip_serializing_if = "Option::is_none")]
466    pub instruction_file: Option<std::path::PathBuf>,
467    /// Shorthand model spec for tool-pair summarization and context compaction.
468    /// Format: `ollama/<model>`, `claude[/<model>]`, `openai[/<model>]`, `compatible/<name>`, `candle`.
469    /// Ignored when `[llm.summary_provider]` is set.
470    #[serde(default, skip_serializing_if = "Option::is_none")]
471    pub summary_model: Option<String>,
472    /// Structured provider config for summarization. Takes precedence over `summary_model`.
473    #[serde(default, skip_serializing_if = "Option::is_none")]
474    pub summary_provider: Option<ProviderEntry>,
475
476    /// Complexity triage routing configuration. Required when `routing = "triage"`.
477    #[serde(default, skip_serializing_if = "Option::is_none")]
478    pub complexity_routing: Option<ComplexityRoutingConfig>,
479
480    /// Collaborative Entropy (`CoE`) configuration. `None` = `CoE` disabled.
481    #[serde(default, skip_serializing_if = "Option::is_none")]
482    pub coe: Option<CoeConfig>,
483}
484
485fn default_embedding_model_opt() -> String {
486    default_embedding_model()
487}
488
489#[allow(clippy::trivially_copy_pass_by_ref)]
490fn is_routing_none(s: &LlmRoutingStrategy) -> bool {
491    *s == LlmRoutingStrategy::None
492}
493
494impl LlmConfig {
495    /// Effective provider kind for the primary (first/default) provider in the pool.
496    #[must_use]
497    pub fn effective_provider(&self) -> ProviderKind {
498        self.providers
499            .first()
500            .map_or(ProviderKind::Ollama, |e| e.provider_type)
501    }
502
503    /// Effective base URL for the primary provider.
504    #[must_use]
505    pub fn effective_base_url(&self) -> &str {
506        self.providers
507            .first()
508            .and_then(|e| e.base_url.as_deref())
509            .unwrap_or("http://localhost:11434")
510    }
511
512    /// Effective model for the primary chat-capable provider.
513    ///
514    /// Skips embed-only entries (those with `embed = true`) and returns the model of the
515    /// first provider that can handle chat requests. Falls back to `"qwen3:8b"` when no
516    /// chat-capable provider is configured.
517    #[must_use]
518    pub fn effective_model(&self) -> &str {
519        self.providers
520            .iter()
521            .find(|e| !e.embed)
522            .and_then(|e| e.model.as_deref())
523            .unwrap_or("qwen3:8b")
524    }
525
526    /// Find the provider entry designated for STT.
527    ///
528    /// Resolution priority:
529    /// 1. `[llm.stt].provider` matches `[[llm.providers]].name` and the entry has `stt_model`
530    /// 2. `[llm.stt].provider` is empty — fall through to auto-detect
531    /// 3. First provider with `stt_model` set (auto-detect fallback)
532    /// 4. `None` — STT disabled
533    #[must_use]
534    pub fn stt_provider_entry(&self) -> Option<&ProviderEntry> {
535        let name_hint = self.stt.as_ref().map_or("", |s| s.provider.as_str());
536        if name_hint.is_empty() {
537            self.providers.iter().find(|p| p.stt_model.is_some())
538        } else {
539            self.providers
540                .iter()
541                .find(|p| p.effective_name() == name_hint && p.stt_model.is_some())
542        }
543    }
544
545    /// Validate that the config uses the new `[[llm.providers]]` format.
546    ///
547    /// # Errors
548    ///
549    /// Returns `ConfigError::Validation` when no providers are configured.
550    pub fn check_legacy_format(&self) -> Result<(), crate::error::ConfigError> {
551        Ok(())
552    }
553
554    /// Validate STT config cross-references.
555    ///
556    /// # Errors
557    ///
558    /// Returns `ConfigError::Validation` when the referenced STT provider does not exist.
559    pub fn validate_stt(&self) -> Result<(), crate::error::ConfigError> {
560        use crate::error::ConfigError;
561
562        let Some(stt) = &self.stt else {
563            return Ok(());
564        };
565        if stt.provider.is_empty() {
566            return Ok(());
567        }
568        let found = self
569            .providers
570            .iter()
571            .find(|p| p.effective_name() == stt.provider);
572        match found {
573            None => {
574                return Err(ConfigError::Validation(format!(
575                    "[llm.stt].provider = {:?} does not match any [[llm.providers]] entry",
576                    stt.provider
577                )));
578            }
579            Some(entry) if entry.stt_model.is_none() => {
580                tracing::warn!(
581                    provider = stt.provider,
582                    "[[llm.providers]] entry exists but has no `stt_model` — STT will not be activated"
583                );
584            }
585            _ => {}
586        }
587        Ok(())
588    }
589
590    /// Resolve `provider_name` to its model string and emit a startup warning when the
591    /// model does not look like a fast-tier model.
592    ///
593    /// **Soft check — never returns an error.** Misconfiguration produces a single
594    /// `tracing::warn!` at startup so operators can fix configs without being blocked.
595    ///
596    /// Rules:
597    /// - Empty `provider_name` → silently OK (caller will use the primary provider).
598    /// - Provider not found in pool → warns `"<label> provider '<name>' not found"`.
599    /// - Model resolved but not in `FAST_TIER_MODEL_HINTS` and not in `extra_allowlist` →
600    ///   warns `"<label> provider '<name>' uses '<model>' which may not be fast-tier"`.
601    /// - Model matches a hint or allowlist entry → silently OK.
602    ///
603    /// # Examples
604    ///
605    /// ```no_run
606    /// use zeph_config::providers::{LlmConfig, ProviderName};
607    ///
608    /// // LlmConfig is constructed via config file; here we illustrate the call shape.
609    /// # let cfg: LlmConfig = unimplemented!();
610    /// // empty provider name is silently ok
611    /// cfg.warn_non_fast_tier_provider(&ProviderName::default(), "memcot.distill_provider", &[]);
612    /// ```
613    pub fn warn_non_fast_tier_provider(
614        &self,
615        provider_name: &ProviderName,
616        feature_label: &str,
617        extra_allowlist: &[String],
618    ) {
619        if provider_name.is_empty() {
620            return;
621        }
622        let name = provider_name.as_str();
623        let Some(entry) = self.providers.iter().find(|p| p.effective_name() == name) else {
624            tracing::warn!(
625                provider = name,
626                "{feature_label} provider '{name}' not found in [[llm.providers]]"
627            );
628            return;
629        };
630        let model = entry.model.as_deref().unwrap_or("");
631        if model.is_empty() {
632            return;
633        }
634        let lower = model.to_lowercase();
635        let in_hints = FAST_TIER_MODEL_HINTS.iter().any(|h| lower.contains(h));
636        let in_extra = extra_allowlist.iter().any(|h| lower.contains(h.as_str()));
637        if !in_hints && !in_extra {
638            tracing::warn!(
639                provider = name,
640                actual = model,
641                "{feature_label} provider '{name}' uses model '{model}' \
642                 which may not be fast-tier; prefer a fast model to bound distillation cost"
643            );
644        }
645    }
646}
647
/// Lowercased substrings that identify commonly accepted fast-tier models.
///
/// Used by [`LlmConfig::warn_non_fast_tier_provider`] for a soft startup check: a
/// model counts as fast-tier when its lowercased name contains any of these strings.
/// Entries must stay lowercase, since they are compared against a lowercased model
/// name. Updating this list is non-breaking; a fast model that is missing from it
/// only causes a spurious warning.
pub const FAST_TIER_MODEL_HINTS: &[&str] = &[
    "gpt-4o-mini",
    "gpt-4.1-mini",
    "gpt-5-mini",
    "gpt-5-nano",
    "claude-haiku",
    "claude-3-haiku",
    "claude-3-5-haiku",
    "qwen3:8b",
    "qwen2.5:7b",
    "qwen2:7b",
    "llama3.2:3b",
    "llama3.1:8b",
    "gemma3:4b",
    "gemma3:8b",
    "phi4:mini",
    "mistral:7b",
];
670
671/// Speech-to-text configuration, nested under `[llm.stt]` in TOML.
672///
673/// When set, Zeph uses the referenced provider for voice transcription.
674/// The provider must have an `stt_model` field set in its `[[llm.providers]]` entry.
675///
676/// # Example (TOML)
677///
678/// ```toml
679/// [llm.stt]
680/// provider = "fast"
681/// language = "en"
682/// ```
683#[derive(Debug, Clone, Deserialize, Serialize)]
684pub struct SttConfig {
685    /// Provider name from `[[llm.providers]]`. Empty string means auto-detect first provider
686    /// with `stt_model` set.
687    #[serde(default = "default_stt_provider")]
688    pub provider: String,
689    /// Language hint for transcription (e.g. `"en"`, `"auto"`).
690    #[serde(default = "default_stt_language")]
691    pub language: String,
692}
693
694/// Routing strategy selection for multi-provider routing.
695#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
696#[serde(rename_all = "lowercase")]
697pub enum RouterStrategyConfig {
698    /// Exponential moving average latency-aware ordering.
699    #[default]
700    Ema,
701    /// Thompson Sampling with Beta distributions (persistence-backed).
702    Thompson,
703    /// Cascade routing: try cheapest provider first, escalate on degenerate output.
704    Cascade,
705    /// PILOT: `LinUCB` contextual bandit with online learning and cost-aware reward.
706    Bandit,
707}
708
709/// Agent Stability Index (ASI) configuration.
710///
711/// Tracks per-provider response coherence via a sliding window of response embeddings.
712/// When coherence drops below `coherence_threshold`, the provider's routing prior is
713/// penalized by `penalty_weight`. Disabled by default; session-only (no persistence).
714///
715/// # Known Limitation
716///
717/// ASI embeddings are computed in a background `tokio::spawn` task after the response is
718/// returned to the caller. Under high request rates, the coherence score used for routing
719/// may lag 1–2 responses behind due to this fire-and-forget design. With the default
720/// `window = 5`, this lag is tolerable — coherence is a slow-moving signal.
721#[derive(Debug, Clone, Deserialize, Serialize)]
722pub struct AsiConfig {
723    /// Enable ASI coherence tracking. Default: false.
724    #[serde(default)]
725    pub enabled: bool,
726
727    /// Sliding window size for response embeddings per provider. Default: 5.
728    #[serde(default = "default_asi_window")]
729    pub window: usize,
730
731    /// Coherence score [0.0, 1.0] below which the provider is penalized. Default: 0.7.
732    #[serde(default = "default_asi_coherence_threshold")]
733    pub coherence_threshold: f32,
734
735    /// Penalty weight applied to Thompson beta / EMA score on low coherence. Default: 0.3.
736    ///
737    /// For Thompson, this shifts the beta prior: `beta += penalty_weight * (threshold - coherence)`.
738    /// For EMA, the score is multiplied by `max(0.5, coherence / threshold)`.
739    #[serde(default = "default_asi_penalty_weight")]
740    pub penalty_weight: f32,
741}
742
/// Default for `AsiConfig::window`: 5.
fn default_asi_window() -> usize {
    5_usize
}

/// Default for `AsiConfig::coherence_threshold`: 0.7.
fn default_asi_coherence_threshold() -> f32 {
    0.7_f32
}

/// Default for `AsiConfig::penalty_weight`: 0.3.
fn default_asi_penalty_weight() -> f32 {
    0.3_f32
}
754
755impl Default for AsiConfig {
756    fn default() -> Self {
757        Self {
758            enabled: false,
759            window: default_asi_window(),
760            coherence_threshold: default_asi_coherence_threshold(),
761            penalty_weight: default_asi_penalty_weight(),
762        }
763    }
764}
765
766/// Routing configuration for multi-provider setups.
767#[derive(Debug, Clone, Deserialize, Serialize)]
768pub struct RouterConfig {
769    /// Routing strategy: `"ema"` (default), `"thompson"`, `"cascade"`, or `"bandit"`.
770    #[serde(default)]
771    pub strategy: RouterStrategyConfig,
772    /// Path for persisting Thompson Sampling state. Defaults to `~/.zeph/router_thompson_state.json`.
773    ///
774    /// # Security
775    ///
776    /// This path is user-controlled. The application writes and reads a JSON file at
777    /// this location. Ensure the path is within a directory that is not world-writable
778    /// (e.g., avoid `/tmp`). The file is created with mode `0o600` on Unix.
779    #[serde(default)]
780    pub thompson_state_path: Option<String>,
781    /// Cascade routing configuration. Only used when `strategy = "cascade"`.
782    #[serde(default)]
783    pub cascade: Option<CascadeConfig>,
784    /// Bayesian reputation scoring configuration (RAPS). Disabled by default.
785    #[serde(default)]
786    pub reputation: Option<ReputationConfig>,
787    /// PILOT bandit routing configuration. Only used when `strategy = "bandit"`.
788    #[serde(default)]
789    pub bandit: Option<BanditConfig>,
790    /// Embedding-based quality gate threshold for Thompson/EMA routing. Default: disabled.
791    ///
792    /// When set, after provider selection, the cosine similarity between the query embedding
793    /// and the response embedding is computed. If below this threshold, the next provider in
794    /// the ordered list is tried. On exhaustion, the best response seen is returned.
795    ///
796    /// Only applies to Thompson and EMA strategies. Cascade uses its own quality classifier.
797    /// Fail-open: embedding errors disable the gate for that request.
798    #[serde(default)]
799    pub quality_gate: Option<f32>,
800    /// Agent Stability Index configuration. Disabled by default.
801    #[serde(default)]
802    pub asi: Option<AsiConfig>,
803    /// Maximum number of concurrent `embed_batch` calls through the router.
804    ///
805    /// Limits simultaneous embedding HTTP requests to prevent provider rate-limiting
806    /// and memory pressure during indexing or high-frequency recall. Default: 4.
807    /// Set to 0 to disable the semaphore (unlimited concurrency).
808    #[serde(default = "default_embed_concurrency")]
809    pub embed_concurrency: usize,
810}
811
/// Default for `RouterConfig::embed_concurrency`: 4.
fn default_embed_concurrency() -> usize {
    4_usize
}
815
816/// Configuration for Bayesian reputation scoring (RAPS — Reputation-Adjusted Provider Selection).
817///
818/// When enabled, quality outcomes from tool execution shift the routing scores over time,
819/// giving an advantage to providers that consistently produce valid tool arguments.
820///
821/// Default: disabled. Set `enabled = true` to activate.
822#[derive(Debug, Clone, Deserialize, Serialize)]
823pub struct ReputationConfig {
824    /// Enable reputation scoring. Default: false.
825    #[serde(default)]
826    pub enabled: bool,
827    /// Session-level decay factor applied on each load. Range: (0.0, 1.0]. Default: 0.95.
828    /// Lower values make reputation forget faster; 1.0 = no decay.
829    #[serde(default = "default_reputation_decay_factor")]
830    pub decay_factor: f64,
831    /// Weight of reputation in routing score blend. Range: [0.0, 1.0]. Default: 0.3.
832    ///
833    /// **Warning**: values above 0.5 can aggressively suppress low-reputation providers.
834    /// At `weight = 1.0` with `rep_factor = 0.0` (all failures), the routing score
835    /// drops to zero — the provider becomes unreachable for that session. Stick to
836    /// the default (0.3) unless you intentionally want strong reputation gating.
837    #[serde(default = "default_reputation_weight")]
838    pub weight: f64,
839    /// Minimum quality observations before reputation influences routing. Default: 5.
840    #[serde(default = "default_reputation_min_observations")]
841    pub min_observations: u64,
842    /// Path for persisting reputation state. Defaults to `~/.config/zeph/router_reputation_state.json`.
843    #[serde(default)]
844    pub state_path: Option<String>,
845}
846
/// Configuration for cascade routing (`strategy = "cascade"`).
///
/// Cascade routing tries providers in chain order (cheapest first), escalating to
/// the next provider when the response is classified as degenerate (empty, repetitive,
/// incoherent). Chain order determines cost order: first provider = cheapest.
///
/// Every field carries a serde default, so an empty TOML table deserializes successfully.
///
/// # Limitations
///
/// The heuristic classifier detects degenerate outputs only, not semantic failures.
/// Use `classifier_mode = "judge"` for semantic quality gating (adds LLM call cost).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CascadeConfig {
    /// Minimum quality score [0.0, 1.0] to accept a response without escalating.
    /// Responses scoring below this threshold trigger escalation.
    #[serde(default = "default_cascade_quality_threshold")]
    pub quality_threshold: f64,

    /// Maximum number of quality-based escalations per request.
    /// Network/API errors do not count against this budget.
    /// Default: 2 (allows up to 3 providers: cheap → mid → expensive).
    #[serde(default = "default_cascade_max_escalations")]
    pub max_escalations: u8,

    /// Quality classifier mode: `"heuristic"` (default) or `"judge"`.
    /// Heuristic is zero-cost but detects only degenerate outputs.
    /// Judge requires a configured `summary_model` and adds one LLM call per evaluation.
    /// See [`CascadeClassifierMode`] for the variants.
    #[serde(default)]
    pub classifier_mode: CascadeClassifierMode,

    /// Rolling quality history window size per provider. Default: 50.
    #[serde(default = "default_cascade_window_size")]
    pub window_size: usize,

    /// Maximum cumulative input+output tokens across all escalation levels.
    /// When exceeded, returns the best-seen response instead of escalating further.
    /// `None` (the value used when the key is omitted) disables the budget
    /// (unbounded escalation cost).
    #[serde(default)]
    pub max_cascade_tokens: Option<u32>,

    /// Explicit cost ordering of provider names (cheapest first).
    /// When set, cascade routing sorts providers by their position in this list before
    /// trying them. Providers not in the list are appended after listed ones in their
    /// original chain order. When unset, chain order is used (default behavior).
    /// The key is left out of serialized output while the value is `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cost_tiers: Option<Vec<String>>,
}
893
894impl Default for CascadeConfig {
895    fn default() -> Self {
896        Self {
897            quality_threshold: default_cascade_quality_threshold(),
898            max_escalations: default_cascade_max_escalations(),
899            classifier_mode: CascadeClassifierMode::default(),
900            window_size: default_cascade_window_size(),
901            max_cascade_tokens: None,
902            cost_tiers: None,
903        }
904    }
905}
906
/// Quality classifier mode for cascade routing.
///
/// Variants are (de)serialized in lowercase: `"heuristic"` and `"judge"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CascadeClassifierMode {
    /// Zero-cost heuristic: detects degenerate outputs (empty, repetitive, incoherent).
    /// Does not detect semantic failures (hallucinations, wrong answers).
    /// This is the default variant.
    #[default]
    Heuristic,
    /// LLM-based judge: more accurate but adds latency. Falls back to heuristic on failure.
    /// Requires `summary_model` to be configured.
    Judge,
}
919
/// Serde default for `BanditConfig::alpha`.
fn default_bandit_alpha() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::dim`.
fn default_bandit_dim() -> usize {
    32_usize
}

/// Serde default for `BanditConfig::cost_weight`.
fn default_bandit_cost_weight() -> f32 {
    0.1_f32
}

/// Serde default for `BanditConfig::decay_factor` (1.0 means no decay).
fn default_bandit_decay_factor() -> f32 {
    1.0_f32
}

/// Serde default for `BanditConfig::embedding_timeout_ms`, in milliseconds.
fn default_bandit_embedding_timeout_ms() -> u64 {
    50_u64
}

/// Serde default for `BanditConfig::cache_size`.
fn default_bandit_cache_size() -> usize {
    512_usize
}
943
/// Configuration for PILOT bandit routing (`strategy = "bandit"`).
///
/// PILOT (Provider Intelligence via Learned Online Tuning) uses a `LinUCB` contextual
/// bandit to learn which provider performs best for a given query context. The feature
/// vector is derived from the query embedding (first `dim` components, L2-normalised).
///
/// **Cold start**: the bandit falls back to Thompson sampling for the first
/// `10 * num_providers` queries (configurable). After warmup, `LinUCB` takes over.
///
/// **Embedding**: an `embedding_provider` must be set for feature vectors. If the embed
/// call exceeds `embedding_timeout_ms` or fails, the bandit falls back to Thompson/uniform.
/// Use a local provider (Ollama, Candle) to avoid network latency on the hot path.
///
/// Every field carries a serde default, so an empty TOML table deserializes successfully.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct BanditConfig {
    /// `LinUCB` exploration parameter. Default: 1.0.
    /// Higher values increase exploration; lower values favour exploitation.
    #[serde(default = "default_bandit_alpha")]
    pub alpha: f32,

    /// Feature vector dimension (first `dim` components of the embedding).
    ///
    /// This is simple truncation, not PCA. The first raw embedding dimensions do not
    /// necessarily capture the most variance. For `OpenAI` `text-embedding-3-*` models,
    /// consider using the `dimensions` API parameter (Matryoshka embeddings) instead.
    /// Default: 32.
    #[serde(default = "default_bandit_dim")]
    pub dim: usize,

    /// Cost penalty weight in the reward signal: `reward = quality - cost_weight * cost_fraction`.
    /// Default: 0.1. Increase to penalise expensive providers more aggressively.
    #[serde(default = "default_bandit_cost_weight")]
    pub cost_weight: f32,

    /// Session-level decay applied to arm state on startup: `A = I + decay*(A-I)`, `b = decay*b`.
    /// Values < 1.0 cause re-exploration after provider quality changes. Default: 1.0 (no decay).
    #[serde(default = "default_bandit_decay_factor")]
    pub decay_factor: f32,

    /// Provider name from `[[llm.providers]]` used for query embeddings.
    ///
    /// SLM recommended: prefer a fast local model (e.g. Ollama `nomic-embed-text`,
    /// Candle, or `text-embedding-3-small`) — this is called on every bandit request.
    /// Empty string disables `LinUCB` (bandit always falls back to Thompson/uniform).
    /// When the key is omitted the value is `ProviderName::default()`.
    #[serde(default)]
    pub embedding_provider: ProviderName,

    /// Hard timeout for the embedding call in milliseconds. Default: 50.
    /// If exceeded, the request falls back to Thompson/uniform selection.
    #[serde(default = "default_bandit_embedding_timeout_ms")]
    pub embedding_timeout_ms: u64,

    /// Maximum cached embeddings (keyed by query text hash). Default: 512.
    #[serde(default = "default_bandit_cache_size")]
    pub cache_size: usize,

    /// Path for persisting bandit state. Defaults to `~/.config/zeph/router_bandit_state.json`.
    ///
    /// The field itself is `None` when omitted; the fallback path above is applied by
    /// the consumer of this config, not by deserialization.
    ///
    /// # Security
    ///
    /// This path is user-controlled. The file is created with mode `0o600` on Unix.
    /// Do not place it in world-writable directories.
    #[serde(default)]
    pub state_path: Option<String>,

    /// MAR (Memory-Augmented Routing) confidence threshold.
    ///
    /// When the top-1 semantic recall score for the current query is >= this value,
    /// the bandit biases toward cheaper providers (the answer is likely in memory).
    /// Set to 1.0 to disable MAR. Default: 0.9.
    #[serde(default = "default_bandit_memory_confidence_threshold")]
    pub memory_confidence_threshold: f32,

    /// Minimum number of queries before `LinUCB` takes over from Thompson warmup.
    ///
    /// When unset or `0`, defaults to `10 × number of providers` (computed at startup).
    /// Set explicitly to control how long the bandit explores uniformly before
    /// switching to context-aware routing. Setting `0` preserves the computed default.
    #[serde(default)]
    pub warmup_queries: Option<u64>,
}
1024
/// Serde default for `BanditConfig::memory_confidence_threshold`.
fn default_bandit_memory_confidence_threshold() -> f32 {
    0.9_f32
}
1028
1029impl Default for BanditConfig {
1030    fn default() -> Self {
1031        Self {
1032            alpha: default_bandit_alpha(),
1033            dim: default_bandit_dim(),
1034            cost_weight: default_bandit_cost_weight(),
1035            decay_factor: default_bandit_decay_factor(),
1036            embedding_provider: ProviderName::default(),
1037            embedding_timeout_ms: default_bandit_embedding_timeout_ms(),
1038            cache_size: default_bandit_cache_size(),
1039            state_path: None,
1040            memory_confidence_threshold: default_bandit_memory_confidence_threshold(),
1041            warmup_queries: None,
1042        }
1043    }
1044}
1045
/// Configuration for Candle local inference.
///
/// NOTE(review): presumably the `[llm.candle]` TOML table (its `generation` sub-table is
/// documented on [`GenerationParams`]) — confirm against the top-level config struct.
///
/// Every field carries a serde default, so an empty table deserializes successfully.
///
/// # Security
///
/// The derived `Debug` implementation prints `hf_token` verbatim; avoid logging this
/// struct with `{:?}` where logs may be shared.
#[derive(Debug, Deserialize, Serialize)]
pub struct CandleConfig {
    /// Model source selector; falls back to `default_candle_source()` when omitted.
    /// NOTE(review): the accepted values are not visible in this chunk — confirm against
    /// the Candle backend.
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local filesystem path for the model; empty string when omitted.
    /// NOTE(review): presumably only consulted for a local `source` — confirm.
    #[serde(default)]
    pub local_path: String,
    /// Optional model file name. NOTE(review): presumably selects a specific file within
    /// the source repository or directory — confirm.
    #[serde(default)]
    pub filename: Option<String>,
    /// Chat template identifier; falls back to `default_chat_template()` when omitted.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Compute device selector; falls back to `default_candle_device()` when omitted.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional repository for the embedding model.
    /// NOTE(review): presumably a `HuggingFace` repo id — confirm.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
    ///
    /// Must be the **token value** — resolved by the caller before constructing this config.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Sampling / generation parameters; `GenerationParams::default()` when omitted.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Maximum seconds to wait for each half of a single inference request.
    ///
    /// The timeout is applied **twice** per `chat()` call: once for the channel send
    /// (waiting for a free slot) and once for the oneshot reply (waiting for the worker
    /// to finish). The effective maximum wall-clock wait per request is therefore
    /// `2 × inference_timeout_secs`. CPU inference can be slow; 120s is a conservative
    /// default for large models, giving up to 240s total before an error is returned.
    /// Values of 0 are silently promoted to 1 at bootstrap.
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1078
/// Serde default for the Candle `inference_timeout_secs` fields: two minutes, in seconds.
fn default_inference_timeout_secs() -> u64 {
    2 * 60
}
1082
/// Sampling / generation parameters for Candle local inference.
///
/// Used inside `[llm.candle.generation]` or a `[[llm.providers]]` Candle entry.
/// Every field carries a serde default, so an empty table deserializes successfully.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GenerationParams {
    /// Sampling temperature. Higher values produce more creative outputs. Default: `0.7`.
    #[serde(default = "default_temperature")]
    pub temperature: f64,
    /// Nucleus sampling threshold. When set, tokens with cumulative probability above
    /// this value are excluded. Default: `None` (disabled).
    #[serde(default)]
    pub top_p: Option<f64>,
    /// Top-k sampling. When set, only the top-k most probable tokens are considered.
    /// Default: `None` (disabled).
    #[serde(default)]
    pub top_k: Option<usize>,
    /// Maximum number of tokens to generate per response. Capped at [`MAX_TOKENS_CAP`].
    /// Default: `2048`.
    ///
    /// The field stores the configured value as-is; the cap is applied when reading
    /// through [`GenerationParams::capped_max_tokens`].
    #[serde(default = "default_max_tokens")]
    pub max_tokens: usize,
    /// Random seed for reproducible outputs. Default: `42`.
    #[serde(default = "default_seed")]
    pub seed: u64,
    /// Repetition penalty applied during sampling. Default: `1.1`.
    #[serde(default = "default_repeat_penalty")]
    pub repeat_penalty: f32,
    /// Number of last tokens to consider for the repetition penalty window. Default: `64`.
    #[serde(default = "default_repeat_last_n")]
    pub repeat_last_n: usize,
}
1113
/// Ceiling applied to `GenerationParams::max_tokens` (32 Ki tokens) so that a
/// misconfigured value cannot request unbounded generation.
pub const MAX_TOKENS_CAP: usize = 32 * 1024;
1116
1117impl GenerationParams {
1118    /// Returns `max_tokens` clamped to [`MAX_TOKENS_CAP`].
1119    ///
1120    /// # Examples
1121    ///
1122    /// ```
1123    /// use zeph_config::GenerationParams;
1124    ///
1125    /// let params = GenerationParams::default();
1126    /// assert!(params.capped_max_tokens() <= 32768);
1127    /// ```
1128    #[must_use]
1129    pub fn capped_max_tokens(&self) -> usize {
1130        self.max_tokens.min(MAX_TOKENS_CAP)
1131    }
1132}
1133
1134impl Default for GenerationParams {
1135    fn default() -> Self {
1136        Self {
1137            temperature: default_temperature(),
1138            top_p: None,
1139            top_k: None,
1140            max_tokens: default_max_tokens(),
1141            seed: default_seed(),
1142            repeat_penalty: default_repeat_penalty(),
1143            repeat_last_n: default_repeat_last_n(),
1144        }
1145    }
1146}
1147
1148// ─── Unified config types ─────────────────────────────────────────────────────
1149
/// Routing strategy for the `[[llm.providers]]` pool.
///
/// Variants are (de)serialized in lowercase, e.g. `"none"`, `"ema"`, `"bandit"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LlmRoutingStrategy {
    /// Single provider or first-in-pool (default).
    #[default]
    None,
    /// Exponential moving average latency-aware ordering.
    Ema,
    /// Thompson Sampling with Beta distributions.
    Thompson,
    /// Cascade: try cheapest provider first, escalate on degenerate output.
    Cascade,
    /// Complexity triage routing: pre-classify each request, delegate to appropriate tier.
    Triage,
    /// PILOT: `LinUCB` contextual bandit with online learning and budget-aware reward.
    Bandit,
}
1168
/// Serde default for `ComplexityRoutingConfig::triage_timeout_secs`, in seconds.
fn default_triage_timeout_secs() -> u64 {
    5_u64
}

/// Serde default for `ComplexityRoutingConfig::max_triage_tokens`.
fn default_max_triage_tokens() -> u32 {
    50_u32
}

/// Serde default helper for boolean fields that are enabled unless configured otherwise.
fn default_true() -> bool {
    !false
}
1180
/// Tier-to-provider name mapping for complexity routing.
///
/// Each field names the provider that serves requests of the corresponding complexity
/// tier. All fields are optional and deserialize to `None` when omitted.
/// NOTE(review): how an unmapped tier is resolved at runtime is not visible here — confirm
/// against the triage router.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct TierMapping {
    /// Provider name for the `simple` tier.
    pub simple: Option<String>,
    /// Provider name for the `medium` tier.
    pub medium: Option<String>,
    /// Provider name for the `complex` tier.
    pub complex: Option<String>,
    /// Provider name for the `expert` tier.
    pub expert: Option<String>,
}
1189
/// Configuration for complexity-based triage routing (`routing = "triage"`).
///
/// When `[llm] routing = "triage"` is set, a cheap triage model classifies each request
/// and routes it to the appropriate tier provider. Requires at least one tier mapping.
///
/// Every field carries a serde default, so an empty table deserializes successfully.
///
/// # Example
///
/// ```toml
/// [llm]
/// routing = "triage"
///
/// [llm.complexity_routing]
/// triage_provider = "local-fast"
///
/// [llm.complexity_routing.tiers]
/// simple = "local-fast"
/// medium = "haiku"
/// complex = "sonnet"
/// expert = "opus"
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ComplexityRoutingConfig {
    /// Provider name from `[[llm.providers]]` used for triage classification.
    #[serde(default)]
    pub triage_provider: Option<ProviderName>,

    /// Skip triage when all tiers map to the same provider. Default: `true`.
    #[serde(default = "default_true")]
    pub bypass_single_provider: bool,

    /// Tier-to-provider name mapping. See [`TierMapping`].
    #[serde(default)]
    pub tiers: TierMapping,

    /// Max output tokens for the triage classification call. Default: 50.
    #[serde(default = "default_max_triage_tokens")]
    pub max_triage_tokens: u32,

    /// Timeout in seconds for the triage classification call. Default: 5.
    /// On timeout, falls back to the default (first) tier provider.
    #[serde(default = "default_triage_timeout_secs")]
    pub triage_timeout_secs: u64,

    /// Optional fallback strategy when triage misclassifies.
    /// Only `"cascade"` is currently supported (Phase 4).
    /// The value is a free-form string; it is not checked during deserialization.
    #[serde(default)]
    pub fallback_strategy: Option<String>,
}
1238
1239impl Default for ComplexityRoutingConfig {
1240    fn default() -> Self {
1241        Self {
1242            triage_provider: None,
1243            bypass_single_provider: true,
1244            tiers: TierMapping::default(),
1245            max_triage_tokens: default_max_triage_tokens(),
1246            triage_timeout_secs: default_triage_timeout_secs(),
1247            fallback_strategy: None,
1248        }
1249    }
1250}
1251
/// Configuration for the Collaborative Entropy (`CoE`) subsystem (`[llm.coe]` TOML section).
///
/// `CoE` detects uncertain responses from the primary provider and escalates to a
/// secondary provider when either the intra-entropy or inter-divergence signal crosses
/// its threshold. Only active for `RouterStrategy::Ema` and `RouterStrategy::Thompson`.
///
/// The container-level `#[serde(default)]` means any key omitted from the table takes
/// its value from the `Default` implementation of this struct.
///
/// # Example
///
/// ```toml
/// [llm.coe]
/// enabled = true
/// intra_threshold = 0.8
/// inter_threshold = 0.20
/// shadow_sample_rate = 0.1
/// secondary_provider = "quality"
/// embed_provider = ""
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct CoeConfig {
    /// Enable `CoE`. When `false`, the struct is ignored. Default: `false`.
    pub enabled: bool,
    /// Mean negative log-prob threshold; responses above this trigger intra escalation.
    /// Default: `0.8`.
    pub intra_threshold: f64,
    /// Divergence threshold in `[0.0, 1.0]`. Default: `0.20`.
    pub inter_threshold: f64,
    /// Baseline rate at which secondary is called even when intra is low. Default: `0.1`.
    pub shadow_sample_rate: f64,
    /// Provider name from `[[llm.providers]]` used as the escalation target.
    pub secondary_provider: ProviderName,
    /// Provider name for inter-divergence embeddings. Empty → inherit bandit's embed provider.
    pub embed_provider: ProviderName,
}
1285
1286impl Default for CoeConfig {
1287    fn default() -> Self {
1288        Self {
1289            enabled: false,
1290            intra_threshold: 0.8,
1291            inter_threshold: 0.20,
1292            shadow_sample_rate: 0.1,
1293            secondary_provider: ProviderName::default(),
1294            embed_provider: ProviderName::default(),
1295        }
1296    }
1297}
1298
/// A single Gonka network node endpoint.
///
/// Used in `[[llm.providers]]` entries with `type = "gonka"` to declare
/// the node pool for blockchain inference routing.
///
/// `url` and `address` have no serde default, so both keys must be present for a node
/// to deserialize.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct GonkaNode {
    /// HTTP(S) URL of the Gonka node (e.g. `"https://node1.gonka.ai"`).
    pub url: String,
    /// On-chain bech32 address of this node (e.g. `"gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"`).
    ///
    /// Required for signature construction: every signed request binds to the target node's
    /// on-chain address, making signatures non-replayable across different nodes.
    pub address: String,
    /// Optional human-readable label for `zeph gonka doctor` output.
    /// Left out of serialized output while `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
}
1316
/// Inline candle config for use inside `ProviderEntry`.
/// Re-uses the generation params from `CandleConfig`.
///
/// Field set and serde defaults mirror `CandleConfig`; every field carries a serde
/// default, so an empty table deserializes successfully.
///
/// # Security
///
/// The derived `Debug` implementation prints `hf_token` verbatim; avoid logging this
/// struct with `{:?}` where logs may be shared.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandleInlineConfig {
    /// Model source selector; falls back to `default_candle_source()` when omitted.
    /// NOTE(review): the accepted values are not visible in this chunk — confirm against
    /// the Candle backend.
    #[serde(default = "default_candle_source")]
    pub source: String,
    /// Local filesystem path for the model; empty string when omitted.
    /// NOTE(review): presumably only consulted for a local `source` — confirm.
    #[serde(default)]
    pub local_path: String,
    /// Optional model file name. NOTE(review): presumably selects a specific file within
    /// the source repository or directory — confirm.
    #[serde(default)]
    pub filename: Option<String>,
    /// Chat template identifier; falls back to `default_chat_template()` when omitted.
    #[serde(default = "default_chat_template")]
    pub chat_template: String,
    /// Compute device selector; falls back to `default_candle_device()` when omitted.
    #[serde(default = "default_candle_device")]
    pub device: String,
    /// Optional repository for the embedding model.
    /// NOTE(review): presumably a `HuggingFace` repo id — confirm.
    #[serde(default)]
    pub embedding_repo: Option<String>,
    /// Resolved `HuggingFace` Hub API token for authenticated model downloads.
    #[serde(default)]
    pub hf_token: Option<String>,
    /// Sampling / generation parameters; `GenerationParams::default()` when omitted.
    #[serde(default)]
    pub generation: GenerationParams,
    /// Per-phase timeout, in seconds, for a single inference request.
    ///
    /// The send and receive phases each get this budget, so the effective maximum
    /// wall-clock wait per request is `2 × inference_timeout_secs`.
    /// CPU inference can be slow; 120s is a conservative default. Floored at 1s.
    #[serde(default = "default_inference_timeout_secs")]
    pub inference_timeout_secs: u64,
}
1345
1346impl Default for CandleInlineConfig {
1347    fn default() -> Self {
1348        Self {
1349            source: default_candle_source(),
1350            local_path: String::new(),
1351            filename: None,
1352            chat_template: default_chat_template(),
1353            device: default_candle_device(),
1354            embedding_repo: None,
1355            hf_token: None,
1356            generation: GenerationParams::default(),
1357            inference_timeout_secs: default_inference_timeout_secs(),
1358        }
1359    }
1360}
1361
/// Unified provider entry: one struct replaces `CloudLlmConfig`, `OpenAiConfig`,
/// `GeminiConfig`, `OllamaConfig`, `CompatibleConfig`, and `OrchestratorProviderConfig`.
///
/// Provider-specific fields use `#[serde(default)]` and are ignored by backends
/// that do not use them (flat-union pattern).
///
/// Only `type` is mandatory; every other key has a serde default.
///
/// # Security
///
/// The derived `Debug` implementation prints `api_key` (and any nested `hf_token`)
/// verbatim; avoid logging this struct with `{:?}` where logs may be shared.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic for TOML-deserialized configuration
pub struct ProviderEntry {
    /// Required: provider backend type.
    #[serde(rename = "type")]
    pub provider_type: ProviderKind,

    /// Optional name for multi-provider configs. Auto-generated from type if absent.
    #[serde(default)]
    pub name: Option<String>,

    /// Model identifier. Required for most types.
    #[serde(default)]
    pub model: Option<String>,

    /// API base URL. Each type has its own default.
    #[serde(default)]
    pub base_url: Option<String>,

    /// Max output tokens.
    #[serde(default)]
    pub max_tokens: Option<u32>,

    /// Embedding model. When set, this provider supports `embed()` calls.
    #[serde(default)]
    pub embedding_model: Option<String>,

    /// STT model. When set, this provider supports speech-to-text via the Whisper API or
    /// Candle-local inference.
    #[serde(default)]
    pub stt_model: Option<String>,

    /// Mark this entry as the embedding provider (handles `embed()` calls).
    #[serde(default)]
    pub embed: bool,

    /// Mark this entry as the default chat provider (overrides position-based default).
    #[serde(default)]
    pub default: bool,

    // --- Claude-specific ---
    /// Thinking mode; see [`ThinkingConfig`].
    #[serde(default)]
    pub thinking: Option<ThinkingConfig>,
    /// NOTE(review): semantics are not visible in this chunk; presumably enables
    /// server-side context compaction — confirm against the Claude backend.
    #[serde(default)]
    pub server_compaction: bool,
    /// NOTE(review): semantics are not visible in this chunk; presumably opts in to an
    /// extended context window — confirm against the Claude backend.
    #[serde(default)]
    pub enable_extended_context: bool,
    /// Prompt cache TTL variant. `None` keeps the default ~5-minute ephemeral TTL.
    /// Set to `"1h"` to enable the extended 1-hour TTL (beta, ~2× write cost).
    #[serde(default)]
    pub prompt_cache_ttl: Option<CacheTtl>,

    // --- OpenAI-specific ---
    /// Reasoning effort hint, stored as a free-form string.
    /// NOTE(review): accepted values are not checked here — confirm against the OpenAI backend.
    #[serde(default)]
    pub reasoning_effort: Option<String>,

    // --- Gemini-specific ---
    /// Gemini thinking level; see `GeminiThinkingLevel`.
    #[serde(default)]
    pub thinking_level: Option<GeminiThinkingLevel>,
    /// Gemini thinking budget.
    /// NOTE(review): the signed type suggests negative sentinel values may be
    /// meaningful — confirm against the Gemini backend.
    #[serde(default)]
    pub thinking_budget: Option<i32>,
    /// NOTE(review): presumably controls whether thought output is returned — confirm
    /// against the Gemini backend.
    #[serde(default)]
    pub include_thoughts: Option<bool>,

    // --- Compatible-specific: optional inline api_key ---
    /// Optional API key given inline in the config file.
    #[serde(default)]
    pub api_key: Option<String>,

    // --- Candle-specific ---
    /// Inline Candle settings; see [`CandleInlineConfig`].
    #[serde(default)]
    pub candle: Option<CandleInlineConfig>,

    // --- Vision ---
    /// Vision model identifier.
    /// NOTE(review): how this is selected at runtime is not visible in this chunk.
    #[serde(default)]
    pub vision_model: Option<String>,

    // --- Gonka-specific ---
    /// Gonka network node pool. Required (non-empty) when `type = "gonka"`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub gonka_nodes: Vec<GonkaNode>,
    /// bech32 chain prefix for address encoding. Defaults to `"gonka"` when omitted.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gonka_chain_prefix: Option<String>,

    /// Provider-specific instruction file.
    #[serde(default)]
    pub instruction_file: Option<std::path::PathBuf>,

    /// Maximum concurrent LLM calls from orchestrated sub-agents to this provider.
    ///
    /// When set, `DagScheduler` acquires a semaphore permit before dispatching a
    /// sub-agent that targets this provider. Dispatch is deferred (using the existing
    /// `deferral_backoff` mechanism) when the semaphore is saturated.
    ///
    /// `None` (default) = unlimited — no admission control applied.
    ///
    /// # Example (TOML)
    ///
    /// ```toml
    /// [[llm.providers]]
    /// name = "quality"
    /// type = "openai"
    /// model = "gpt-5"
    /// max_concurrent = 3
    /// ```
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_concurrent: Option<u32>,
}
1475
1476impl Default for ProviderEntry {
1477    fn default() -> Self {
1478        Self {
1479            provider_type: ProviderKind::Ollama,
1480            name: None,
1481            model: None,
1482            base_url: None,
1483            max_tokens: None,
1484            embedding_model: None,
1485            stt_model: None,
1486            embed: false,
1487            default: false,
1488            thinking: None,
1489            server_compaction: false,
1490            enable_extended_context: false,
1491            prompt_cache_ttl: None,
1492            reasoning_effort: None,
1493            thinking_level: None,
1494            thinking_budget: None,
1495            include_thoughts: None,
1496            api_key: None,
1497            candle: None,
1498            vision_model: None,
1499            gonka_nodes: Vec::new(),
1500            gonka_chain_prefix: None,
1501            instruction_file: None,
1502            max_concurrent: None,
1503        }
1504    }
1505}
1506
1507impl ProviderEntry {
1508    /// Resolve the effective name: explicit `name` field or type string.
1509    #[must_use]
1510    pub fn effective_name(&self) -> String {
1511        self.name
1512            .clone()
1513            .unwrap_or_else(|| self.provider_type.as_str().to_owned())
1514    }
1515
1516    /// Resolve the effective model: explicit `model` field or the provider-type default.
1517    ///
1518    /// Defaults mirror those used in `build_provider_from_entry` so that `runtime.model_name`
1519    /// always reflects the actual model being used rather than the provider type string.
1520    #[must_use]
1521    pub fn effective_model(&self) -> String {
1522        if let Some(ref m) = self.model {
1523            return m.clone();
1524        }
1525        match self.provider_type {
1526            ProviderKind::Ollama => "qwen3:8b".to_owned(),
1527            ProviderKind::Claude => "claude-haiku-4-5-20251001".to_owned(),
1528            ProviderKind::OpenAi => "gpt-4o-mini".to_owned(),
1529            ProviderKind::Gemini => "gemini-2.0-flash".to_owned(),
1530            // Compatible/Candle return empty because the model is resolved elsewhere.
1531            // Gonka returns empty because it is a blockchain provider, not an LLM — there is no model concept.
1532            ProviderKind::Compatible | ProviderKind::Candle | ProviderKind::Gonka => String::new(),
1533        }
1534    }
1535
1536    /// Validate this entry for cross-field consistency.
1537    ///
1538    /// # Errors
1539    ///
1540    /// Returns `ConfigError` when a fatal invariant is violated (e.g. compatible provider
1541    /// without a name).
1542    pub fn validate(&self) -> Result<(), crate::error::ConfigError> {
1543        use crate::error::ConfigError;
1544
1545        // B2: compatible provider MUST have name set.
1546        if self.provider_type == ProviderKind::Compatible && self.name.is_none() {
1547            return Err(ConfigError::Validation(
1548                "[[llm.providers]] entry with type=\"compatible\" must set `name`".into(),
1549            ));
1550        }
1551
1552        // B3: gonka provider MUST have name and valid gonka_nodes.
1553        if self.provider_type == ProviderKind::Gonka {
1554            if self.name.is_none() {
1555                return Err(ConfigError::Validation(
1556                    "[[llm.providers]] entry with type=\"gonka\" must set `name`".into(),
1557                ));
1558            }
1559            self.validate_gonka_nodes()?;
1560        }
1561
1562        // B1: warn on irrelevant fields.
1563        self.warn_irrelevant_fields();
1564
1565        // W6: Candle STT-only provider (stt_model set, no model) is valid — no warning needed.
1566        // Warn if Ollama has stt_model set (Ollama does not support Whisper API).
1567        if self.stt_model.is_some() && self.provider_type == ProviderKind::Ollama {
1568            tracing::warn!(
1569                provider = self.effective_name(),
1570                "field `stt_model` is set on an Ollama provider; Ollama does not support the \
1571                 Whisper STT API — use OpenAI, compatible, or candle instead"
1572            );
1573        }
1574
1575        Ok(())
1576    }
1577
1578    /// Resolve the effective Gonka chain prefix: explicit value or `"gonka"` default.
1579    #[must_use]
1580    pub fn effective_gonka_chain_prefix(&self) -> &str {
1581        self.gonka_chain_prefix.as_deref().unwrap_or("gonka")
1582    }
1583
1584    fn warn_irrelevant_fields(&self) {
1585        let name = self.effective_name();
1586        match self.provider_type {
1587            ProviderKind::Ollama => {
1588                if self.thinking.is_some() {
1589                    tracing::warn!(
1590                        provider = name,
1591                        "field `thinking` is only used by Claude providers"
1592                    );
1593                }
1594                if self.reasoning_effort.is_some() {
1595                    tracing::warn!(
1596                        provider = name,
1597                        "field `reasoning_effort` is only used by OpenAI providers"
1598                    );
1599                }
1600                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1601                    tracing::warn!(
1602                        provider = name,
1603                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1604                    );
1605                }
1606            }
1607            ProviderKind::Claude => {
1608                if self.reasoning_effort.is_some() {
1609                    tracing::warn!(
1610                        provider = name,
1611                        "field `reasoning_effort` is only used by OpenAI providers"
1612                    );
1613                }
1614                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1615                    tracing::warn!(
1616                        provider = name,
1617                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1618                    );
1619                }
1620            }
1621            ProviderKind::OpenAi => {
1622                if self.thinking.is_some() {
1623                    tracing::warn!(
1624                        provider = name,
1625                        "field `thinking` is only used by Claude providers"
1626                    );
1627                }
1628                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1629                    tracing::warn!(
1630                        provider = name,
1631                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1632                    );
1633                }
1634            }
1635            ProviderKind::Gemini => {
1636                if self.thinking.is_some() {
1637                    tracing::warn!(
1638                        provider = name,
1639                        "field `thinking` is only used by Claude providers"
1640                    );
1641                }
1642                if self.reasoning_effort.is_some() {
1643                    tracing::warn!(
1644                        provider = name,
1645                        "field `reasoning_effort` is only used by OpenAI providers"
1646                    );
1647                }
1648            }
1649            ProviderKind::Gonka => {
1650                if self.thinking.is_some() {
1651                    tracing::warn!(
1652                        provider = name,
1653                        "field `thinking` is only used by Claude providers"
1654                    );
1655                }
1656                if self.reasoning_effort.is_some() {
1657                    tracing::warn!(
1658                        provider = name,
1659                        "field `reasoning_effort` is only used by OpenAI providers"
1660                    );
1661                }
1662                if self.thinking_level.is_some() || self.thinking_budget.is_some() {
1663                    tracing::warn!(
1664                        provider = name,
1665                        "fields `thinking_level`/`thinking_budget` are only used by Gemini providers"
1666                    );
1667                }
1668            }
1669            ProviderKind::Compatible | ProviderKind::Candle => {}
1670        }
1671    }
1672
1673    fn validate_gonka_nodes(&self) -> Result<(), crate::error::ConfigError> {
1674        use crate::error::ConfigError;
1675        if self.gonka_nodes.is_empty() {
1676            return Err(ConfigError::Validation(format!(
1677                "[[llm.providers]] entry '{}' with type=\"gonka\" must set non-empty `gonka_nodes`",
1678                self.effective_name()
1679            )));
1680        }
1681        for (i, node) in self.gonka_nodes.iter().enumerate() {
1682            if node.url.is_empty() {
1683                return Err(ConfigError::Validation(format!(
1684                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must not be empty",
1685                    self.effective_name()
1686                )));
1687            }
1688            if !node.url.starts_with("http://") && !node.url.starts_with("https://") {
1689                return Err(ConfigError::Validation(format!(
1690                    "[[llm.providers]] entry '{}' gonka_nodes[{i}].url must start with http:// or https://",
1691                    self.effective_name()
1692                )));
1693            }
1694        }
1695        Ok(())
1696    }
1697}
1698
1699/// Validate a pool of `ProviderEntry` items.
1700///
1701/// # Errors
1702///
1703/// Returns `ConfigError` for fatal validation failures:
1704/// - Empty pool
1705/// - Duplicate names
1706/// - Multiple entries marked `default = true`
1707/// - Individual entry validation errors
1708pub fn validate_pool(entries: &[ProviderEntry]) -> Result<(), crate::error::ConfigError> {
1709    use crate::error::ConfigError;
1710    use std::collections::HashSet;
1711
1712    if entries.is_empty() {
1713        return Err(ConfigError::Validation(
1714            "at least one LLM provider must be configured in [[llm.providers]]".into(),
1715        ));
1716    }
1717
1718    let default_count = entries.iter().filter(|e| e.default).count();
1719    if default_count > 1 {
1720        return Err(ConfigError::Validation(
1721            "only one [[llm.providers]] entry can be marked `default = true`".into(),
1722        ));
1723    }
1724
1725    let mut seen_names: HashSet<String> = HashSet::new();
1726    for entry in entries {
1727        let name = entry.effective_name();
1728        if !seen_names.insert(name.clone()) {
1729            return Err(ConfigError::Validation(format!(
1730                "duplicate provider name \"{name}\" in [[llm.providers]]"
1731            )));
1732        }
1733        entry.validate()?;
1734    }
1735
1736    Ok(())
1737}
1738
#[cfg(test)]
mod tests {
    // Unit tests for provider entry/pool validation, `LlmConfig` TOML parsing and the
    // `effective_*` / STT helpers, `ProviderName`, and Gonka-specific configuration.
    use super::*;

    // Ollama entry with an explicit name and model; everything else is `Default`.
    fn ollama_entry() -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Ollama,
            name: Some("ollama".into()),
            model: Some("qwen3:8b".into()),
            ..Default::default()
        }
    }

    // Claude entry with an explicit name, model and `max_tokens`; everything else is `Default`.
    fn claude_entry() -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Claude,
            name: Some("claude".into()),
            model: Some("claude-sonnet-4-6".into()),
            max_tokens: Some(8192),
            ..Default::default()
        }
    }

    // ─── ProviderEntry::validate ─────────────────────────────────────────────

    #[test]
    fn validate_ollama_valid() {
        assert!(ollama_entry().validate().is_ok());
    }

    #[test]
    fn validate_claude_valid() {
        assert!(claude_entry().validate().is_ok());
    }

    #[test]
    fn validate_compatible_without_name_errors() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: None,
            ..Default::default()
        };
        let err = entry.validate().unwrap_err();
        assert!(
            err.to_string().contains("compatible"),
            "error should mention compatible: {err}"
        );
    }

    #[test]
    fn validate_compatible_with_name_ok() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: Some("my-proxy".into()),
            base_url: Some("http://localhost:8080".into()),
            model: Some("gpt-4o".into()),
            max_tokens: Some(4096),
            ..Default::default()
        };
        assert!(entry.validate().is_ok());
    }

    #[test]
    fn validate_openai_valid() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::OpenAi,
            name: Some("openai".into()),
            model: Some("gpt-4o".into()),
            max_tokens: Some(4096),
            ..Default::default()
        };
        assert!(entry.validate().is_ok());
    }

    #[test]
    fn validate_gemini_valid() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Gemini,
            name: Some("gemini".into()),
            model: Some("gemini-2.0-flash".into()),
            ..Default::default()
        };
        assert!(entry.validate().is_ok());
    }

    // ─── validate_pool ───────────────────────────────────────────────────────

    #[test]
    fn validate_pool_empty_errors() {
        let err = validate_pool(&[]).unwrap_err();
        assert!(err.to_string().contains("at least one"), "{err}");
    }

    #[test]
    fn validate_pool_single_entry_ok() {
        assert!(validate_pool(&[ollama_entry()]).is_ok());
    }

    #[test]
    fn validate_pool_duplicate_names_errors() {
        let a = ollama_entry();
        let b = ollama_entry(); // same effective name "ollama"
        let err = validate_pool(&[a, b]).unwrap_err();
        assert!(err.to_string().contains("duplicate"), "{err}");
    }

    #[test]
    fn validate_pool_multiple_defaults_errors() {
        let mut a = ollama_entry();
        let mut b = claude_entry();
        a.default = true;
        b.default = true;
        let err = validate_pool(&[a, b]).unwrap_err();
        assert!(err.to_string().contains("default"), "{err}");
    }

    #[test]
    fn validate_pool_two_different_providers_ok() {
        assert!(validate_pool(&[ollama_entry(), claude_entry()]).is_ok());
    }

    #[test]
    fn validate_pool_propagates_entry_error() {
        let bad = ProviderEntry {
            provider_type: ProviderKind::Compatible,
            name: None, // invalid: compatible without name
            ..Default::default()
        };
        assert!(validate_pool(&[bad]).is_err());
    }

    // ─── ProviderEntry::effective_model ──────────────────────────────────────

    #[test]
    fn effective_model_returns_explicit_when_set() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Claude,
            model: Some("claude-sonnet-4-6".into()),
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "claude-sonnet-4-6");
    }

    #[test]
    fn effective_model_ollama_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Ollama,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "qwen3:8b");
    }

    #[test]
    fn effective_model_claude_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Claude,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "claude-haiku-4-5-20251001");
    }

    #[test]
    fn effective_model_openai_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::OpenAi,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "gpt-4o-mini");
    }

    #[test]
    fn effective_model_gemini_default_when_none() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Gemini,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "gemini-2.0-flash");
    }

    // ─── LlmConfig::check_legacy_format ──────────────────────────────────────

    // Parse a complete TOML snippet that includes the [llm] header and return its `llm` table.
    // Panics (via `unwrap`) when the snippet does not deserialize, failing the calling test.
    fn parse_llm(toml: &str) -> LlmConfig {
        #[derive(serde::Deserialize)]
        struct Wrapper {
            llm: LlmConfig,
        }
        toml::from_str::<Wrapper>(toml).unwrap().llm
    }

    #[test]
    fn check_legacy_format_new_format_ok() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#,
        );
        assert!(cfg.check_legacy_format().is_ok());
    }

    #[test]
    fn check_legacy_format_empty_providers_no_legacy_ok() {
        // No providers, no legacy fields — passes (empty [llm] is acceptable here)
        let cfg = parse_llm("[llm]\n");
        assert!(cfg.check_legacy_format().is_ok());
    }

    // ─── LlmConfig::effective_* helpers ──────────────────────────────────────

    #[test]
    fn effective_provider_falls_back_to_ollama_when_no_providers() {
        let cfg = parse_llm("[llm]\n");
        assert_eq!(cfg.effective_provider(), ProviderKind::Ollama);
    }

    #[test]
    fn effective_provider_reads_from_providers_first() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "claude"
model = "claude-sonnet-4-6"
"#,
        );
        assert_eq!(cfg.effective_provider(), ProviderKind::Claude);
    }

    #[test]
    fn effective_model_reads_from_providers_first() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"
"#,
        );
        assert_eq!(cfg.effective_model(), "qwen3:8b");
    }

    #[test]
    fn effective_model_skips_embed_only_provider() {
        // The first entry is marked `embed = true`; the expected model is the second entry's.
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "gemma4:26b"
embed = true

[[llm.providers]]
type = "openai"
model = "gpt-4o-mini"
"#,
        );
        assert_eq!(cfg.effective_model(), "gpt-4o-mini");
    }

    #[test]
    fn effective_base_url_default_when_absent() {
        let cfg = parse_llm("[llm]\n");
        assert_eq!(cfg.effective_base_url(), "http://localhost:11434");
    }

    #[test]
    fn effective_base_url_from_providers_entry() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "ollama"
base_url = "http://myhost:11434"
"#,
        );
        assert_eq!(cfg.effective_base_url(), "http://myhost:11434");
    }

    // ─── ComplexityRoutingConfig / LlmRoutingStrategy::Triage TOML parsing ──

    #[test]
    fn complexity_routing_defaults() {
        let cr = ComplexityRoutingConfig::default();
        assert!(
            cr.bypass_single_provider,
            "bypass_single_provider must default to true"
        );
        assert_eq!(cr.triage_timeout_secs, 5);
        assert_eq!(cr.max_triage_tokens, 50);
        assert!(cr.triage_provider.is_none());
        assert!(cr.tiers.simple.is_none());
    }

    #[test]
    fn complexity_routing_toml_round_trip() {
        // NOTE: only the TOML -> struct direction is exercised; nothing is serialized back.
        let cfg = parse_llm(
            r#"
[llm]
routing = "triage"

[llm.complexity_routing]
triage_provider = "fast"
bypass_single_provider = false
triage_timeout_secs = 10
max_triage_tokens = 100

[llm.complexity_routing.tiers]
simple = "fast"
medium = "medium"
complex = "large"
expert = "opus"
"#,
        );
        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
        let cr = cfg
            .complexity_routing
            .expect("complexity_routing must be present");
        assert_eq!(cr.triage_provider.as_deref(), Some("fast"));
        assert!(!cr.bypass_single_provider);
        assert_eq!(cr.triage_timeout_secs, 10);
        assert_eq!(cr.max_triage_tokens, 100);
        assert_eq!(cr.tiers.simple.as_deref(), Some("fast"));
        assert_eq!(cr.tiers.medium.as_deref(), Some("medium"));
        assert_eq!(cr.tiers.complex.as_deref(), Some("large"));
        assert_eq!(cr.tiers.expert.as_deref(), Some("opus"));
    }

    #[test]
    fn complexity_routing_partial_tiers_toml() {
        // Only simple + complex configured; medium and expert are None.
        let cfg = parse_llm(
            r#"
[llm]
routing = "triage"

[llm.complexity_routing.tiers]
simple = "haiku"
complex = "sonnet"
"#,
        );
        let cr = cfg
            .complexity_routing
            .expect("complexity_routing must be present");
        assert_eq!(cr.tiers.simple.as_deref(), Some("haiku"));
        assert!(cr.tiers.medium.is_none());
        assert_eq!(cr.tiers.complex.as_deref(), Some("sonnet"));
        assert!(cr.tiers.expert.is_none());
        // Defaults still applied.
        assert!(cr.bypass_single_provider);
        assert_eq!(cr.triage_timeout_secs, 5);
    }

    #[test]
    fn routing_strategy_triage_deserialized() {
        let cfg = parse_llm(
            r#"
[llm]
routing = "triage"
"#,
        );
        assert!(matches!(cfg.routing, LlmRoutingStrategy::Triage));
    }

    // ─── stt_provider_entry ───────────────────────────────────────────────────

    #[test]
    fn stt_provider_entry_by_name_match() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
stt_model = "gpt-4o-mini-transcribe"

[llm.stt]
provider = "quality"
"#,
        );
        let entry = cfg.stt_provider_entry().expect("should find stt provider");
        assert_eq!(entry.effective_name(), "quality");
        assert_eq!(entry.stt_model.as_deref(), Some("gpt-4o-mini-transcribe"));
    }

    #[test]
    fn stt_provider_entry_auto_detect_when_provider_empty() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = ""
"#,
        );
        let entry = cfg.stt_provider_entry().expect("should auto-detect");
        assert_eq!(entry.effective_name(), "openai-stt");
    }

    #[test]
    fn stt_provider_entry_auto_detect_no_stt_section() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"
"#,
        );
        // No [llm.stt] section — should still find first provider with stt_model.
        let entry = cfg.stt_provider_entry().expect("should auto-detect");
        assert_eq!(entry.effective_name(), "openai-stt");
    }

    #[test]
    fn stt_provider_entry_none_when_no_stt_model() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"
"#,
        );
        assert!(cfg.stt_provider_entry().is_none());
    }

    #[test]
    fn stt_provider_entry_name_mismatch_falls_back_to_none() {
        // The provider referenced by [llm.stt] ("quality") exists but has no stt_model;
        // a different provider ("openai-stt") does have one and must NOT be picked up.
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[[llm.providers]]
type = "openai"
name = "openai-stt"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#,
        );
        // "quality" has no stt_model — returns None for name-based lookup.
        assert!(cfg.stt_provider_entry().is_none());
    }

    #[test]
    fn stt_config_deserializes_new_slim_format() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
language = "en"
"#,
        );
        let stt = cfg.stt.as_ref().expect("stt section present");
        assert_eq!(stt.provider, "quality");
        assert_eq!(stt.language, "en");
    }

    #[test]
    fn stt_config_default_provider_is_empty() {
        // Verify the W4 fix: default_stt_provider() returns "" and not "whisper".
        assert_eq!(default_stt_provider(), "");
    }

    #[test]
    fn validate_stt_missing_provider_ok() {
        let cfg = parse_llm("[llm]\n");
        assert!(cfg.validate_stt().is_ok());
    }

    #[test]
    fn validate_stt_valid_reference() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
stt_model = "whisper-1"

[llm.stt]
provider = "quality"
"#,
        );
        assert!(cfg.validate_stt().is_ok());
    }

    #[test]
    fn validate_stt_nonexistent_provider_errors() {
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "nonexistent"
"#,
        );
        assert!(cfg.validate_stt().is_err());
    }

    #[test]
    fn validate_stt_provider_exists_but_no_stt_model_returns_ok_with_warn() {
        // MEDIUM: provider is found but has no stt_model — should return Ok (warn path, not error).
        let cfg = parse_llm(
            r#"
[llm]

[[llm.providers]]
type = "openai"
name = "quality"
model = "gpt-5.4"

[llm.stt]
provider = "quality"
"#,
        );
        // validate_stt must succeed (only a tracing::warn is emitted — not an error).
        assert!(cfg.validate_stt().is_ok());
        // stt_provider_entry must return None because no stt_model is set.
        assert!(
            cfg.stt_provider_entry().is_none(),
            "stt_provider_entry must be None when provider has no stt_model"
        );
    }

    // ─── BanditConfig::warmup_queries deserialization ─────────────────────────

    #[test]
    fn bandit_warmup_queries_explicit_value_is_deserialized() {
        let cfg = parse_llm(
            r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 50
"#,
        );
        let bandit = cfg
            .router
            .expect("router section must be present")
            .bandit
            .expect("bandit section must be present");
        assert_eq!(
            bandit.warmup_queries,
            Some(50),
            "warmup_queries = 50 must deserialize to Some(50)"
        );
    }

    #[test]
    fn bandit_warmup_queries_explicit_null_is_none() {
        // NOTE: despite its name, this test does not exercise a null/None value. It checks
        // that an explicit `warmup_queries = 0` deserializes to `Some(0)`. The omitted-field
        // case is covered by `bandit_warmup_queries_missing_field_defaults_to_none`.
        let cfg = parse_llm(
            r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
warmup_queries = 0
"#,
        );
        let bandit = cfg
            .router
            .expect("router section must be present")
            .bandit
            .expect("bandit section must be present");
        // 0 is a valid explicit value — it means "preserve computed default".
        assert_eq!(
            bandit.warmup_queries,
            Some(0),
            "warmup_queries = 0 must deserialize to Some(0)"
        );
    }

    #[test]
    fn bandit_warmup_queries_missing_field_defaults_to_none() {
        // When warmup_queries is omitted entirely, #[serde(default)] must produce None.
        let cfg = parse_llm(
            r#"
[llm]

[llm.router]
strategy = "bandit"

[llm.router.bandit]
alpha = 1.5
"#,
        );
        let bandit = cfg
            .router
            .expect("router section must be present")
            .bandit
            .expect("bandit section must be present");
        assert_eq!(
            bandit.warmup_queries, None,
            "omitted warmup_queries must default to None"
        );
    }

    // ─── ProviderName ─────────────────────────────────────────────────────────

    #[test]
    fn provider_name_new_and_as_str() {
        let n = ProviderName::new("fast");
        assert_eq!(n.as_str(), "fast");
        assert!(!n.is_empty());
    }

    #[test]
    fn provider_name_default_is_empty() {
        let n = ProviderName::default();
        assert!(n.is_empty());
        assert_eq!(n.as_str(), "");
    }

    #[test]
    fn provider_name_deref_to_str() {
        let n = ProviderName::new("quality");
        let s: &str = &n;
        assert_eq!(s, "quality");
    }

    #[test]
    fn provider_name_partial_eq_str() {
        let n = ProviderName::new("fast");
        assert_eq!(n, "fast");
        assert_ne!(n, "slow");
    }

    #[test]
    fn provider_name_serde_roundtrip() {
        let n = ProviderName::new("my-provider");
        let json = serde_json::to_string(&n).expect("serialize");
        assert_eq!(json, "\"my-provider\"");
        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(back, n);
    }

    #[test]
    fn provider_name_serde_empty_roundtrip() {
        let n = ProviderName::default();
        let json = serde_json::to_string(&n).expect("serialize");
        assert_eq!(json, "\"\"");
        let back: ProviderName = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(back, n);
        assert!(back.is_empty());
    }

    // ─── GonkaNode / ProviderKind::Gonka ─────────────────────────────────────

    // Gonka entry named "my-gonka" that carries the given node list.
    fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Gonka,
            name: Some("my-gonka".into()),
            gonka_nodes: nodes,
            ..Default::default()
        }
    }

    // Three nodes that pass validation: two https URLs with names, one http URL without a name.
    fn valid_gonka_nodes() -> Vec<GonkaNode> {
        vec![
            GonkaNode {
                url: "https://node1.gonka.ai".into(),
                address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
                name: Some("node1".into()),
            },
            GonkaNode {
                url: "https://node2.gonka.ai".into(),
                address: "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum".into(),
                name: Some("node2".into()),
            },
            GonkaNode {
                url: "http://node3.internal".into(),
                address: "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg".into(),
                name: None,
            },
        ]
    }

    #[test]
    fn validate_gonka_valid() {
        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
        assert!(entry.validate().is_ok());
    }

    #[test]
    fn validate_gonka_empty_nodes_errors() {
        let entry = gonka_entry_with_nodes(vec![]);
        let err = entry.validate().unwrap_err();
        assert!(
            err.to_string().contains("gonka_nodes"),
            "error should mention gonka_nodes: {err}"
        );
    }

    #[test]
    fn validate_gonka_node_empty_url_errors() {
        let entry = gonka_entry_with_nodes(vec![GonkaNode {
            url: String::new(),
            address: "gonka1test".into(),
            name: None,
        }]);
        let err = entry.validate().unwrap_err();
        assert!(err.to_string().contains("url"), "{err}");
    }

    #[test]
    fn validate_gonka_node_invalid_scheme_errors() {
        let entry = gonka_entry_with_nodes(vec![GonkaNode {
            url: "ftp://node.gonka.ai".into(),
            address: "gonka1test".into(),
            name: None,
        }]);
        let err = entry.validate().unwrap_err();
        assert!(err.to_string().contains("http"), "{err}");
    }

    #[test]
    fn validate_gonka_without_name_errors() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Gonka,
            name: None,
            gonka_nodes: valid_gonka_nodes(),
            ..Default::default()
        };
        let err = entry.validate().unwrap_err();
        assert!(err.to_string().contains("gonka"), "{err}");
    }

    #[test]
    fn gonka_toml_round_trip() {
        // NOTE: only the TOML -> struct direction is exercised. The third node omits `name`,
        // which must come back as `None`.
        let toml = r#"
[llm]

[[llm.providers]]
type = "gonka"
name = "my-gonka"
gonka_chain_prefix = "custom-chain"

[[llm.providers.gonka_nodes]]
url = "https://node1.gonka.ai"
address = "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
name = "node1"

[[llm.providers.gonka_nodes]]
url = "https://node2.gonka.ai"
address = "gonka14h0ycu78h88wzldxc7e79vhw5xsde0n85evmum"
name = "node2"

[[llm.providers.gonka_nodes]]
url = "https://node3.gonka.ai"
address = "gonka1qyqszqgpqyqszqgpqyqszqgpqyqszqgpqyqszqg"
"#;
        let cfg = parse_llm(toml);
        assert_eq!(cfg.providers.len(), 1);
        let entry = &cfg.providers[0];
        assert_eq!(entry.provider_type, ProviderKind::Gonka);
        assert_eq!(entry.name.as_deref(), Some("my-gonka"));
        let nodes = &entry.gonka_nodes;
        assert_eq!(nodes.len(), 3);
        assert_eq!(nodes[0].url, "https://node1.gonka.ai");
        assert_eq!(
            nodes[0].address,
            "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6"
        );
        assert_eq!(nodes[0].name.as_deref(), Some("node1"));
        assert_eq!(nodes[2].name, None);
        assert_eq!(entry.gonka_chain_prefix.as_deref(), Some("custom-chain"));
    }

    #[test]
    fn gonka_default_chain_prefix() {
        let entry = gonka_entry_with_nodes(valid_gonka_nodes());
        assert_eq!(entry.effective_gonka_chain_prefix(), "gonka");
    }

    #[test]
    fn gonka_explicit_chain_prefix() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Gonka,
            name: Some("my-gonka".into()),
            gonka_nodes: valid_gonka_nodes(),
            gonka_chain_prefix: Some("my-chain".into()),
            ..Default::default()
        };
        assert_eq!(entry.effective_gonka_chain_prefix(), "my-chain");
    }

    #[test]
    fn effective_model_gonka_is_empty() {
        let entry = ProviderEntry {
            provider_type: ProviderKind::Gonka,
            model: None,
            ..Default::default()
        };
        assert_eq!(entry.effective_model(), "");
    }

    #[test]
    fn existing_configs_still_parse() {
        // A config that contains no gonka-specific keys must still deserialize.
        let toml = r#"
[llm]

[[llm.providers]]
type = "ollama"
model = "qwen3:8b"

[[llm.providers]]
type = "claude"
name = "claude"
model = "claude-sonnet-4-6"
"#;
        let cfg = parse_llm(toml);
        assert_eq!(cfg.providers.len(), 2);
        assert_eq!(cfg.providers[0].provider_type, ProviderKind::Ollama);
        assert_eq!(cfg.providers[1].provider_type, ProviderKind::Claude);
    }
}
zeph_config/providers.rs

zeph_config/
providers.rs