pub struct LlmConfig {
pub providers: Vec<ProviderEntry>,
pub routing: LlmRoutingStrategy,
pub routes: HashMap<String, Vec<String>>,
pub embedding_model: String,
pub candle: Option<CandleConfig>,
pub stt: Option<SttConfig>,
pub response_cache_enabled: bool,
pub response_cache_ttl_secs: u64,
pub semantic_cache_enabled: bool,
pub semantic_cache_threshold: f32,
pub semantic_cache_max_candidates: u32,
pub router_ema_enabled: bool,
pub router_ema_alpha: f64,
pub router_reorder_interval: u64,
pub router: Option<RouterConfig>,
pub instruction_file: Option<PathBuf>,
pub summary_model: Option<String>,
pub summary_provider: Option<ProviderEntry>,
pub complexity_routing: Option<ComplexityRoutingConfig>,
}
LLM configuration, nested under [llm] in TOML.
Declares the provider pool and controls routing, embedding, caching, and STT.
All providers are declared in [[llm.providers]]; subsystems reference them by
the name field using a *_provider config key.
§Example (TOML)
[[llm.providers]]
name = "fast"
type = "openai"
model = "gpt-4o-mini"
[[llm.providers]]
name = "quality"
type = "claude"
model = "claude-opus-4-5"
[llm]
routing = "none"
embedding_model = "qwen3-embedding"
Fields§
§providers: Vec<ProviderEntry>
Provider pool. First entry is default unless one is marked default = true.
routing: LlmRoutingStrategy
Routing strategy for multi-provider configs.
routes: HashMap<String, Vec<String>>
Task-based routes (only used when routing = "task").
embedding_model: String
§candle: Option<CandleConfig>
§stt: Option<SttConfig>
§response_cache_enabled: bool
§response_cache_ttl_secs: u64
§semantic_cache_enabled: bool
Enable semantic similarity-based response caching. Requires embedding support.
semantic_cache_threshold: f32
Cosine similarity threshold for semantic cache hits (0.0–1.0).
Only the highest-scoring candidate above this threshold is returned. Lower values produce more cache hits but risk returning less relevant responses. Recommended range: 0.92–0.98; default: 0.95.
semantic_cache_max_candidates: u32
Maximum cached entries to examine per semantic lookup (SQL LIMIT clause in
ResponseCache::get_semantic()). Controls the recall-vs-performance tradeoff:
- Higher values (e.g. 50): scan more entries, better chance of finding a semantically similar cached response, but slower queries.
- Lower values (e.g. 5): faster queries, but may miss relevant cached entries when the cache is large.
- Default (10): balanced middle ground for typical workloads.
Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running
sessions with many cached responses); reduce to 5 for low-latency interactive use.
Env override: ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES.
router_ema_enabled: bool
§router_ema_alpha: f64
§router_reorder_interval: u64
§router: Option<RouterConfig>
Routing configuration for Thompson/Cascade strategies.
instruction_file: Option<PathBuf>
Provider-specific instruction file to inject into the system prompt.
Merged with agent.instruction_files at startup.
summary_model: Option<String>
Shorthand model spec for tool-pair summarization and context compaction.
Format: ollama/<model>, claude[/<model>], openai[/<model>], compatible/<name>, candle.
Ignored when [llm.summary_provider] is set.
summary_provider: Option<ProviderEntry>
Structured provider config for summarization. Takes precedence over summary_model.
complexity_routing: Option<ComplexityRoutingConfig>
Complexity triage routing configuration. Required when routing = "triage".
Implementations§
Source§impl LlmConfig
impl LlmConfig
Sourcepub fn effective_provider(&self) -> ProviderKind
pub fn effective_provider(&self) -> ProviderKind
Effective provider kind for the primary (first/default) provider in the pool.
Sourcepub fn effective_base_url(&self) -> &str
pub fn effective_base_url(&self) -> &str
Effective base URL for the primary provider.
Sourcepub fn effective_model(&self) -> &str
pub fn effective_model(&self) -> &str
Effective model for the primary provider.
Sourcepub fn stt_provider_entry(&self) -> Option<&ProviderEntry>
pub fn stt_provider_entry(&self) -> Option<&ProviderEntry>
Find the provider entry designated for STT.
Resolution priority:
1. `[llm.stt].provider` matches `[[llm.providers]].name` and the entry has `stt_model`
2. `[llm.stt].provider` is empty — fall through to auto-detect
3. First provider with `stt_model` set (auto-detect fallback)
4. `None` — STT disabled
Sourcepub fn check_legacy_format(&self) -> Result<(), ConfigError>
pub fn check_legacy_format(&self) -> Result<(), ConfigError>
Validate that the config uses the new [[llm.providers]] format.
§Errors
Returns ConfigError::Validation when no providers are configured.
Sourcepub fn validate_stt(&self) -> Result<(), ConfigError>
pub fn validate_stt(&self) -> Result<(), ConfigError>
Validate STT config cross-references.
§Errors
Returns ConfigError::Validation when the referenced STT provider does not exist.
Trait Implementations§
Source§impl<'de> Deserialize<'de> for LlmConfig
impl<'de> Deserialize<'de> for LlmConfig
Source§fn deserialize<__D>(
__deserializer: __D,
) -> Result<LlmConfig, <__D as Deserializer<'de>>::Error>
where
__D: Deserializer<'de>,
fn deserialize<__D>(
__deserializer: __D,
) -> Result<LlmConfig, <__D as Deserializer<'de>>::Error>
where
__D: Deserializer<'de>,
Source§impl Serialize for LlmConfig
impl Serialize for LlmConfig
Source§fn serialize<__S>(
&self,
__serializer: __S,
) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error>
where
__S: Serializer,
fn serialize<__S>(
&self,
__serializer: __S,
) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error>
where
__S: Serializer,
Auto Trait Implementations§
impl Freeze for LlmConfig
impl RefUnwindSafe for LlmConfig
impl Send for LlmConfig
impl Sync for LlmConfig
impl Unpin for LlmConfig
impl UnsafeUnpin for LlmConfig
impl UnwindSafe for LlmConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request