Struct LlmConfig

Source

pub struct LlmConfig {
    pub providers: Vec<ProviderEntry>,
    pub routing: LlmRoutingStrategy,
    pub routes: HashMap<String, Vec<String>>,
    pub embedding_model: String,
    pub candle: Option<CandleConfig>,
    pub stt: Option<SttConfig>,
    pub response_cache_enabled: bool,
    pub response_cache_ttl_secs: u64,
    pub semantic_cache_enabled: bool,
    pub semantic_cache_threshold: f32,
    pub semantic_cache_max_candidates: u32,
    pub router_ema_enabled: bool,
    pub router_ema_alpha: f64,
    pub router_reorder_interval: u64,
    pub router: Option<RouterConfig>,
    pub instruction_file: Option<PathBuf>,
    pub summary_model: Option<String>,
    pub summary_provider: Option<ProviderEntry>,
    pub complexity_routing: Option<ComplexityRoutingConfig>,
}

Expand description

LLM configuration, nested under [llm] in TOML.

Declares the provider pool and controls routing, embedding, caching, and STT. All providers are declared in [[llm.providers]]; subsystems reference them by the name field using a *_provider config key.

§Example (TOML)

[[llm.providers]]
name = "fast"
type = "openai"
model = "gpt-4o-mini"

[[llm.providers]]
name = "quality"
type = "claude"
model = "claude-opus-4-5"

[llm]
routing = "none"
embedding_model = "qwen3-embedding"

Fields§

§providers: Vec<ProviderEntry>

Provider pool. First entry is default unless one is marked default = true.

§routing: LlmRoutingStrategy

Routing strategy for multi-provider configs.

§routes: HashMap<String, Vec<String>>

Task-based routes (only used when routing = "task").

§embedding_model: String

§candle: Option<CandleConfig>

§stt: Option<SttConfig>

§response_cache_enabled: bool

§response_cache_ttl_secs: u64

§semantic_cache_enabled: bool

Enable semantic similarity-based response caching. Requires embedding support.

§semantic_cache_threshold: f32

Cosine similarity threshold for semantic cache hits (0.0–1.0).

Only the highest-scoring candidate above this threshold is returned. Lower values produce more cache hits but risk returning less relevant responses. Recommended range: 0.92–0.98; default: 0.95.

§semantic_cache_max_candidates: u32

Maximum cached entries to examine per semantic lookup (SQL LIMIT clause in ResponseCache::get_semantic()). Controls the recall-vs-performance tradeoff:

Higher values (e.g. 50): scan more entries, better chance of finding a semantically similar cached response, but slower queries.
Lower values (e.g. 5): faster queries, but may miss relevant cached entries when the cache is large.
Default (10): balanced middle ground for typical workloads.

Tuning guidance: set to 50+ when recall matters more than latency (e.g. long-running sessions with many cached responses); reduce to 5 for low-latency interactive use. Env override: ZEPH_LLM_SEMANTIC_CACHE_MAX_CANDIDATES.

§router_ema_enabled: bool

§router_ema_alpha: f64

§router_reorder_interval: u64

§router: Option<RouterConfig>

Routing configuration for Thompson/Cascade strategies.

§instruction_file: Option<PathBuf>

Provider-specific instruction file to inject into the system prompt. Merged with agent.instruction_files at startup.

§summary_model: Option<String>

Shorthand model spec for tool-pair summarization and context compaction. Format: ollama/<model>, claude[/<model>], openai[/<model>], compatible/<name>, candle. Ignored when [llm.summary_provider] is set.

§summary_provider: Option<ProviderEntry>

Structured provider config for summarization. Takes precedence over summary_model.

§complexity_routing: Option<ComplexityRoutingConfig>

Complexity triage routing configuration. Required when routing = "triage".

Struct LlmConfig Copy item path

§Example (TOML)

Fields§

Implementations§

impl LlmConfig

pub fn effective_provider(&self) -> ProviderKind

pub fn effective_base_url(&self) -> &str

pub fn effective_model(&self) -> &str

pub fn stt_provider_entry(&self) -> Option<&ProviderEntry>

pub fn check_legacy_format(&self) -> Result<(), ConfigError>

§Errors

pub fn validate_stt(&self) -> Result<(), ConfigError>

§Errors

Trait Implementations§

impl Debug for LlmConfig

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

impl<'de> Deserialize<'de> for LlmConfig

fn deserialize<__D>( __deserializer: __D, ) -> Result<LlmConfig, <__D as Deserializer<'de>>::Error> where __D: Deserializer<'de>,

impl Serialize for LlmConfig

fn serialize<__S>( &self, __serializer: __S, ) -> Result<<__S as Serializer>::Ok, <__S as Serializer>::Error> where __S: Serializer,

Auto Trait Implementations§

impl Freeze for LlmConfig

impl RefUnwindSafe for LlmConfig

impl Send for LlmConfig

impl Sync for LlmConfig

impl Unpin for LlmConfig

impl UnsafeUnpin for LlmConfig

impl UnwindSafe for LlmConfig

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> IntoRequest<T> for T

fn into_request(self) -> Request<T>

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

type Output = T

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

Struct LlmConfig

fn deserialize<D>( deserializer: D, ) -> Result<LlmConfig, <D as Deserializer<'de>>::Error>
where D: Deserializer<'de>,

fn serialize<S>( &self, serializer: S, ) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,