khive_runtime/config.rs
1//! RuntimeConfig, BackendId, NamespaceToken, and embedding model helpers.
2
3use std::sync::Arc;
4
5use khive_db::StorageBackend;
6use khive_gate::{ActorRef, AllowAllGate, GateRef};
7use khive_types::Namespace;
8use lattice_embed::EmbeddingModel;
9
10use crate::error::RuntimeResult;
11
12// ---- BackendId ----
13
/// Name of one storage backend inside a (possibly multi-backend) deployment.
///
/// Single-backend setups use the name `main`; a `KhiveRuntime` built without an
/// explicit id therefore carries `BackendId("main")`. In multi-backend setups every
/// `[[backends]]` entry gets its own distinct `BackendId`, which the
/// [`SubstrateCoordinator`](kkernel::coordinator::SubstrateCoordinator) in `kkernel`
/// uses to resolve nodes to backends and to route cross-backend edges.
///
/// The boot path in `kkernel` or `khive-mcp` assigns the id through
/// `RuntimeConfig::backend_id` while constructing per-pack runtimes.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct BackendId(pub String);

impl BackendId {
    /// Name used by the default, single-backend deployment.
    pub const MAIN: &'static str = "main";

    /// Build an id from anything convertible into a `String`.
    pub fn new(name: impl Into<String>) -> Self {
        let name: String = name.into();
        Self(name)
    }

    /// Build the id of the default backend (named [`BackendId::MAIN`]).
    pub fn main() -> Self {
        Self(String::from(Self::MAIN))
    }

    /// Borrow the backend name.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl std::fmt::Display for BackendId {
    /// Writes the bare backend name; width/alignment flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
52
53// ---- Sealed token ----
54
/// Holds the marker type embedded in `NamespaceToken`.
///
/// The module itself is private to this file's parent module and the marker is
/// only `pub(crate)`, so code outside this crate cannot name or construct it.
mod private {
    /// Zero-sized marker stored in the token's `_sealed` field.
    #[derive(Clone, Debug)]
    pub(crate) struct Sealed;
}
59
60/// Authorization proof that a caller is permitted to access a specific namespace.
61///
62/// Created by [`VerbRegistry::dispatch`] after the gate approves the request.
63/// The sealed inner field prevents external code from constructing a token
64/// without going through the authorization path.
65#[derive(Clone, Debug)]
66pub struct NamespaceToken {
67 namespace: Namespace,
68 actor: ActorRef,
69 _sealed: private::Sealed,
70}
71
72impl NamespaceToken {
73 /// Mint an authorized token. Only callable from within `khive-runtime`.
74 pub(crate) fn mint_authorized(namespace: Namespace, actor: ActorRef) -> Self {
75 Self {
76 namespace,
77 actor,
78 _sealed: private::Sealed,
79 }
80 }
81
82 /// Convenience constructor for the local namespace with an anonymous actor.
83 ///
84 /// Only callable from within `khive-runtime`. External callers must use
85 /// [`KhiveRuntime::authorize`] to mint tokens.
86 // Used only in #[cfg(test)] blocks within this crate's src/ files.
87 #[allow(dead_code)]
88 pub(crate) fn local() -> Self {
89 Self::mint_authorized(Namespace::local(), ActorRef::anonymous())
90 }
91
92 /// Convenience constructor for a specific namespace with an anonymous actor.
93 ///
94 /// Only callable from within `khive-runtime`. External callers must use
95 /// [`KhiveRuntime::authorize`] to mint tokens.
96 // Used only in #[cfg(test)] blocks within this crate's src/ files.
97 #[allow(dead_code)]
98 pub(crate) fn for_namespace(ns: Namespace) -> Self {
99 Self::mint_authorized(ns, ActorRef::anonymous())
100 }
101
102 /// Return the namespace this token authorises access to.
103 pub fn namespace(&self) -> &Namespace {
104 &self.namespace
105 }
106
107 /// Return the actor reference embedded in this token.
108 pub fn actor(&self) -> &ActorRef {
109 &self.actor
110 }
111
112 /// Return a new token with the same actor but a different namespace.
113 ///
114 /// Used by packs that apply a namespace policy (e.g. the KG pack overrides the
115 /// caller's namespace to `Namespace::local()` so that entity/edge/note records
116 /// always land in the shared graph).
117 pub fn with_namespace(&self, ns: Namespace) -> Self {
118 Self::mint_authorized(ns, self.actor.clone())
119 }
120}
121
122// ---- RuntimeConfig ----
123
124/// Runtime configuration.
125///
126/// The `db_path` and `embedding_model` fields are deprecated in favour of
127/// constructing the backend externally and calling [`KhiveRuntime::from_backend`].
128/// They remain for backward compatibility with tests and single-binary deployments.
129#[derive(Clone, Debug)]
130pub struct RuntimeConfig {
131 /// Path to the SQLite database file. `None` = in-memory (tests).
132 ///
133 /// Deprecated: use [`KhiveRuntime::from_backend`] instead. The boot path
134 /// constructs backends from `khive.toml` (`AppConfig`) and passes them to
135 /// `from_backend`. Direct `db_path` usage persists only in tests.
136 pub db_path: Option<std::path::PathBuf>,
137 /// Namespace used when no explicit namespace is provided.
138 pub default_namespace: Namespace,
139 /// Local embedding model. `None` disables embedding and hybrid vector search;
140 /// `hybrid_search` then falls back to text-only.
141 ///
142 /// Deprecated: embedding engines move to a per-pack `EmbedderRegistry`.
143 /// This field persists for backward compatibility until the embedder registry
144 /// is fully plumbed.
145 pub embedding_model: Option<EmbeddingModel>,
146 /// Additional embedding models to make available by request name.
147 ///
148 /// `embedding_model` remains the default used by existing `embed()` and
149 /// `embed_batch()` callers. This list adds non-default models that can be
150 /// selected with `embedder(name)`, `embed_with_model(...)`, memory
151 /// `remember.embedding_model`, and memory `recall.embedding_model`.
152 pub additional_embedding_models: Vec<EmbeddingModel>,
153 /// Authorization gate consulted before each verb dispatch.
154 /// Default: `AllowAllGate` (permissive). For production policy enforcement,
155 /// plug in a Rego- or capability-witness-backed impl.
156 pub gate: GateRef,
157 /// Names of packs the transport layer should register into the VerbRegistry.
158 /// The transport layer (e.g. `khive-mcp`) reads this list and instantiates
159 /// the matching concrete pack types. Unknown names are reported as errors
160 /// by the transport, not silently ignored.
161 /// Default: `["kg"]`.
162 pub packs: Vec<String>,
163 /// Identifies this runtime's backend in a multi-backend deployment.
164 ///
165 /// Set by the boot path when constructing per-pack runtimes from `khive.toml`.
166 /// Single-backend deployments use the default `BackendId::MAIN`.
167 pub backend_id: BackendId,
168 /// Brain profile to use for `memory.feedback` / `knowledge.feedback` and
169 /// recall-time score boosting (ADR-035 §Brain profile configuration).
170 ///
171 /// Resolution order (highest to lowest, ADR-035): CLI flag, then
172 /// `runtime.brain_profile` in project/global `khive.toml`, then the
173 /// `KHIVE_BRAIN_PROFILE` env var as fallback default. Callers must keep
174 /// env OUT of the base config they pass in (see `khive-mcp` serve.rs).
175 /// 1. `--brain-profile` CLI flag (explicit only)
176 /// 2. Namespace-bound profile resolved via `brain.resolve` at feedback time
177 /// 3. Pack-local global tuning prior (default fallback)
178 pub brain_profile: Option<String>,
179}
180
/// Split a pack list written as one string into individual pack names.
///
/// Commas and any Unicode whitespace both act as separators. Consecutive
/// separators produce no entries, so the result never contains empty or
/// whitespace-padded names. An empty or separator-only input yields an empty
/// vector.
pub fn parse_pack_list(s: &str) -> Vec<String> {
    let is_separator = |c: char| c == ',' || c.is_whitespace();

    let mut names = Vec::new();
    for piece in s.split(is_separator) {
        // Every whitespace character is a separator, so `piece` cannot carry
        // leading or trailing whitespace; only empty pieces need dropping.
        if piece.is_empty() {
            continue;
        }
        names.push(String::from(piece));
    }
    names
}
191
192impl Default for RuntimeConfig {
193 fn default() -> Self {
194 let db_path = std::env::var("HOME")
195 .ok()
196 .map(|h| std::path::PathBuf::from(h).join(".khive/khive.db"));
197 let embedding_model = std::env::var("KHIVE_EMBEDDING_MODEL")
198 .ok()
199 .and_then(|s| s.parse().ok())
200 .or(Some(EmbeddingModel::AllMiniLmL6V2));
201 let additional_embedding_models = std::env::var("KHIVE_ADDITIONAL_EMBEDDING_MODELS")
202 .ok()
203 .map(|s| parse_embedding_model_list(&s))
204 .unwrap_or_else(|| vec![EmbeddingModel::ParaphraseMultilingualMiniLmL12V2]);
205 let packs = std::env::var("KHIVE_PACKS")
206 .ok()
207 .map(|s| parse_pack_list(&s))
208 .filter(|v| !v.is_empty())
209 .unwrap_or_else(|| {
210 vec![
211 "kg",
212 "gtd",
213 "memory",
214 "brain",
215 "comm",
216 "schedule",
217 "knowledge",
218 ]
219 .into_iter()
220 .map(String::from)
221 .collect()
222 });
223 let brain_profile = std::env::var("KHIVE_BRAIN_PROFILE")
224 .ok()
225 .filter(|s| !s.trim().is_empty());
226 Self {
227 db_path,
228 default_namespace: Namespace::local(),
229 embedding_model,
230 additional_embedding_models,
231 gate: Arc::new(AllowAllGate),
232 packs,
233 backend_id: BackendId::main(),
234 brain_profile,
235 }
236 }
237}
238
239// ---- Embedding model helpers ----
240
241/// Sanitize an embedding model name into a valid SQL table suffix.
242/// e.g. `bge-small-en-v1.5` -> `bge_small_en_v1_5`
243pub(crate) fn vec_model_key(model: EmbeddingModel) -> String {
244 sanitize_key(&model.to_string())
245}
246
/// Replace every character that is not an ASCII letter or digit with `_`.
///
/// The mapping is one output character per input character, so the result has
/// as many characters as `s`; an empty input yields an empty string.
pub(crate) fn sanitize_key(s: &str) -> String {
    let mut key = String::with_capacity(s.len());
    for ch in s.chars() {
        let mapped = if ch.is_ascii_alphanumeric() { ch } else { '_' };
        key.push(mapped);
    }
    key
}
252
253pub(crate) fn build_embedder_registry(
254 config: &RuntimeConfig,
255) -> (crate::embedder_registry::EmbedderRegistry, Arc<str>) {
256 use crate::embedder_registry::{EmbedderRegistry, LatticeEmbedderProvider};
257 let mut registry = EmbedderRegistry::new();
258 for model in configured_embedding_models(config) {
259 registry.register(LatticeEmbedderProvider::new(model));
260 }
261 let default_embedder_name = config
262 .embedding_model
263 .map(|model| Arc::<str>::from(model.to_string()))
264 .unwrap_or_else(|| Arc::<str>::from(""));
265 (registry, default_embedder_name)
266}
267
268fn configured_embedding_models(config: &RuntimeConfig) -> Vec<EmbeddingModel> {
269 let mut models = Vec::new();
270 if let Some(model) = config.embedding_model {
271 models.push(model);
272 }
273 models.extend(config.additional_embedding_models.iter().copied());
274 models.sort_by_key(|model| model.to_string());
275 models.dedup();
276 models
277}
278
279pub(crate) fn register_configured_embedding_models(
280 backend: &StorageBackend,
281 config: &RuntimeConfig,
282) -> RuntimeResult<()> {
283 for model in configured_embedding_models(config) {
284 backend.register_embedding_model(
285 &model.to_string(),
286 model.model_id(),
287 model.key_version(),
288 model.dimensions() as u32,
289 )?;
290 }
291 Ok(())
292}
293
294/// Build a `RuntimeConfig` from a parsed `KhiveConfig`.
295///
296/// For each `[[engines]]` entry:
297/// - The engine flagged `default = true` becomes `RuntimeConfig::embedding_model`.
298/// - All other engines become `RuntimeConfig::additional_embedding_models`.
299///
300/// Model name validity is checked here: any engine whose `model` field cannot
301/// be parsed via `parse_embedding_model_alias` is skipped with a warning.
302///
303/// If `khive_cfg.engines` is empty, the returned `RuntimeConfig` uses the
304/// env-var-derived defaults from `RuntimeConfig::default()`.
305///
306/// When both a config file and `KHIVE_EMBEDDING_MODEL` env var are present,
307/// the caller is responsible for emitting a warning that env vars are ignored.
308/// This function purely converts `KhiveConfig` to `RuntimeConfig` fields.
309pub fn runtime_config_from_khive_config(
310 khive_cfg: &crate::engine_config::KhiveConfig,
311 base: RuntimeConfig,
312) -> RuntimeConfig {
313 // Apply actor.id as default_namespace when present and valid.
314 // KhiveConfig::validate() guarantees that actor.id, when present, is a
315 // structurally valid Namespace — so the Err arm here is unreachable for
316 // any config that passed load(). A panic here signals a caller contract
317 // violation (passing an unvalidated config).
318 let default_namespace = match khive_cfg.actor.id.as_deref() {
319 Some(id) if !id.is_empty() => match Namespace::parse(id) {
320 Ok(ns) => {
321 tracing::debug!(actor_id = id, "actor.id from config sets default_namespace");
322 ns
323 }
324 Err(e) => {
325 panic!(
326 "actor.id {id:?} passed validation but Namespace::parse failed: {e}; \
327 this is a bug — KhiveConfig must be validated before calling \
328 runtime_config_from_khive_config"
329 );
330 }
331 },
332 _ => base.default_namespace.clone(),
333 };
334
335 // base.brain_profile must carry ONLY the explicit CLI tier — never an env
336 // value (env sits BELOW toml per ADR-035; the MCP resolver applies it after).
337 let brain_profile = base.brain_profile.clone().or_else(|| {
338 khive_cfg
339 .runtime
340 .brain_profile
341 .clone()
342 .filter(|s| !s.trim().is_empty())
343 });
344
345 if khive_cfg.engines.is_empty() {
346 return RuntimeConfig {
347 default_namespace,
348 brain_profile,
349 ..base
350 };
351 }
352
353 let mut embedding_model: Option<EmbeddingModel> = None;
354 let mut additional: Vec<EmbeddingModel> = Vec::new();
355
356 for engine in &khive_cfg.engines {
357 match parse_embedding_model_alias(&engine.model) {
358 Some(model) => {
359 if engine.default {
360 embedding_model = Some(model);
361 } else {
362 additional.push(model);
363 }
364 }
365 None => {
366 tracing::warn!(
367 engine = %engine.name,
368 model = %engine.model,
369 "engine config: unknown model name; engine will be skipped"
370 );
371 }
372 }
373 }
374
375 RuntimeConfig {
376 embedding_model,
377 additional_embedding_models: additional,
378 default_namespace,
379 brain_profile,
380 ..base
381 }
382}
383
384/// Parse a comma- or whitespace-separated list of embedding model names.
385fn parse_embedding_model_list(s: &str) -> Vec<EmbeddingModel> {
386 parse_pack_list(s)
387 .into_iter()
388 .filter_map(|raw| {
389 let parsed = parse_embedding_model_alias(&raw);
390 if parsed.is_none() && !raw.trim().is_empty() {
391 tracing::warn!(
392 model = %raw,
393 "KHIVE_ADDITIONAL_EMBEDDING_MODELS contains unknown model name; ignored. \
394 Valid forms: short alias like 'paraphrase' or a fully-qualified key \
395 from lattice_embed::EmbeddingModel::from_str."
396 );
397 }
398 parsed
399 })
400 .collect()
401}
402
403pub(crate) fn parse_embedding_model_alias(name: &str) -> Option<EmbeddingModel> {
404 let normalized = name.trim().to_ascii_lowercase().replace('_', "-");
405 match normalized.as_str() {
406 "paraphrase" => Some(EmbeddingModel::ParaphraseMultilingualMiniLmL12V2),
407 _ => normalized.parse().ok(),
408 }
409}