Skip to main content

khive_runtime/
config.rs

1//! RuntimeConfig, BackendId, NamespaceToken, and embedding model helpers.
2
3use std::sync::Arc;
4
5use khive_db::StorageBackend;
6use khive_gate::{ActorRef, AllowAllGate, GateRef};
7use khive_types::Namespace;
8use lattice_embed::EmbeddingModel;
9
10use crate::error::RuntimeResult;
11
12// ---- BackendId ----
13
/// Name of one backend within a (possibly multi-backend) deployment.
///
/// Single-backend deployments use the name `main`. In a multi-backend
/// deployment every `[[backends]]` entry is given its own distinct `BackendId`,
/// and the
/// [`SubstrateCoordinator`](kkernel::coordinator::SubstrateCoordinator) in
/// `kkernel` relies on it for node-to-backend resolution and for routing edges
/// that cross backends.
///
/// A single-backend `KhiveRuntime` carries `BackendId("main")` unless told
/// otherwise. When per-pack runtimes are constructed, the boot path in
/// `kkernel` or `khive-mcp` assigns the id through `RuntimeConfig::backend_id`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct BackendId(pub String);

impl BackendId {
    /// Name used by the default, single-backend setup.
    pub const MAIN: &'static str = "main";

    /// Build an id from anything that converts into a `String`.
    pub fn new(name: impl Into<String>) -> Self {
        let name: String = name.into();
        BackendId(name)
    }

    /// Id of the default `main` backend.
    pub fn main() -> Self {
        Self::new(Self::MAIN)
    }

    /// Borrow the backend name.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl std::fmt::Display for BackendId {
    /// Writes the bare backend name, with no quoting or decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
52
53// ---- Sealed token ----
54
/// Holds the marker type used to seal `NamespaceToken`.
///
/// The module is private and the marker is only `pub(crate)`, so code outside
/// this crate has no way to name or construct it.
mod private {
    /// Zero-sized marker stored in sealed types.
    #[derive(Debug, Clone)]
    pub(crate) struct Sealed;
}
59
60/// Authorization proof that a caller is permitted to access a specific namespace.
61///
62/// Created by [`VerbRegistry::dispatch`] after the gate approves the request.
63/// The sealed inner field prevents external code from constructing a token
64/// without going through the authorization path.
65#[derive(Clone, Debug)]
66pub struct NamespaceToken {
67    namespace: Namespace,
68    actor: ActorRef,
69    _sealed: private::Sealed,
70}
71
72impl NamespaceToken {
73    /// Mint an authorized token. Only callable from within `khive-runtime`.
74    pub(crate) fn mint_authorized(namespace: Namespace, actor: ActorRef) -> Self {
75        Self {
76            namespace,
77            actor,
78            _sealed: private::Sealed,
79        }
80    }
81
82    /// Convenience constructor for the local namespace with an anonymous actor.
83    ///
84    /// Only callable from within `khive-runtime`. External callers must use
85    /// [`KhiveRuntime::authorize`] to mint tokens.
86    // Used only in #[cfg(test)] blocks within this crate's src/ files.
87    #[allow(dead_code)]
88    pub(crate) fn local() -> Self {
89        Self::mint_authorized(Namespace::local(), ActorRef::anonymous())
90    }
91
92    /// Convenience constructor for a specific namespace with an anonymous actor.
93    ///
94    /// Only callable from within `khive-runtime`. External callers must use
95    /// [`KhiveRuntime::authorize`] to mint tokens.
96    // Used only in #[cfg(test)] blocks within this crate's src/ files.
97    #[allow(dead_code)]
98    pub(crate) fn for_namespace(ns: Namespace) -> Self {
99        Self::mint_authorized(ns, ActorRef::anonymous())
100    }
101
102    /// Return the namespace this token authorises access to.
103    pub fn namespace(&self) -> &Namespace {
104        &self.namespace
105    }
106
107    /// Return the actor reference embedded in this token.
108    pub fn actor(&self) -> &ActorRef {
109        &self.actor
110    }
111
112    /// Return a new token with the same actor but a different namespace.
113    ///
114    /// Used by packs that apply a namespace policy (e.g. the KG pack overrides the
115    /// caller's namespace to `Namespace::local()` so that entity/edge/note records
116    /// always land in the shared graph).
117    pub fn with_namespace(&self, ns: Namespace) -> Self {
118        Self::mint_authorized(ns, self.actor.clone())
119    }
120}
121
122// ---- RuntimeConfig ----
123
/// Runtime configuration.
///
/// The `db_path` and `embedding_model` fields are deprecated in favour of
/// constructing the backend externally and calling [`KhiveRuntime::from_backend`].
/// They remain for backward compatibility with tests and single-binary deployments.
///
/// `RuntimeConfig::default()` derives several fields from environment variables
/// (`HOME`, `KHIVE_EMBEDDING_MODEL`, `KHIVE_ADDITIONAL_EMBEDDING_MODELS`,
/// `KHIVE_PACKS`, `KHIVE_BRAIN_PROFILE`); see the individual fields below.
#[derive(Clone, Debug)]
pub struct RuntimeConfig {
    /// Path to the SQLite database file. `None` = in-memory (tests).
    ///
    /// Deprecated: use [`KhiveRuntime::from_backend`] instead. The boot path
    /// constructs backends from `khive.toml` (`AppConfig`) and passes them to
    /// `from_backend`. Direct `db_path` usage persists only in tests.
    ///
    /// Default: `$HOME/.khive/khive.db`, or `None` when `HOME` is not set.
    pub db_path: Option<std::path::PathBuf>,
    /// Namespace used when no explicit namespace is provided.
    /// Default: `Namespace::local()`.
    pub default_namespace: Namespace,
    /// Local embedding model. `None` disables embedding and hybrid vector search;
    /// `hybrid_search` then falls back to text-only.
    ///
    /// Deprecated: embedding engines move to a per-pack `EmbedderRegistry`.
    /// This field persists for backward compatibility until the embedder registry
    /// is fully plumbed.
    ///
    /// Default: the model named by `KHIVE_EMBEDDING_MODEL`, otherwise
    /// `EmbeddingModel::AllMiniLmL6V2`.
    pub embedding_model: Option<EmbeddingModel>,
    /// Additional embedding models to make available by request name.
    ///
    /// `embedding_model` remains the default used by existing `embed()` and
    /// `embed_batch()` callers. This list adds non-default models that can be
    /// selected with `embedder(name)`, `embed_with_model(...)`, memory
    /// `remember.embedding_model`, and memory `recall.embedding_model`.
    ///
    /// Default: the models listed in `KHIVE_ADDITIONAL_EMBEDDING_MODELS`; when
    /// that variable is unset, `EmbeddingModel::ParaphraseMultilingualMiniLmL12V2`.
    pub additional_embedding_models: Vec<EmbeddingModel>,
    /// Authorization gate consulted before each verb dispatch.
    /// Default: `AllowAllGate` (permissive). For production policy enforcement,
    /// plug in a Rego- or capability-witness-backed impl.
    pub gate: GateRef,
    /// Names of packs the transport layer should register into the VerbRegistry.
    /// The transport layer (e.g. `khive-mcp`) reads this list and instantiates
    /// the matching concrete pack types. Unknown names are reported as errors
    /// by the transport, not silently ignored.
    ///
    /// Default: the names parsed from `KHIVE_PACKS`; when that variable is unset
    /// or yields no names, `["kg", "gtd", "memory", "brain", "comm", "schedule",
    /// "knowledge"]`.
    pub packs: Vec<String>,
    /// Identifies this runtime's backend in a multi-backend deployment.
    ///
    /// Set by the boot path when constructing per-pack runtimes from `khive.toml`.
    /// Single-backend deployments use the default `BackendId::MAIN`.
    pub backend_id: BackendId,
    /// Brain profile to use for `memory.feedback` / `knowledge.feedback` and
    /// recall-time score boosting (ADR-035 §Brain profile configuration).
    ///
    /// Resolution order of the configured value, highest to lowest (ADR-035):
    /// the CLI flag, then `runtime.brain_profile` in project/global
    /// `khive.toml`, then the `KHIVE_BRAIN_PROFILE` env var as fallback
    /// default. Callers must keep env OUT of the base config they pass in (see
    /// `khive-mcp` serve.rs). Note that `RuntimeConfig::default()` itself fills
    /// this field from `KHIVE_BRAIN_PROFILE`, so a base built from `Default`
    /// carries the env value unless the caller clears it.
    ///
    /// The original comment additionally listed the following order.
    /// NOTE(review): it reads like the order applied at feedback time rather
    /// than at config-load time — confirm against ADR-035:
    /// 1. `--brain-profile` CLI flag (explicit only)
    /// 2. Namespace-bound profile resolved via `brain.resolve` at feedback time
    /// 3. Pack-local global tuning prior (default fallback)
    pub brain_profile: Option<String>,
}
180
/// Split a pack list written as one string into individual pack names.
///
/// Commas and any Unicode whitespace both act as separators. Each piece is
/// trimmed and empty pieces are discarded, so repeated or trailing separators
/// are harmless. Names are returned in input order; duplicates are kept.
pub fn parse_pack_list(s: &str) -> Vec<String> {
    let is_separator = |ch: char| ch.is_whitespace() || ch == ',';

    let mut names = Vec::new();
    for piece in s.split(is_separator) {
        let piece = piece.trim();
        if piece.is_empty() {
            continue;
        }
        names.push(piece.to_owned());
    }
    names
}
191
192impl Default for RuntimeConfig {
193    fn default() -> Self {
194        let db_path = std::env::var("HOME")
195            .ok()
196            .map(|h| std::path::PathBuf::from(h).join(".khive/khive.db"));
197        let embedding_model = std::env::var("KHIVE_EMBEDDING_MODEL")
198            .ok()
199            .and_then(|s| s.parse().ok())
200            .or(Some(EmbeddingModel::AllMiniLmL6V2));
201        let additional_embedding_models = std::env::var("KHIVE_ADDITIONAL_EMBEDDING_MODELS")
202            .ok()
203            .map(|s| parse_embedding_model_list(&s))
204            .unwrap_or_else(|| vec![EmbeddingModel::ParaphraseMultilingualMiniLmL12V2]);
205        let packs = std::env::var("KHIVE_PACKS")
206            .ok()
207            .map(|s| parse_pack_list(&s))
208            .filter(|v| !v.is_empty())
209            .unwrap_or_else(|| {
210                vec![
211                    "kg",
212                    "gtd",
213                    "memory",
214                    "brain",
215                    "comm",
216                    "schedule",
217                    "knowledge",
218                ]
219                .into_iter()
220                .map(String::from)
221                .collect()
222            });
223        let brain_profile = std::env::var("KHIVE_BRAIN_PROFILE")
224            .ok()
225            .filter(|s| !s.trim().is_empty());
226        Self {
227            db_path,
228            default_namespace: Namespace::local(),
229            embedding_model,
230            additional_embedding_models,
231            gate: Arc::new(AllowAllGate),
232            packs,
233            backend_id: BackendId::main(),
234            brain_profile,
235        }
236    }
237}
238
239// ---- Embedding model helpers ----
240
241/// Sanitize an embedding model name into a valid SQL table suffix.
242/// e.g. `bge-small-en-v1.5` -> `bge_small_en_v1_5`
243pub(crate) fn vec_model_key(model: EmbeddingModel) -> String {
244    sanitize_key(&model.to_string())
245}
246
/// Replace every character that is not an ASCII letter or digit with `_`.
///
/// The mapping is one character to one character, so the result has as many
/// characters as the input. Distinct inputs can map to the same key (for
/// example `a-b` and `a.b`).
pub(crate) fn sanitize_key(s: &str) -> String {
    let mut key = String::with_capacity(s.len());
    for ch in s.chars() {
        key.push(if ch.is_ascii_alphanumeric() { ch } else { '_' });
    }
    key
}
252
253pub(crate) fn build_embedder_registry(
254    config: &RuntimeConfig,
255) -> (crate::embedder_registry::EmbedderRegistry, Arc<str>) {
256    use crate::embedder_registry::{EmbedderRegistry, LatticeEmbedderProvider};
257    let mut registry = EmbedderRegistry::new();
258    for model in configured_embedding_models(config) {
259        registry.register(LatticeEmbedderProvider::new(model));
260    }
261    let default_embedder_name = config
262        .embedding_model
263        .map(|model| Arc::<str>::from(model.to_string()))
264        .unwrap_or_else(|| Arc::<str>::from(""));
265    (registry, default_embedder_name)
266}
267
268fn configured_embedding_models(config: &RuntimeConfig) -> Vec<EmbeddingModel> {
269    let mut models = Vec::new();
270    if let Some(model) = config.embedding_model {
271        models.push(model);
272    }
273    models.extend(config.additional_embedding_models.iter().copied());
274    models.sort_by_key(|model| model.to_string());
275    models.dedup();
276    models
277}
278
279pub(crate) fn register_configured_embedding_models(
280    backend: &StorageBackend,
281    config: &RuntimeConfig,
282) -> RuntimeResult<()> {
283    for model in configured_embedding_models(config) {
284        backend.register_embedding_model(
285            &model.to_string(),
286            model.model_id(),
287            model.key_version(),
288            model.dimensions() as u32,
289        )?;
290    }
291    Ok(())
292}
293
294/// Build a `RuntimeConfig` from a parsed `KhiveConfig`.
295///
296/// For each `[[engines]]` entry:
297/// - The engine flagged `default = true` becomes `RuntimeConfig::embedding_model`.
298/// - All other engines become `RuntimeConfig::additional_embedding_models`.
299///
300/// Model name validity is checked here: any engine whose `model` field cannot
301/// be parsed via `parse_embedding_model_alias` is skipped with a warning.
302///
303/// If `khive_cfg.engines` is empty, the returned `RuntimeConfig` uses the
304/// env-var-derived defaults from `RuntimeConfig::default()`.
305///
306/// When both a config file and `KHIVE_EMBEDDING_MODEL` env var are present,
307/// the caller is responsible for emitting a warning that env vars are ignored.
308/// This function purely converts `KhiveConfig` to `RuntimeConfig` fields.
309pub fn runtime_config_from_khive_config(
310    khive_cfg: &crate::engine_config::KhiveConfig,
311    base: RuntimeConfig,
312) -> RuntimeConfig {
313    // Apply actor.id as default_namespace when present and valid.
314    // KhiveConfig::validate() guarantees that actor.id, when present, is a
315    // structurally valid Namespace — so the Err arm here is unreachable for
316    // any config that passed load(). A panic here signals a caller contract
317    // violation (passing an unvalidated config).
318    let default_namespace = match khive_cfg.actor.id.as_deref() {
319        Some(id) if !id.is_empty() => match Namespace::parse(id) {
320            Ok(ns) => {
321                tracing::debug!(actor_id = id, "actor.id from config sets default_namespace");
322                ns
323            }
324            Err(e) => {
325                panic!(
326                    "actor.id {id:?} passed validation but Namespace::parse failed: {e}; \
327                     this is a bug — KhiveConfig must be validated before calling \
328                     runtime_config_from_khive_config"
329                );
330            }
331        },
332        _ => base.default_namespace.clone(),
333    };
334
335    // base.brain_profile must carry ONLY the explicit CLI tier — never an env
336    // value (env sits BELOW toml per ADR-035; the MCP resolver applies it after).
337    let brain_profile = base.brain_profile.clone().or_else(|| {
338        khive_cfg
339            .runtime
340            .brain_profile
341            .clone()
342            .filter(|s| !s.trim().is_empty())
343    });
344
345    if khive_cfg.engines.is_empty() {
346        return RuntimeConfig {
347            default_namespace,
348            brain_profile,
349            ..base
350        };
351    }
352
353    let mut embedding_model: Option<EmbeddingModel> = None;
354    let mut additional: Vec<EmbeddingModel> = Vec::new();
355
356    for engine in &khive_cfg.engines {
357        match parse_embedding_model_alias(&engine.model) {
358            Some(model) => {
359                if engine.default {
360                    embedding_model = Some(model);
361                } else {
362                    additional.push(model);
363                }
364            }
365            None => {
366                tracing::warn!(
367                    engine = %engine.name,
368                    model = %engine.model,
369                    "engine config: unknown model name; engine will be skipped"
370                );
371            }
372        }
373    }
374
375    RuntimeConfig {
376        embedding_model,
377        additional_embedding_models: additional,
378        default_namespace,
379        brain_profile,
380        ..base
381    }
382}
383
384/// Parse a comma- or whitespace-separated list of embedding model names.
385fn parse_embedding_model_list(s: &str) -> Vec<EmbeddingModel> {
386    parse_pack_list(s)
387        .into_iter()
388        .filter_map(|raw| {
389            let parsed = parse_embedding_model_alias(&raw);
390            if parsed.is_none() && !raw.trim().is_empty() {
391                tracing::warn!(
392                    model = %raw,
393                    "KHIVE_ADDITIONAL_EMBEDDING_MODELS contains unknown model name; ignored. \
394                     Valid forms: short alias like 'paraphrase' or a fully-qualified key \
395                     from lattice_embed::EmbeddingModel::from_str."
396                );
397            }
398            parsed
399        })
400        .collect()
401}
402
403pub(crate) fn parse_embedding_model_alias(name: &str) -> Option<EmbeddingModel> {
404    let normalized = name.trim().to_ascii_lowercase().replace('_', "-");
405    match normalized.as_str() {
406        "paraphrase" => Some(EmbeddingModel::ParaphraseMultilingualMiniLmL12V2),
407        _ => normalized.parse().ok(),
408    }
409}