Skip to main content

khive_runtime/
runtime.rs

1//! KhiveRuntime — composable handle to all storage capabilities.
2
3use std::sync::{Arc, RwLock};
4
5use khive_db::StorageBackend;
6use khive_gate::{ActorRef, AllowAllGate, GateRef};
7use khive_storage::{EntityStore, EventStore, GraphStore, NoteStore, SqlAccess};
8use khive_types::{EdgeEndpointRule, Namespace};
9use lattice_embed::{
10    CachedEmbeddingService, EmbeddingModel, EmbeddingService, NativeEmbeddingService,
11};
12use tokio::sync::OnceCell;
13
14use crate::error::RuntimeResult;
15
16// ---- BackendId ----
17
/// Identifies a named backend in a multi-backend deployment (ADR-009, ADR-028).
///
/// The `main` backend is the default single-backend name. Multi-backend deployments
/// assign each `[[backends]]` entry a distinct `BackendId`. The
/// [`SubstrateCoordinator`](kkernel::coordinator::SubstrateCoordinator) in `kkernel`
/// uses `BackendId` for node-to-backend resolution and cross-backend edge routing.
///
/// A single-backend `KhiveRuntime` has `BackendId("main")` unless configured otherwise.
/// The boot path in `kkernel` or `khive-mcp` sets the id via `RuntimeConfig::backend_id`
/// when constructing per-pack runtimes.
///
/// This is a thin newtype over the backend's name. Equality and hashing are
/// derived from the wrapped `String`, so two ids are equal exactly when their
/// names are byte-for-byte identical.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct BackendId(pub String);
30
31impl BackendId {
32    /// The default single-backend name.
33    pub const MAIN: &'static str = "main";
34
35    /// Construct from a string name.
36    pub fn new(name: impl Into<String>) -> Self {
37        Self(name.into())
38    }
39
40    /// The default `main` backend id.
41    pub fn main() -> Self {
42        Self(Self::MAIN.to_string())
43    }
44
45    /// Return the backend name as a `&str`.
46    pub fn as_str(&self) -> &str {
47        &self.0
48    }
49}
50
51impl std::fmt::Display for BackendId {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        f.write_str(&self.0)
54    }
55}
56
57// ---- Sealed token ----
58
mod private {
    /// Zero-sized marker that is only visible inside this crate.
    ///
    /// Code outside the crate cannot name this type, so a struct holding it
    /// in a field cannot be built there with a struct literal.
    #[derive(Clone, Debug)]
    pub(crate) struct Sealed;
}
63
/// Authorization proof that a caller is permitted to access a specific namespace.
///
/// Created by [`VerbRegistry::dispatch`] after the gate approves the request.
/// The sealed inner field prevents external code from constructing a token
/// without going through the authorization path.
///
/// All fields are private; read access goes through the `namespace()` and
/// `actor()` accessors.
#[derive(Clone, Debug)]
pub struct NamespaceToken {
    // Namespace this token grants access to.
    namespace: Namespace,
    // Actor the token was minted for.
    actor: ActorRef,
    // Crate-private marker; keeps construction inside this crate.
    _sealed: private::Sealed,
}
75
76impl NamespaceToken {
77    /// Mint an authorized token. Only callable from within `khive-runtime`.
78    pub(crate) fn mint_authorized(namespace: Namespace, actor: ActorRef) -> Self {
79        Self {
80            namespace,
81            actor,
82            _sealed: private::Sealed,
83        }
84    }
85
86    /// Convenience constructor for the local namespace with an anonymous actor.
87    ///
88    /// Only callable from within `khive-runtime`. External callers must use
89    /// [`KhiveRuntime::authorize`] to mint tokens.
90    // Used only in #[cfg(test)] blocks within this crate's src/ files.
91    #[allow(dead_code)]
92    pub(crate) fn local() -> Self {
93        Self::mint_authorized(Namespace::local(), ActorRef::anonymous())
94    }
95
96    /// Convenience constructor for a specific namespace with an anonymous actor.
97    ///
98    /// Only callable from within `khive-runtime`. External callers must use
99    /// [`KhiveRuntime::authorize`] to mint tokens.
100    // Used only in #[cfg(test)] blocks within this crate's src/ files.
101    #[allow(dead_code)]
102    pub(crate) fn for_namespace(ns: Namespace) -> Self {
103        Self::mint_authorized(ns, ActorRef::anonymous())
104    }
105
106    pub fn namespace(&self) -> &Namespace {
107        &self.namespace
108    }
109
110    pub fn actor(&self) -> &ActorRef {
111        &self.actor
112    }
113}
114
115// ---- RuntimeConfig ----
116
/// Runtime configuration.
///
/// Per ADR-028, the `db_path` and `embedding_model` fields are deprecated in favour of
/// constructing the backend externally and calling [`KhiveRuntime::from_backend`].
/// They remain for backward compatibility with tests and single-binary deployments.
///
/// The `Default` impl reads the `HOME`, `KHIVE_EMBEDDING_MODEL` and `KHIVE_PACKS`
/// environment variables; see the per-field notes below.
#[derive(Clone, Debug)]
pub struct RuntimeConfig {
    /// Path to the SQLite database file. `None` = in-memory (tests).
    ///
    /// Deprecated: use [`KhiveRuntime::from_backend`] instead. The boot path
    /// constructs backends from `khive.toml` (`AppConfig`) and passes them to
    /// `from_backend`. Direct `db_path` usage persists only in tests.
    ///
    /// `Default` sets this to `$HOME/.khive/khive-graph.db`, or `None` when
    /// `HOME` cannot be read.
    pub db_path: Option<std::path::PathBuf>,
    /// Namespace used when no explicit namespace is provided.
    pub default_namespace: Namespace,
    /// Local embedding model. `None` disables embedding and hybrid vector search;
    /// `hybrid_search` then falls back to text-only.
    ///
    /// Deprecated: per ADR-028/ADR-031, embedding engines move to a per-pack
    /// `EmbedderRegistry`. This field persists for backward compatibility until
    /// the embedder registry is fully plumbed.
    ///
    /// `Default` never yields `None` here: it parses `KHIVE_EMBEDDING_MODEL` and
    /// falls back to `EmbeddingModel::AllMiniLmL6V2` when the variable is unset
    /// or does not parse.
    pub embedding_model: Option<EmbeddingModel>,
    /// Authorization gate consulted before each verb dispatch (ADR-029).
    /// Default: `AllowAllGate` (permissive). For production policy enforcement,
    /// plug in a Rego- or capability-witness-backed impl.
    pub gate: GateRef,
    /// Names of packs the transport layer should register into the VerbRegistry.
    /// The transport layer (e.g. `khive-mcp`) reads this list and instantiates
    /// the matching concrete pack types. Unknown names are reported as errors
    /// by the transport, not silently ignored.
    /// Default: `["kg"]`, overridable through `KHIVE_PACKS` (see
    /// [`parse_pack_list`] for the accepted syntax).
    pub packs: Vec<String>,
    /// Identifies this runtime's backend in a multi-backend deployment (ADR-009, ADR-028).
    ///
    /// Set by the boot path when constructing per-pack runtimes from `khive.toml`.
    /// Single-backend deployments use the default `BackendId::MAIN`.
    pub backend_id: BackendId,
}
155
/// Split a pack list such as `"kg, gtd"` or `"kg gtd"` into individual names.
///
/// Commas and any Unicode whitespace both act as separators. Runs of
/// separators never produce empty names, and the input order is preserved.
/// Duplicates are kept as-is.
pub fn parse_pack_list(s: &str) -> Vec<String> {
    let mut packs = Vec::new();
    for chunk in s.split(',') {
        // `split_whitespace` already discards empty pieces and padding.
        packs.extend(chunk.split_whitespace().map(String::from));
    }
    packs
}
166
167impl Default for RuntimeConfig {
168    fn default() -> Self {
169        let db_path = std::env::var("HOME")
170            .ok()
171            .map(|h| std::path::PathBuf::from(h).join(".khive/khive-graph.db"));
172        let embedding_model = std::env::var("KHIVE_EMBEDDING_MODEL")
173            .ok()
174            .and_then(|s| s.parse().ok())
175            .or(Some(EmbeddingModel::AllMiniLmL6V2));
176        let packs = std::env::var("KHIVE_PACKS")
177            .ok()
178            .map(|s| parse_pack_list(&s))
179            .filter(|v| !v.is_empty())
180            .unwrap_or_else(|| vec!["kg".to_string()]);
181        Self {
182            db_path,
183            default_namespace: Namespace::local(),
184            embedding_model,
185            gate: Arc::new(AllowAllGate),
186            packs,
187            backend_id: BackendId::main(),
188        }
189    }
190}
191
192// ---- KhiveRuntime ----
193
/// Composable runtime handle used by the MCP server.
///
/// Wraps a `StorageBackend` and provides namespace-scoped accessor methods
/// for each storage capability, plus a lazily-loaded embedder.
///
/// Cloning is shallow for the `Arc`-held fields: clones share the same
/// backend, embedder cell and edge-rule list, while `config` is copied.
#[derive(Clone)]
pub struct KhiveRuntime {
    /// Storage backend that every store accessor delegates to.
    backend: Arc<StorageBackend>,
    /// Configuration this runtime was constructed with.
    config: RuntimeConfig,
    /// Embedding service, created on the first `embedder()` call.
    embedder: Arc<OnceCell<Arc<dyn EmbeddingService>>>,
    /// Pack-extensible edge endpoint rules (ADR-031). Shared across clones
    /// via `Arc<RwLock<_>>`; installed once by the transport after the
    /// `VerbRegistry` is built. Empty until installed — base rules
    /// (ADR-002) still apply on their own.
    edge_rules: Arc<RwLock<Vec<EdgeEndpointRule>>>,
}
209
210impl KhiveRuntime {
211    /// Create a new runtime with the given config.
212    ///
213    /// The config's `db_path` is used to open or create the SQLite backend.
214    /// For the preferred boot path in multi-backend deployments, use
215    /// [`from_backend`](Self::from_backend) instead.
216    pub fn new(config: RuntimeConfig) -> RuntimeResult<Self> {
217        let backend = match &config.db_path {
218            Some(path) => {
219                if let Some(parent) = path.parent() {
220                    std::fs::create_dir_all(parent).ok();
221                }
222                StorageBackend::sqlite(path)?
223            }
224            None => StorageBackend::memory()?,
225        };
226        Ok(Self {
227            backend: Arc::new(backend),
228            config,
229            embedder: Arc::new(OnceCell::new()),
230            edge_rules: Arc::new(RwLock::new(Vec::new())),
231        })
232    }
233
234    /// Construct a runtime from an already-opened backend (ADR-028 boot path).
235    ///
236    /// This is the preferred constructor for multi-backend deployments. The caller
237    /// (boot path in `kkernel` or `khive-mcp`) opens each backend from `khive.toml`,
238    /// then constructs a `KhiveRuntime` per pack using this method.
239    ///
240    /// The returned runtime has `db_path = None` and `embedding_model = None`; all
241    /// storage access is through the provided `backend`. Set `backend_id` and
242    /// `default_namespace` via the config builder pattern if non-defaults are needed.
243    pub fn from_backend(backend: Arc<StorageBackend>, config: RuntimeConfig) -> Self {
244        Self {
245            backend,
246            config,
247            embedder: Arc::new(OnceCell::new()),
248            edge_rules: Arc::new(RwLock::new(Vec::new())),
249        }
250    }
251
252    /// Create an in-memory runtime (for tests and ephemeral use).
253    pub fn memory() -> RuntimeResult<Self> {
254        Self::new(RuntimeConfig {
255            db_path: None,
256            default_namespace: Namespace::local(),
257            embedding_model: None,
258            gate: Arc::new(AllowAllGate),
259            packs: vec!["kg".to_string()],
260            backend_id: BackendId::main(),
261        })
262    }
263
264    /// Return the [`BackendId`] for this runtime's backend.
265    ///
266    /// Used by the [`SubstrateCoordinator`](kkernel::coordinator::SubstrateCoordinator)
267    /// to identify which backend owns a given node, and to detect cross-backend merges.
268    pub fn backend_id(&self) -> &BackendId {
269        &self.config.backend_id
270    }
271
272    /// Return a reference to the runtime config.
273    pub fn config(&self) -> &RuntimeConfig {
274        &self.config
275    }
276
277    /// Return a reference to the underlying storage backend.
278    pub fn backend(&self) -> &StorageBackend {
279        &self.backend
280    }
281
282    // ---- Store accessors (token-scoped) ----
283
284    /// Get an EntityStore scoped to the token's namespace.
285    pub fn entities(&self, token: &NamespaceToken) -> RuntimeResult<Arc<dyn EntityStore>> {
286        Ok(self
287            .backend
288            .entities_for_namespace(token.namespace().as_str())?)
289    }
290
291    /// Get a GraphStore scoped to the token's namespace.
292    pub fn graph(&self, token: &NamespaceToken) -> RuntimeResult<Arc<dyn GraphStore>> {
293        Ok(self
294            .backend
295            .graph_for_namespace(token.namespace().as_str())?)
296    }
297
298    /// Get a NoteStore scoped to the token's namespace.
299    pub fn notes(&self, token: &NamespaceToken) -> RuntimeResult<Arc<dyn NoteStore>> {
300        Ok(self
301            .backend
302            .notes_for_namespace(token.namespace().as_str())?)
303    }
304
305    /// Get an EventStore scoped to the token's namespace.
306    pub fn events(&self, token: &NamespaceToken) -> RuntimeResult<Arc<dyn EventStore>> {
307        Ok(self
308            .backend
309            .events_for_namespace(token.namespace().as_str())?)
310    }
311
312    /// Get the raw SQL access capability (for ad-hoc queries).
313    pub fn sql(&self) -> Arc<dyn SqlAccess> {
314        self.backend.sql()
315    }
316
317    /// Get a VectorStore for the configured embedding model, scoped to the token's namespace.
318    ///
319    /// Returns `Unconfigured("embedding_model")` if no model is set.
320    pub fn vectors(
321        &self,
322        token: &NamespaceToken,
323    ) -> RuntimeResult<Arc<dyn khive_storage::VectorStore>> {
324        let model = self
325            .config
326            .embedding_model
327            .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?;
328        Ok(self.backend.vectors_for_namespace(
329            &vec_model_key(model),
330            model.dimensions(),
331            token.namespace().as_str(),
332        )?)
333    }
334
335    /// Get a TextSearch index for the token's namespace entity corpus.
336    pub fn text(
337        &self,
338        token: &NamespaceToken,
339    ) -> RuntimeResult<Arc<dyn khive_storage::TextSearch>> {
340        let key = format!("entities_{}", sanitize_key(token.namespace().as_str()));
341        Ok(self.backend.text(&key)?)
342    }
343
344    /// Get a TextSearch index for the token's namespace notes corpus.
345    pub fn text_for_notes(
346        &self,
347        token: &NamespaceToken,
348    ) -> RuntimeResult<Arc<dyn khive_storage::TextSearch>> {
349        let key = format!("notes_{}", sanitize_key(token.namespace().as_str()));
350        Ok(self.backend.text(&key)?)
351    }
352
353    /// Mint an authorization token for the given namespace.
354    ///
355    /// This is the official OSS API for obtaining a [`NamespaceToken`]. In
356    /// local / single-user mode (the default) this always succeeds — there is
357    /// no multi-tenant gate to consult. Multi-tenant deployments replace the
358    /// gate with a policy-backed impl; this method would then enforce it.
359    pub fn authorize(&self, ns: Namespace) -> NamespaceToken {
360        NamespaceToken::mint_authorized(ns, ActorRef::anonymous())
361    }
362
363    /// Install the pack-aggregated edge endpoint rules (ADR-031).
364    ///
365    /// Called by the transport layer after the `VerbRegistry` is built so
366    /// that runtime-layer edge validation (in `validate_edge_relation_endpoints`)
367    /// can consult pack rules in addition to the ADR-002 base contract. Idempotent:
368    /// later calls overwrite the previous rule set.
369    pub fn install_edge_rules(&self, rules: Vec<EdgeEndpointRule>) {
370        if let Ok(mut guard) = self.edge_rules.write() {
371            *guard = rules;
372        }
373    }
374
375    /// Snapshot of currently-installed pack edge rules.
376    pub(crate) fn pack_edge_rules(&self) -> Vec<EdgeEndpointRule> {
377        self.edge_rules
378            .read()
379            .map(|g| g.clone())
380            .unwrap_or_default()
381    }
382
383    /// Get the lazily-initialized embedding service.
384    ///
385    /// Returns a `CachedEmbeddingService` wrapping a `NativeEmbeddingService`.
386    /// First call loads the model (cold start cost); subsequent calls are cheap and
387    /// benefit from LRU caching of repeated inputs.
388    ///
389    /// Returns `Unconfigured("embedding_model")` if no model is set.
390    pub async fn embedder(&self) -> RuntimeResult<Arc<dyn EmbeddingService>> {
391        let model = self
392            .config
393            .embedding_model
394            .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?;
395        let service = self
396            .embedder
397            .get_or_init(|| async move {
398                let native = Arc::new(NativeEmbeddingService::with_model(model));
399                let cached = CachedEmbeddingService::with_default_cache(native);
400                Arc::new(cached) as Arc<dyn EmbeddingService>
401            })
402            .await
403            .clone();
404        Ok(service)
405    }
406}
407
408/// Sanitize an embedding model into a valid SQL table suffix.
409/// e.g. `bge-small-en-v1.5` -> `bge_small_en_v1_5`
410pub(crate) fn vec_model_key(model: EmbeddingModel) -> String {
411    sanitize_key(&model.to_string())
412}
413
/// Replace every character that is not an ASCII letter or digit with `_`.
///
/// The mapping is one output character per input character, so distinct
/// inputs can produce the same key (e.g. `a-b` and `a.b`).
fn sanitize_key(s: &str) -> String {
    let mut key = String::with_capacity(s.len());
    for ch in s.chars() {
        key.push(if ch.is_ascii_alphanumeric() { ch } else { '_' });
    }
    key
}
419
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::sync::{Mutex, MutexGuard};

    /// Serializes the tests in this module that read or write the process
    /// environment. The default test harness runs tests on parallel threads,
    /// so unsynchronized env mutation would race with `RuntimeConfig::default()`.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Acquire [`ENV_LOCK`], recovering it if an earlier test panicked while
    /// holding it (the guarded `()` carries no state that could be corrupted).
    fn lock_env() -> MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Removes an environment variable for the lifetime of the guard and
    /// restores its prior value on drop — including when the test panics.
    struct UnsetEnvVar {
        key: &'static str,
        prior: Option<OsString>,
        // Declared last so it is released only after `Drop::drop` has
        // restored the variable.
        _lock: MutexGuard<'static, ()>,
    }

    impl UnsetEnvVar {
        fn new(key: &'static str) -> Self {
            let lock = lock_env();
            let prior = std::env::var_os(key);
            // SAFETY: ENV_LOCK is held, so no other env-touching test in this
            // module runs concurrently. NOTE(review): this does not cover code
            // outside this module that touches the environment during the
            // test run — confirm there is none.
            unsafe {
                std::env::remove_var(key);
            }
            Self {
                key,
                prior,
                _lock: lock,
            }
        }
    }

    impl Drop for UnsetEnvVar {
        fn drop(&mut self) {
            if let Some(value) = self.prior.take() {
                // SAFETY: ENV_LOCK is still held via `self._lock`; see `new`.
                unsafe {
                    std::env::set_var(self.key, value);
                }
            }
        }
    }

    #[test]
    fn memory_runtime_creates_successfully() {
        let rt = KhiveRuntime::memory().expect("memory runtime should create");
        assert!(rt.config().db_path.is_none());
    }

    #[test]
    fn file_runtime_creates_successfully() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.db");
        let config = RuntimeConfig {
            db_path: Some(path.clone()),
            default_namespace: Namespace::parse("test").unwrap(),
            embedding_model: None,
            gate: Arc::new(AllowAllGate),
            packs: vec!["kg".to_string()],
            backend_id: BackendId::main(),
        };
        let rt = KhiveRuntime::new(config).expect("file runtime should create");
        assert!(path.exists());
        assert_eq!(rt.config().default_namespace.as_str(), "test");
    }

    #[test]
    fn from_backend_uses_provided_backend() {
        let backend = Arc::new(StorageBackend::memory().expect("memory backend"));
        let config = RuntimeConfig {
            db_path: None,
            default_namespace: Namespace::local(),
            embedding_model: None,
            gate: Arc::new(AllowAllGate),
            packs: vec!["kg".to_string()],
            backend_id: BackendId::new("lore"),
        };
        let rt = KhiveRuntime::from_backend(backend, config);
        assert_eq!(rt.backend_id().as_str(), "lore");
        assert!(rt.config().db_path.is_none());
    }

    #[test]
    fn backend_id_defaults_to_main() {
        let rt = KhiveRuntime::memory().unwrap();
        assert_eq!(rt.backend_id().as_str(), BackendId::MAIN);
    }

    #[test]
    fn store_accessors_return_ok() {
        let rt = KhiveRuntime::memory().unwrap();
        let tok = NamespaceToken::local();
        assert!(rt.entities(&tok).is_ok());
        assert!(rt.graph(&tok).is_ok());
        assert!(rt.notes(&tok).is_ok());
        assert!(rt.events(&tok).is_ok());
    }

    #[test]
    fn vectors_returns_unconfigured_without_model() {
        let rt = KhiveRuntime::memory().unwrap();
        let tok = NamespaceToken::local();
        match rt.vectors(&tok) {
            Err(crate::RuntimeError::Unconfigured(s)) => assert_eq!(s, "embedding_model"),
            Err(other) => panic!("expected Unconfigured, got {:?}", other),
            Ok(_) => panic!("expected Err, got Ok"),
        }
    }

    #[test]
    fn vec_model_key_sanitizes_dots_and_dashes() {
        assert_eq!(
            vec_model_key(EmbeddingModel::BgeSmallEnV15),
            "bge_small_en_v1_5"
        );
        assert_eq!(
            vec_model_key(EmbeddingModel::BgeBaseEnV15),
            "bge_base_en_v1_5"
        );
        assert_eq!(
            vec_model_key(EmbeddingModel::AllMiniLmL6V2),
            "all_minilm_l6_v2"
        );
    }

    #[test]
    fn default_config_uses_allow_all_gate() {
        // `RuntimeConfig::default()` reads the environment, so it must not
        // overlap with the tests below that mutate it.
        let _env = lock_env();
        let cfg = RuntimeConfig::default();
        assert_eq!(cfg.default_namespace.as_str(), "local");
        let _: GateRef = cfg.gate.clone();
    }

    #[test]
    fn parse_pack_list_handles_comma_and_whitespace() {
        assert_eq!(parse_pack_list("kg"), vec!["kg".to_string()]);
        assert_eq!(
            parse_pack_list("kg,gtd"),
            vec!["kg".to_string(), "gtd".to_string()]
        );
        assert_eq!(
            parse_pack_list("  kg ,  gtd  "),
            vec!["kg".to_string(), "gtd".to_string()]
        );
        assert_eq!(
            parse_pack_list("kg gtd"),
            vec!["kg".to_string(), "gtd".to_string()]
        );
        assert_eq!(parse_pack_list(",,"), Vec::<String>::new());
        assert_eq!(parse_pack_list(""), Vec::<String>::new());
    }

    #[test]
    fn default_config_packs_falls_back_to_kg() {
        // Holds the env lock and restores KHIVE_PACKS when dropped.
        let _env = UnsetEnvVar::new("KHIVE_PACKS");
        let cfg = RuntimeConfig::default();
        assert_eq!(cfg.packs, vec!["kg".to_string()]);
    }

    #[test]
    fn default_config_uses_minilm_when_env_unset() {
        // Holds the env lock and restores KHIVE_EMBEDDING_MODEL when dropped.
        let _env = UnsetEnvVar::new("KHIVE_EMBEDDING_MODEL");
        let cfg = RuntimeConfig::default();
        assert_eq!(cfg.embedding_model, Some(EmbeddingModel::AllMiniLmL6V2));
    }
}