// khive_pack_knowledge/lib.rs

1//! pack-knowledge — knowledge corpus verbs for khive.
2
3pub(crate) mod handlers;
4pub(crate) mod knowledge;
5mod pack;
6mod vocab;
7
8pub use pack::KnowledgePack;
9
10use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError};
11use serde_json::{json, Value};
12
/// Options for [`reindex_knowledge`].
///
/// Every flag is independent; with both `atoms` and `sections` set to `false`
/// the reindex performs no embedding work and reports all-zero counters.
#[derive(Debug, Clone, Copy)]
pub struct KnowledgeReindexOptions {
    /// Embed atoms. Whether the atom Vamana ANN is rebuilt as part of that
    /// pass is controlled separately by `rebuild_ann`.
    pub atoms: bool,
    /// Embed sections into `knowledge_sections.embedding` (ADR-051).
    pub sections: bool,
    /// Re-embed everything; when false, only fill missing vectors.
    pub drop_existing: bool,
    /// Rebuild the atom Vamana ANN snapshot (only meaningful with `atoms`).
    pub rebuild_ann: bool,
    /// Records per embedding batch.
    ///
    /// When `None`, no `batch_size` is forwarded to the atom indexer (its own
    /// default applies) and sections are embedded in batches of 500.
    pub batch_size: Option<u32>,
}
27
28/// Reindex the knowledge corpus for `token`'s namespace: embed atoms and/or
29/// sections with the default embedder and (optionally) rebuild the atom Vamana
30/// ANN snapshot.
31///
32/// Library entry for `kkernel reindex` — callable without an MCP server.
33/// Knowledge is single-model (search retrieves via the default embedder's ANN),
34/// so this does not fan out across registered models the way entity/note
35/// reindex does. Returns `{atoms_indexed, sections_indexed, failed, ann_failed,
36/// sections_failed}`.
37///
38/// Optional progress callbacks receive `(processed, total)` after each batch.
39pub async fn reindex_knowledge(
40    runtime: &KhiveRuntime,
41    token: &NamespaceToken,
42    opts: KnowledgeReindexOptions,
43    on_atom_progress: Option<&(dyn Fn(u64, u64) + Send + Sync)>,
44    on_section_progress: Option<&(dyn Fn(u64, u64) + Send + Sync)>,
45) -> Result<Value, RuntimeError> {
46    let mut atoms_indexed = 0u64;
47    let mut failed = 0u64;
48    let mut ann_failed = false;
49    if opts.atoms {
50        let ann = knowledge::vamana::new_shared();
51        let mut params = serde_json::Map::new();
52        params.insert("rebuild_ann".into(), Value::Bool(opts.rebuild_ann));
53        params.insert("insert_only".into(), Value::Bool(!opts.drop_existing));
54        if let Some(bs) = opts.batch_size {
55            params.insert("batch_size".into(), Value::from(bs));
56        }
57        let result = knowledge::KnowledgeHandlers::index(
58            runtime,
59            token,
60            Value::Object(params),
61            &ann,
62            on_atom_progress,
63        )
64        .await?;
65        atoms_indexed = result.get("indexed").and_then(|n| n.as_u64()).unwrap_or(0);
66        failed = result.get("failed").and_then(|n| n.as_u64()).unwrap_or(0);
67        ann_failed = result
68            .get("ann_failed")
69            .and_then(|b| b.as_bool())
70            .unwrap_or(false);
71    }
72
73    let mut sections_indexed = 0u64;
74    let mut sections_failed = 0u64;
75    if opts.sections {
76        let batch = opts.batch_size.unwrap_or(500) as usize;
77        let (indexed, _skipped, sec_failed) = knowledge::sections_index::embed_sections(
78            runtime,
79            token,
80            opts.drop_existing,
81            batch,
82            on_section_progress,
83        )
84        .await?;
85        sections_indexed = indexed as u64;
86        sections_failed = sec_failed as u64;
87    }
88
89    Ok(json!({
90        "atoms_indexed": atoms_indexed,
91        "sections_indexed": sections_indexed,
92        "failed": failed,
93        "ann_failed": ann_failed,
94        "sections_failed": sections_failed,
95    }))
96}