Skip to main content

khive_pack_knowledge/
lib.rs

//! pack-knowledge — knowledge corpus verbs for khive.
//!
//! Four tiers of functionality in one pack:
//!
//! **Corpus tier** (lore port — atoms + domains):
//! - `knowledge.upsert_atoms`    — bulk insert/update slug-keyed knowledge atoms
//! - `knowledge.upsert_domains`  — bulk insert/update domain groupings of atoms
//! - `knowledge.get`             — fetch one atom or domain by ID or slug
//! - `knowledge.list`            — paginated listing of atoms or domains
//! - `knowledge.delete_atoms`    — soft-delete atoms by slug or ID
//! - `knowledge.stats`           — corpus statistics (counts, coverage)
//! - `knowledge.index`           — backfill embeddings + FTS for atoms/domains
//! - `knowledge.fold`            — budget-constrained knapsack selection (token budgeting)
//! - `knowledge.search`          — TF-IDF + embedding rerank (default when embedder configured) over the corpus
//! - `knowledge.suggest`         — orientation: ranked domain suggestions for a query
//! - `knowledge.compose`         — orientation: markdown briefing from domains and atoms
//!
//! **Section tier** (ADR-048 Phase 2):
//! - `knowledge.edit`   — upsert sections for an atom (section-level, non-destructive)
//! - `knowledge.import` — ingest atlas markdown files as atoms with parsed sections
//!
//! **Section review tier** (ADR-049):
//! - `knowledge.challenge`  — mark a section as disputed (increments the atom's dispute_count)
//! - `knowledge.adjudicate` — resolve a disputed section (decrements the atom's dispute_count)
//!
//! **Concept tier** (KG sugar):
//! - `knowledge.learn`  — register a concept entity (commissive)
//! - `knowledge.cite`   — link a concept to its source paper via `introduced_by`
//! - `knowledge.topic`  — list/search concepts, optionally filtered by domain
//!
//! Load with `KHIVE_PACKS=kg,knowledge` or `--pack knowledge`.
28
29pub mod handlers;
30pub mod knowledge;
31
32use async_trait::async_trait;
33use serde_json::Value;
34
35use khive_runtime::pack::PackRuntime;
36use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError, VerbRegistry};
37use khive_types::{HandlerDef, Pack, ParamDef, VerbCategory, Visibility};
38
39use crate::knowledge::vamana;
40use crate::knowledge::KnowledgeHandlers;
41
42pub struct KnowledgePack {
43    pub(crate) runtime: KhiveRuntime,
44    pub(crate) ann: vamana::SharedAnn,
45}
46
47impl Pack for KnowledgePack {
48    const NAME: &'static str = "knowledge";
49    const NOTE_KINDS: &'static [&'static str] = &[];
50    const ENTITY_KINDS: &'static [&'static str] = &[];
51    const HANDLERS: &'static [HandlerDef] = &KNOWLEDGE_HANDLERS;
52    const REQUIRES: &'static [&'static str] = &["kg"];
53}
54
55static KNOWLEDGE_HANDLERS: [HandlerDef; 18] = [
56    // ── corpus tier ──────────────────────────────────────────────────────────
57    HandlerDef {
58        name: "knowledge.upsert_atoms",
59        description: "Bulk insert or update knowledge atoms by slug",
60        visibility: Visibility::Verb,
61        category: VerbCategory::Commissive,
62        params: &[
63            ParamDef {
64                name: "atoms",
65                param_type: "array<object>",
66                required: true,
67                description: "List of atoms: {slug, name, content, description?, tags?, properties?, finalized?}",
68            },
69            ParamDef {
70                name: "chunk_size",
71                param_type: "integer",
72                required: false,
73                description: "Per-chunk size for client-side chunking hint (max 5000)",
74            },
75        ],
76    },
77    HandlerDef {
78        name: "knowledge.upsert_domains",
79        description: "Bulk insert or update domain groupings of atoms",
80        visibility: Visibility::Verb,
81        category: VerbCategory::Commissive,
82        params: &[ParamDef {
83            name: "domains",
84            param_type: "array<object>",
85            required: true,
86            description: "List of domains: {slug, name, description?, tags?, members?}",
87        }],
88    },
89    HandlerDef {
90        name: "knowledge.get",
91        description: "Fetch a single atom or domain by UUID or slug",
92        visibility: Visibility::Verb,
93        category: VerbCategory::Assertive,
94        params: &[ParamDef {
95            name: "id",
96            param_type: "string",
97            required: true,
98            description: "Atom/domain UUID or slug",
99        }],
100    },
101    HandlerDef {
102        name: "knowledge.list",
103        description: "Paginated listing of atoms or domains",
104        visibility: Visibility::Verb,
105        category: VerbCategory::Assertive,
106        params: &[
107            ParamDef {
108                name: "type",
109                param_type: "string",
110                required: false,
111                description: "Filter: \"atom\" | \"domain\" (default: atom)",
112            },
113            ParamDef {
114                name: "limit",
115                param_type: "integer",
116                required: false,
117                description: "Max results (default 20, max 500)",
118            },
119            ParamDef {
120                name: "offset",
121                param_type: "integer",
122                required: false,
123                description: "Pagination offset",
124            },
125        ],
126    },
127    HandlerDef {
128        name: "knowledge.delete_atoms",
129        description: "Soft-delete atoms by slug or ID",
130        visibility: Visibility::Verb,
131        category: VerbCategory::Commissive,
132        params: &[ParamDef {
133            name: "ids",
134            param_type: "array<string>",
135            required: true,
136            description: "Atom slugs or UUIDs to delete",
137        }],
138    },
139    HandlerDef {
140        name: "knowledge.stats",
141        description: "Corpus statistics: atom count, domain count, coverage",
142        visibility: Visibility::Verb,
143        category: VerbCategory::Assertive,
144        params: &[],
145    },
146    HandlerDef {
147        name: "knowledge.index",
148        description: "Backfill embeddings + FTS for atoms/domains",
149        visibility: Visibility::Verb,
150        category: VerbCategory::Commissive,
151        params: &[
152            ParamDef {
153                name: "ids",
154                param_type: "array<string>",
155                required: false,
156                description: "Atom slugs/IDs to index. Omit to index all.",
157            },
158            ParamDef {
159                name: "batch_size",
160                param_type: "integer",
161                required: false,
162                description: "Page size for whole-corpus indexing (default 500, max 1000)",
163            },
164            ParamDef {
165                name: "insert_only",
166                param_type: "boolean",
167                required: false,
168                description: "Skip delete-then-insert (faster for fresh corpus backfill)",
169            },
170            ParamDef {
171                name: "rebuild_ann",
172                param_type: "boolean",
173                required: false,
174                description: "Rebuild in-memory Vamana ANN index from embeddings (default false)",
175            },
176        ],
177    },
178    HandlerDef {
179        name: "knowledge.fold",
180        description: "Budget-constrained knapsack selection of scored candidates",
181        visibility: Visibility::Verb,
182        category: VerbCategory::Assertive,
183        params: &[
184            ParamDef {
185                name: "candidates",
186                param_type: "array<object>",
187                required: true,
188                description: "Scored items: {id, score, size, content?, category?}",
189            },
190            ParamDef {
191                name: "budget",
192                param_type: "integer",
193                required: true,
194                description: "Token/size budget for the selected set",
195            },
196            ParamDef {
197                name: "min_score",
198                param_type: "number",
199                required: false,
200                description: "Minimum score threshold (default 0.0)",
201            },
202            ParamDef {
203                name: "category_weights",
204                param_type: "object",
205                required: false,
206                description: "Per-category score multipliers",
207            },
208        ],
209    },
210    HandlerDef {
211        name: "knowledge.search",
212        description: "TF-IDF ranked search over the knowledge corpus with embedding rerank (default when embedder is configured)",
213        visibility: Visibility::Verb,
214        category: VerbCategory::Assertive,
215        params: &[
216            ParamDef {
217                name: "query",
218                param_type: "string",
219                required: true,
220                description: "Search query text",
221            },
222            ParamDef {
223                name: "type",
224                param_type: "string",
225                required: false,
226                description: "Filter: \"atom\" | \"domain\" (default: both)",
227            },
228            ParamDef {
229                name: "role",
230                param_type: "string",
231                required: false,
232                description: "Agent role hint prepended to query for scoring",
233            },
234            ParamDef {
235                name: "limit",
236                param_type: "integer",
237                required: false,
238                description: "Max results (default 10, max 100)",
239            },
240            ParamDef {
241                name: "min_score",
242                param_type: "number",
243                required: false,
244                description: "Minimum TF-IDF score threshold (default 0.0)",
245            },
246            ParamDef {
247                name: "weights",
248                param_type: "object",
249                required: false,
250                description: "TF-IDF weight overrides: {w_name, w_description, w_tags, w_content, w_exact_name, w_bigram, expand_discount, coverage_alpha}",
251            },
252            ParamDef {
253                name: "decompose",
254                param_type: "boolean",
255                required: false,
256                description: "Enable query decomposition (default false)",
257            },
258            ParamDef {
259                name: "decompose_threshold",
260                param_type: "integer",
261                required: false,
262                description: "Min non-stop terms to trigger decomposition (default 4)",
263            },
264            ParamDef {
265                name: "intersection_bonus",
266                param_type: "number",
267                required: false,
268                description: "Score multiplier for multi-sub-query hits (default 0.25)",
269            },
270            ParamDef {
271                name: "rerank",
272                param_type: "boolean",
273                required: false,
274                description: "Enable embedding rerank (default true; set false to opt out; no-op if no embedder is configured)",
275            },
276            ParamDef {
277                name: "rerank_alpha",
278                param_type: "number",
279                required: false,
280                description: "TF-IDF vs embedding blend weight (default 0.7 = TF-IDF dominant)",
281            },
282        ],
283    },
284    HandlerDef {
285        name: "knowledge.suggest",
286        description: "Suggest relevant knowledge domains for a query",
287        visibility: Visibility::Verb,
288        category: VerbCategory::Assertive,
289        params: &[
290            ParamDef {
291                name: "query",
292                param_type: "string",
293                required: true,
294                description: "Orientation query text",
295            },
296            ParamDef {
297                name: "role",
298                param_type: "string",
299                required: false,
300                description: "Agent role hint prepended to query for scoring",
301            },
302            ParamDef {
303                name: "limit",
304                param_type: "integer",
305                required: false,
306                description: "Max domains (default 8, max 100)",
307            },
308        ],
309    },
310    HandlerDef {
311        name: "knowledge.compose",
312        description: "Compose a markdown briefing from selected knowledge domains and atoms",
313        visibility: Visibility::Verb,
314        category: VerbCategory::Assertive,
315        params: &[
316            ParamDef {
317                name: "domain_ids",
318                param_type: "array<string>",
319                required: false,
320                description: "Domain UUIDs or slugs whose member atoms should be included",
321            },
322            ParamDef {
323                name: "atom_ids",
324                param_type: "array<string>",
325                required: false,
326                description: "Atom UUIDs or slugs to include directly",
327            },
328            ParamDef {
329                name: "query",
330                param_type: "string",
331                required: true,
332                description: "Query used to rerank selected atom bodies",
333            },
334        ],
335    },
336    // ── section tier (ADR-048 Phase 2) ───────────────────────────────────────
337    HandlerDef {
338        name: "knowledge.edit",
339        description: "Upsert sections for an atom without wiping other sections",
340        visibility: Visibility::Verb,
341        category: VerbCategory::Commissive,
342        params: &[
343            ParamDef {
344                name: "id",
345                param_type: "string",
346                required: true,
347                description: "Atom UUID or slug to edit sections for",
348            },
349            ParamDef {
350                name: "sections",
351                param_type: "array<object>",
352                required: true,
353                description: "Sections to upsert: [{section_type, content, heading?, sort_order?}]",
354            },
355        ],
356    },
357    HandlerDef {
358        name: "knowledge.import",
359        description: "Ingest atlas markdown file(s) as atoms with parsed sections",
360        visibility: Visibility::Verb,
361        category: VerbCategory::Commissive,
362        params: &[
363            ParamDef {
364                name: "path",
365                param_type: "string",
366                required: true,
367                description: "Filesystem path to a markdown file or directory",
368            },
369            ParamDef {
370                name: "format",
371                param_type: "string",
372                required: false,
373                description: "Markdown format hint; only \"atlas_md\" supported (default)",
374            },
375            ParamDef {
376                name: "chunk_strategy",
377                param_type: "string",
378                required: false,
379                description: "\"section\" (one section per atom, default) or \"atom\" (entire file as one atom)",
380            },
381            ParamDef {
382                name: "namespace",
383                param_type: "string",
384                required: false,
385                description: "Namespace to write into; defaults to caller namespace",
386            },
387        ],
388    },
389    // ── section review tier (ADR-049) ────────────────────────────────────────
390    HandlerDef {
391        name: "knowledge.challenge",
392        description: "Mark a section as disputed and increment atom dispute_count",
393        visibility: Visibility::Verb,
394        category: VerbCategory::Commissive,
395        params: &[
396            ParamDef {
397                name: "atom_id",
398                param_type: "string",
399                required: true,
400                description: "Atom UUID or slug",
401            },
402            ParamDef {
403                name: "section_type",
404                param_type: "string",
405                required: true,
406                description: "Section type to challenge",
407            },
408            ParamDef {
409                name: "reason",
410                param_type: "string",
411                required: false,
412                description: "Optional challenge reason",
413            },
414        ],
415    },
416    HandlerDef {
417        name: "knowledge.adjudicate",
418        description: "Resolve a disputed section and decrement atom dispute_count",
419        visibility: Visibility::Verb,
420        category: VerbCategory::Commissive,
421        params: &[
422            ParamDef {
423                name: "atom_id",
424                param_type: "string",
425                required: true,
426                description: "Atom UUID or slug",
427            },
428            ParamDef {
429                name: "section_type",
430                param_type: "string",
431                required: true,
432                description: "Section type to adjudicate",
433            },
434            ParamDef {
435                name: "resolution",
436                param_type: "string",
437                required: true,
438                description: "\"accept\" (mark verified) or \"reject\" (mark reviewed)",
439            },
440        ],
441    },
442    // ── concept tier (KG sugar) ───────────────────────────────────────────────
443    HandlerDef {
444        name: "knowledge.learn",
445        description: "Register a concept entity with optional domain and tags",
446        visibility: Visibility::Verb,
447        category: VerbCategory::Commissive,
448        params: &[
449            ParamDef {
450                name: "name",
451                param_type: "string",
452                required: true,
453                description: "Concept name",
454            },
455            ParamDef {
456                name: "description",
457                param_type: "string",
458                required: false,
459                description: "Optional concept description",
460            },
461            ParamDef {
462                name: "domain",
463                param_type: "string",
464                required: false,
465                description: "Optional domain tag (folded into properties.domain)",
466            },
467            ParamDef {
468                name: "tags",
469                param_type: "array<string>",
470                required: false,
471                description: "Optional tag list",
472            },
473        ],
474    },
475    HandlerDef {
476        name: "knowledge.cite",
477        description: "Link a concept to the paper or source that introduced it",
478        visibility: Visibility::Verb,
479        category: VerbCategory::Commissive,
480        params: &[
481            ParamDef {
482                name: "concept_id",
483                param_type: "uuid",
484                required: true,
485                description: "Concept entity ID",
486            },
487            ParamDef {
488                name: "source_id",
489                param_type: "uuid",
490                required: true,
491                description: "Source entity ID; must be kind=document or kind=person (ADR-002 §introduced_by)",
492            },
493            ParamDef {
494                name: "weight",
495                param_type: "float",
496                required: false,
497                description: "Edge weight; defaults to 1.0",
498            },
499        ],
500    },
501    HandlerDef {
502        name: "knowledge.topic",
503        description: "List concepts filtered by domain or free-text query",
504        visibility: Visibility::Verb,
505        category: VerbCategory::Assertive,
506        params: &[
507            ParamDef {
508                name: "domain",
509                param_type: "string",
510                required: false,
511                description: "Filter to concepts with this domain tag",
512            },
513            ParamDef {
514                name: "query",
515                param_type: "string",
516                required: false,
517                description: "Free-text search across concept name + description",
518            },
519            ParamDef {
520                name: "limit",
521                param_type: "integer",
522                required: false,
523                description: "Max results; defaults to 20, capped at 100",
524            },
525        ],
526    },
527];
528
529impl KnowledgePack {
530    pub fn new(runtime: KhiveRuntime) -> Self {
531        Self {
532            runtime,
533            ann: vamana::new_shared(),
534        }
535    }
536}
537
538// ── ADR-027: inventory self-registration ──────────────────────────────────────
539
540struct KnowledgePackFactory;
541
542impl khive_runtime::PackFactory for KnowledgePackFactory {
543    fn name(&self) -> &'static str {
544        "knowledge"
545    }
546
547    fn requires(&self) -> &'static [&'static str] {
548        &["kg"]
549    }
550
551    fn create(&self, runtime: KhiveRuntime) -> Box<dyn khive_runtime::PackRuntime> {
552        Box::new(KnowledgePack::new(runtime))
553    }
554}
555
556inventory::submit! { khive_runtime::PackRegistration(&KnowledgePackFactory) }
557
558#[async_trait]
559impl PackRuntime for KnowledgePack {
560    fn name(&self) -> &str {
561        <KnowledgePack as Pack>::NAME
562    }
563
564    fn note_kinds(&self) -> &'static [&'static str] {
565        <KnowledgePack as Pack>::NOTE_KINDS
566    }
567
568    fn entity_kinds(&self) -> &'static [&'static str] {
569        <KnowledgePack as Pack>::ENTITY_KINDS
570    }
571
572    fn handlers(&self) -> &'static [HandlerDef] {
573        &KNOWLEDGE_HANDLERS
574    }
575
576    fn requires(&self) -> &'static [&'static str] {
577        <KnowledgePack as Pack>::REQUIRES
578    }
579
580    async fn warm(&self) {
581        knowledge::vamana::warm_known_snapshots(&self.runtime, &self.ann).await;
582        // Pre-warm the embedding model so the first reranked knowledge.search does not
583        // pay cold model-weight load in the request path (#595). Fire-and-forget so
584        // pack warm never blocks daemon startup.
585        if !self.runtime.default_embedder_name().is_empty() {
586            let runtime = self.runtime.clone();
587            tokio::spawn(async move {
588                let _ = runtime.embed("__khive_knowledge_warm__").await;
589            });
590        }
591    }
592
593    async fn dispatch(
594        &self,
595        verb: &str,
596        params: Value,
597        _registry: &VerbRegistry,
598        token: &NamespaceToken,
599    ) -> Result<Value, RuntimeError> {
600        match verb {
601            // corpus tier
602            "knowledge.upsert_atoms" => {
603                KnowledgeHandlers::upsert_atoms(&self.runtime, token, params).await
604            }
605            "knowledge.upsert_domains" => {
606                KnowledgeHandlers::upsert_domains(&self.runtime, token, params).await
607            }
608            "knowledge.get" => KnowledgeHandlers::get(&self.runtime, token, params).await,
609            "knowledge.list" => KnowledgeHandlers::list(&self.runtime, token, params).await,
610            "knowledge.delete_atoms" => {
611                KnowledgeHandlers::delete_atoms(&self.runtime, token, params).await
612            }
613            "knowledge.stats" => KnowledgeHandlers::stats(&self.runtime, token, params).await,
614            "knowledge.index" => {
615                KnowledgeHandlers::index(&self.runtime, token, params, &self.ann).await
616            }
617            "knowledge.fold" => KnowledgeHandlers::fold(&self.runtime, token, params).await,
618            "knowledge.search" => {
619                KnowledgeHandlers::search(&self.runtime, token, params, &self.ann).await
620            }
621            "knowledge.suggest" => {
622                KnowledgeHandlers::suggest(&self.runtime, token, params, &self.ann).await
623            }
624            "knowledge.compose" => KnowledgeHandlers::compose(&self.runtime, token, params).await,
625            // section tier
626            "knowledge.edit" => KnowledgeHandlers::edit(&self.runtime, token, params).await,
627            "knowledge.import" => KnowledgeHandlers::import(&self.runtime, token, params).await,
628            "knowledge.challenge" => {
629                KnowledgeHandlers::challenge(&self.runtime, token, params).await
630            }
631            "knowledge.adjudicate" => {
632                KnowledgeHandlers::adjudicate(&self.runtime, token, params).await
633            }
634            // concept tier
635            "knowledge.learn" => self.handle_learn(token, params).await,
636            "knowledge.cite" => self.handle_cite(token, params).await,
637            "knowledge.topic" => self.handle_topic(token, params).await,
638            _ => Err(RuntimeError::InvalidInput(format!(
639                "knowledge pack does not handle verb {verb:?}"
640            ))),
641        }
642    }
643}