Skip to main content

nenjo_knowledge/
lib.rs

//! Shared knowledge pack primitives.
//!
//! Knowledge packs expose a common metadata/search/read API for
//! filesystem or remote document sets.
5
6use std::borrow::Cow;
7use std::collections::BTreeSet;
8use std::path::{Path, PathBuf};
9
10use anyhow::{Context, Result};
11use serde::{Deserialize, Deserializer, Serialize, Serializer};
12
13pub mod tools;
14
15/// Shared read-only metadata contract for any knowledge pack manifest.
16///
17/// This trait intentionally covers only pack identity and document metadata.
18/// Concrete pack manifests, such as project or remote manifests, should expose
19/// their own sync/cache mutation methods on their concrete types.
20pub trait KnowledgePackManifest: Send + Sync {
21    fn pack_id(&self) -> &str;
22    fn version(&self) -> &str;
23    fn schema_version(&self) -> u32;
24    fn root_uri(&self) -> &str;
25    fn content_hash(&self) -> &str;
26    fn docs(&self) -> &[KnowledgeDocManifest];
27
28    fn read_doc_manifest(&self, selector: &str) -> Option<&KnowledgeDocManifest> {
29        self.docs().iter().find(|doc| {
30            doc.id == selector || doc.selector == selector || doc.source_path == selector
31        })
32    }
33}
34
/// Serializable base manifest used by read-only packs and generic consumers.
///
/// Project and remote packs may deserialize into richer concrete types, but
/// their document entries should still use [`KnowledgeDocManifest`] so agents
/// and MCP tools see one metadata schema across all pack sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgePackManifestData {
    /// Pack identifier.
    pub pack_id: String,
    /// Pack version string.
    pub version: String,
    /// Manifest schema version.
    pub schema_version: u32,
    /// Root URI of the pack.
    pub root_uri: String,
    /// Content hash of the pack; deserializes to an empty string when absent.
    #[serde(default)]
    pub content_hash: String,
    /// Metadata for each document in the pack.
    pub docs: Vec<KnowledgeDocManifest>,
}
50
51impl KnowledgePackManifest for KnowledgePackManifestData {
52    fn pack_id(&self) -> &str {
53        &self.pack_id
54    }
55
56    fn version(&self) -> &str {
57        &self.version
58    }
59
60    fn schema_version(&self) -> u32 {
61        self.schema_version
62    }
63
64    fn root_uri(&self) -> &str {
65        &self.root_uri
66    }
67
68    fn content_hash(&self) -> &str {
69        &self.content_hash
70    }
71
72    fn docs(&self) -> &[KnowledgeDocManifest] {
73        &self.docs
74    }
75}
76
/// Stored metadata for one knowledge document.
///
/// Tool responses expose a slimmer projection of this type. `source_path` and
/// `updated_at` are retained for pack hydration and local sync, not for agent
/// selection.
///
/// When deserializing, every field except `updated_at` must be present;
/// `updated_at` falls back to an empty string.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocManifest {
    /// Stable document identifier within the pack.
    pub id: String,
    /// Agent-visible selector used for lookup and graph traversal.
    pub selector: String,
    /// Pack-local file path used to load the document body.
    pub source_path: String,
    /// Human-readable title.
    pub title: String,
    /// Short summary used for search and selection.
    pub summary: String,
    /// Open-ended document category normalized to a slug.
    pub kind: KnowledgeDocKind,
    /// Lightweight classification labels.
    pub tags: Vec<String>,
    /// Outbound graph edges authored on this document.
    pub related: Vec<KnowledgeDocEdge>,
    /// Sync timestamp for local library packs.
    #[serde(default)]
    pub updated_at: String,
}
104
/// Relationship type carried by an edge between two documents.
///
/// Variants are serialized in `snake_case` (for example `PartOf` becomes
/// `part_of`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum KnowledgeDocEdgeType {
    PartOf,
    Defines,
    Governs,
    Classifies,
    References,
    DependsOn,
    Extends,
    RelatedTo,
}
117
/// Document category stored as a string newtype.
///
/// The inner string is private; values are built through
/// `KnowledgeDocKind::new`, which normalizes its input to a slug.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct KnowledgeDocKind(String);
120
/// Authored outbound edge from one document to another document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocEdge {
    /// Relationship type. Serialized under the key `type`; `edge_type` is
    /// also accepted when deserializing.
    #[serde(rename = "type", alias = "edge_type")]
    pub edge_type: KnowledgeDocEdgeType,
    /// Target document id or path.
    pub target: String,
    /// Optional authoring note. Tool metadata omits this to keep traversal compact.
    pub description: Option<String>,
}
131
/// Criteria used to narrow document listings and searches.
///
/// The default value (no tags, every option `None`) accepts every document.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct KnowledgeDocFilter {
    /// Tags a document must carry; every listed tag has to be present.
    pub tags: Vec<String>,
    /// Required document kind, compared for equality.
    pub kind: Option<KnowledgeDocKind>,
    /// Required prefix of the document selector.
    pub selector_prefix: Option<String>,
    /// Keeps only documents with an outbound edge to this target. The edge
    /// matches when its raw `target` equals this value, or when the target
    /// resolves to a document whose id or selector equals it.
    pub related_to: Option<String>,
    /// Restricts the `related_to` edge match to one edge type. It is only
    /// consulted when `related_to` is set.
    pub edge_type: Option<KnowledgeDocEdgeType>,
}
140
/// Result of reading one document: its metadata together with its body.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocRead {
    /// Metadata of the document that was read.
    pub manifest: KnowledgeDocManifest,
    /// Full document body as text.
    pub content: String,
}
146
/// A document together with the outbound edges that resolved to documents
/// in the same pack.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocNeighbor {
    /// Source document for the neighbor request.
    pub document: KnowledgeDocManifest,
    /// Resolved outbound edges from the source document.
    pub edges: Vec<KnowledgeDocNeighborEdge>,
}
154
/// One resolved outbound edge: the relationship type plus the metadata of
/// the document it points at.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocNeighborEdge {
    /// Relationship type, serialized under the key `type`.
    #[serde(rename = "type")]
    pub edge_type: KnowledgeDocEdgeType,
    /// Resolved target document metadata.
    pub target: KnowledgeDocManifest,
}
162
/// One search result: the matched document, its score, and which metadata
/// fields matched.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocSearchHit {
    /// Matched document metadata.
    pub document: KnowledgeDocManifest,
    /// Simple relevance score derived from metadata matches.
    pub score: usize,
    /// Metadata fields that matched the query.
    pub matched: Vec<String>,
}
172
/// Listing of document entries under a root URI.
///
/// NOTE(review): nothing in this file constructs this type; presumably it is
/// produced by the `tools` module. Confirm the intended ordering of `entries`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocTree {
    /// Root URI the entries belong to (presumably the pack's `root_uri`).
    pub root_uri: String,
    /// Slim per-document entries.
    pub entries: Vec<KnowledgeDocTreeEntry>,
}
178
/// Slim per-document entry of a [`KnowledgeDocTree`].
///
/// The field names and types mirror a subset of [`KnowledgeDocManifest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocTreeEntry {
    /// Document selector.
    pub selector: String,
    /// Human-readable title.
    pub title: String,
    /// Document category slug.
    pub kind: KnowledgeDocKind,
    /// Classification labels.
    pub tags: Vec<String>,
}
186
187/// Runtime access to a knowledge pack's metadata and lazy document content.
188pub trait KnowledgePack: Send + Sync {
189    fn manifest(&self) -> &dyn KnowledgePackManifest;
190
191    fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>>;
192
193    fn list_docs(&self, filter: KnowledgeDocFilter) -> Vec<&KnowledgeDocManifest> {
194        self.manifest()
195            .docs()
196            .iter()
197            .filter(|doc| matches_filter(self, doc, &filter))
198            .collect()
199    }
200
201    fn read_manifest(&self, path: &str) -> Option<&KnowledgeDocManifest> {
202        self.manifest().read_doc_manifest(path)
203    }
204
205    fn read_doc(&self, path: &str) -> Option<KnowledgeDocRead> {
206        let manifest = self.read_manifest(path)?.clone();
207        let content = self.doc_content(&manifest)?.into_owned();
208        Some(KnowledgeDocRead { manifest, content })
209    }
210
211    fn search(&self, query: &str, filter: KnowledgeDocFilter) -> Vec<KnowledgeDocSearchHit> {
212        search_pack(self, query, filter)
213    }
214
215    fn neighbors(
216        &self,
217        path: &str,
218        edge_type: Option<KnowledgeDocEdgeType>,
219    ) -> Option<KnowledgeDocNeighbor> {
220        let source = self.read_manifest(path)?;
221
222        let mut edges = Vec::new();
223
224        for edge in &source.related {
225            if let Some(expected) = edge_type
226                && edge.edge_type != expected
227            {
228                continue;
229            }
230            if let Some(target) = self.read_manifest(&edge.target) {
231                edges.push(KnowledgeDocNeighborEdge {
232                    edge_type: edge.edge_type,
233                    target: target.clone(),
234                });
235            }
236        }
237
238        edges.sort_by(|left, right| {
239            left.target
240                .selector
241                .cmp(&right.target.selector)
242                .then_with(|| left.edge_type.as_str().cmp(right.edge_type.as_str()))
243        });
244        edges.dedup_by(|left, right| {
245            left.edge_type == right.edge_type && left.target.selector == right.target.selector
246        });
247
248        Some(KnowledgeDocNeighbor {
249            document: source.clone(),
250            edges,
251        })
252    }
253}
254
/// Filesystem-backed package knowledge pack loaded from an installed package
/// knowledge manifest.
#[derive(Debug, Clone)]
pub struct PackageKnowledgePack {
    /// Directory containing the manifest file; document `source_path`s are
    /// joined onto it when reading content.
    content_root: PathBuf,
    /// Optional pack-level selector declared in the manifest file.
    selector: Option<String>,
    /// Normalized manifest data built while loading.
    manifest: KnowledgePackManifestData,
}
263
264impl PackageKnowledgePack {
265    pub fn load(path: &Path, package_version: &str) -> Result<Self> {
266        let content = std::fs::read_to_string(path)
267            .with_context(|| format!("failed to read knowledge manifest {}", path.display()))?;
268        let file: PackageKnowledgeManifestFile =
269            serde_yaml::from_str(&content).context("invalid package knowledge manifest")?;
270        let root_uri = file
271            .manifest
272            .root_uri
273            .or(file.root_uri)
274            .unwrap_or_else(|| format!("pkg://{}/", file.manifest.pack_id));
275        let pack_id = file.manifest.pack_id;
276        let docs = file
277            .manifest
278            .docs
279            .into_iter()
280            .map(|doc| doc.into_manifest(&pack_id))
281            .collect();
282        Ok(Self {
283            content_root: path.parent().unwrap_or_else(|| Path::new("")).to_path_buf(),
284            selector: file.manifest.selector.or(file.selector),
285            manifest: KnowledgePackManifestData {
286                pack_id,
287                version: file
288                    .manifest
289                    .version
290                    .unwrap_or_else(|| package_version.to_string()),
291                schema_version: file.manifest.schema_version.unwrap_or(1),
292                root_uri,
293                content_hash: file.manifest.content_hash.unwrap_or_default(),
294                docs,
295            },
296        })
297    }
298
299    pub fn selector(&self) -> Option<&str> {
300        self.selector.as_deref()
301    }
302}
303
304impl KnowledgePack for PackageKnowledgePack {
305    fn manifest(&self) -> &dyn KnowledgePackManifest {
306        &self.manifest
307    }
308
309    fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>> {
310        let content =
311            std::fs::read_to_string(self.content_root.join(&manifest.source_path)).ok()?;
312        Some(Cow::Owned(content))
313    }
314}
315
/// Top-level shape of a package knowledge manifest file.
///
/// `selector` and `root_uri` may appear here or inside `manifest`; the
/// loader prefers the value inside `manifest` when both are present.
#[derive(Debug, Deserialize)]
struct PackageKnowledgeManifestFile {
    /// File-level pack selector, used when the body does not declare one.
    selector: Option<String>,
    /// File-level root URI, used when the body does not declare one.
    root_uri: Option<String>,
    /// The manifest body.
    manifest: PackageKnowledgeManifestBody,
}
322
/// Body of a package knowledge manifest as authored on disk.
///
/// Only `pack_id` is required; the loader fills in the remaining fields.
#[derive(Debug, Deserialize)]
struct PackageKnowledgeManifestBody {
    /// Pack identifier (required).
    pack_id: String,
    /// Optional pack-level selector.
    selector: Option<String>,
    /// Optional pack version; the loader falls back to the package version.
    version: Option<String>,
    /// Optional schema version; the loader falls back to `1`.
    schema_version: Option<u32>,
    /// Optional root URI.
    root_uri: Option<String>,
    /// Optional content hash; the loader falls back to an empty string.
    content_hash: Option<String>,
    /// Authored document entries; an absent list is treated as empty.
    #[serde(default)]
    docs: Vec<PackageKnowledgeDoc>,
}
334
/// One document entry as authored in a package knowledge manifest.
///
/// `id` and `selector` are optional here and are derived during conversion
/// to [`KnowledgeDocManifest`].
#[derive(Debug, Deserialize)]
struct PackageKnowledgeDoc {
    /// Optional explicit document id.
    id: Option<String>,
    /// Optional explicit selector.
    selector: Option<String>,
    /// Path of the document body (required).
    source_path: String,
    /// Human-readable title (required).
    title: String,
    /// Short summary (required).
    summary: String,
    /// Document category; uses `KnowledgeDocKind::default()` when absent.
    #[serde(default)]
    kind: KnowledgeDocKind,
    /// Classification labels; empty when absent.
    #[serde(default)]
    tags: Vec<String>,
    /// Outbound edges; empty when absent.
    #[serde(default)]
    related: Vec<KnowledgeDocEdge>,
    /// Sync timestamp; empty string when absent.
    #[serde(default)]
    updated_at: String,
}
351
352impl PackageKnowledgeDoc {
353    fn into_manifest(self, pack_id: &str) -> KnowledgeDocManifest {
354        let id_hint = self.id.as_deref().unwrap_or_default();
355        let selector = self
356            .selector
357            .unwrap_or_else(|| selector_from_source_path(&self.source_path, pack_id, id_hint));
358        let id = self.id.unwrap_or_else(|| format!("{pack_id}.{selector}"));
359        KnowledgeDocManifest {
360            id,
361            selector,
362            source_path: self.source_path,
363            title: self.title,
364            summary: self.summary,
365            kind: self.kind,
366            tags: self.tags,
367            related: self.related,
368            updated_at: self.updated_at,
369        }
370    }
371}
372
/// Derives a dotted selector from a document's source path.
///
/// A leading `docs/` and a trailing `.md` are removed and every `/` becomes
/// `.`. When nothing is left, the selector falls back to `id` with a leading
/// `<pack_id>.` removed if present.
fn selector_from_source_path(source_path: &str, pack_id: &str, id: &str) -> String {
    let without_dir = match source_path.strip_prefix("docs/") {
        Some(rest) => rest,
        None => source_path,
    };
    let stem = match without_dir.strip_suffix(".md") {
        Some(rest) => rest,
        None => without_dir,
    };
    if !stem.is_empty() {
        return stem.split('/').collect::<Vec<_>>().join(".");
    }

    let pack_prefix = format!("{pack_id}.");
    match id.strip_prefix(pack_prefix.as_str()) {
        Some(local) => local.to_string(),
        None => id.to_string(),
    }
}
385
386impl KnowledgeDocEdgeType {
387    pub fn as_str(self) -> &'static str {
388        match self {
389            KnowledgeDocEdgeType::PartOf => "part_of",
390            KnowledgeDocEdgeType::Defines => "defines",
391            KnowledgeDocEdgeType::Governs => "governs",
392            KnowledgeDocEdgeType::Classifies => "classifies",
393            KnowledgeDocEdgeType::References => "references",
394            KnowledgeDocEdgeType::DependsOn => "depends_on",
395            KnowledgeDocEdgeType::Extends => "extends",
396            KnowledgeDocEdgeType::RelatedTo => "related_to",
397        }
398    }
399}
400
401impl KnowledgeDocKind {
402    pub fn new(value: impl AsRef<str>) -> Self {
403        let value = value.as_ref().trim().to_ascii_lowercase();
404        let mut slug = String::new();
405        let mut last_was_separator = false;
406        for ch in value.chars() {
407            if ch.is_ascii_alphanumeric() {
408                slug.push(ch);
409                last_was_separator = false;
410            } else if !last_was_separator {
411                slug.push('_');
412                last_was_separator = true;
413            }
414        }
415        let slug = slug.trim_matches('_');
416        if slug.is_empty() {
417            Self("reference".to_string())
418        } else {
419            Self(slug.to_string())
420        }
421    }
422
423    pub fn as_str(&self) -> &str {
424        &self.0
425    }
426}
427
428impl Default for KnowledgeDocKind {
429    fn default() -> Self {
430        Self::new("reference")
431    }
432}
433
434impl From<&str> for KnowledgeDocKind {
435    fn from(value: &str) -> Self {
436        Self::new(value)
437    }
438}
439
440impl From<String> for KnowledgeDocKind {
441    fn from(value: String) -> Self {
442        Self::new(value)
443    }
444}
445
446impl Serialize for KnowledgeDocKind {
447    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
448    where
449        S: Serializer,
450    {
451        serializer.serialize_str(self.as_str())
452    }
453}
454
455impl<'de> Deserialize<'de> for KnowledgeDocKind {
456    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
457    where
458        D: Deserializer<'de>,
459    {
460        String::deserialize(deserializer).map(Self::new)
461    }
462}
463
464fn search_pack<P: KnowledgePack + ?Sized>(
465    pack: &P,
466    query: &str,
467    filter: KnowledgeDocFilter,
468) -> Vec<KnowledgeDocSearchHit> {
469    let needle = normalize(query);
470    let mut hits = Vec::new();
471
472    for manifest in pack.list_docs(filter) {
473        let mut score = 0;
474        let mut matched = BTreeSet::new();
475
476        score += score_field(&needle, &manifest.id, 100, "id", &mut matched);
477        score += score_field(&needle, &manifest.selector, 90, "selector", &mut matched);
478        score += score_field(&needle, &manifest.title, 80, "title", &mut matched);
479        score += score_field(&needle, &manifest.summary, 60, "summary", &mut matched);
480
481        for tag in &manifest.tags {
482            score += score_field(&needle, tag, 70, "tag", &mut matched);
483        }
484
485        if score > 0 || needle.is_empty() {
486            hits.push(KnowledgeDocSearchHit {
487                document: manifest.clone(),
488                score,
489                matched: matched.into_iter().collect(),
490            });
491        }
492    }
493
494    hits.sort_by(|a, b| {
495        b.score
496            .cmp(&a.score)
497            .then_with(|| a.document.selector.cmp(&b.document.selector))
498    });
499    hits
500}
501
502fn matches_filter<P: KnowledgePack + ?Sized>(
503    pack: &P,
504    doc: &KnowledgeDocManifest,
505    filter: &KnowledgeDocFilter,
506) -> bool {
507    if let Some(kind) = &filter.kind
508        && doc.kind != *kind
509    {
510        return false;
511    }
512    if let Some(prefix) = &filter.selector_prefix
513        && !doc.selector.starts_with(prefix)
514    {
515        return false;
516    }
517    if !filter.tags.is_empty()
518        && !filter
519            .tags
520            .iter()
521            .all(|tag| doc.tags.iter().any(|doc_tag| doc_tag == tag))
522    {
523        return false;
524    }
525    if let Some(target) = &filter.related_to {
526        let has_edge = doc.related.iter().any(|edge| {
527            let edge_matches_target = edge.target == *target
528                || pack
529                    .read_manifest(&edge.target)
530                    .map(|edge_target| edge_target.id == *target || edge_target.selector == *target)
531                    .unwrap_or(false);
532            edge_matches_target
533                && filter
534                    .edge_type
535                    .as_ref()
536                    .map(|expected| edge.edge_type == *expected)
537                    .unwrap_or(true)
538        });
539        if !has_edge {
540            return false;
541        }
542    }
543    true
544}
545
546fn score_field(
547    needle: &str,
548    haystack: &str,
549    weight: usize,
550    label: &str,
551    matched: &mut BTreeSet<String>,
552) -> usize {
553    if needle.is_empty() {
554        return 1;
555    }
556    let haystack = normalize(haystack);
557    if haystack == needle {
558        matched.insert(label.to_string());
559        weight * 2
560    } else if haystack.contains(needle) {
561        matched.insert(label.to_string());
562        weight
563    } else {
564        0
565    }
566}
567
/// Trims surrounding whitespace and lowercases the text for comparison.
fn normalize(value: &str) -> String {
    let trimmed = value.trim();
    trimmed.to_lowercase()
}
571
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// A doc entry that declares only a `selector` (no `id`) still loads: the
    /// id becomes `<pack_id>.<selector>` and the body is read from disk.
    #[test]
    fn package_knowledge_manifest_accepts_selector_without_doc_id() {
        // Name the scratch directory from the pid and a nanosecond timestamp
        // so separate runs do not share a directory.
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let dir = std::env::temp_dir().join(format!(
            "nenjo-knowledge-package-manifest-{pid}-{unique}",
            pid = std::process::id()
        ));
        let docs_dir = dir.join("docs/domain");
        std::fs::create_dir_all(&docs_dir).unwrap();
        // The top-level `schema` key has no matching field on the manifest
        // file struct; the load below is expected to succeed regardless.
        std::fs::write(
            dir.join("manifest.yaml"),
            r#"
schema: nenjo.knowledge.v1
manifest:
  pack_id: nenjo.core
  version: 0.1.0
  docs:
    - selector: domain.nenjo
      source_path: docs/domain/nenjo.md
      title: Nenjo
      summary: Platform overview.
      kind: domain
      tags: [domain:nenjo]
      related: []
"#,
        )
        .unwrap();
        std::fs::write(docs_dir.join("nenjo.md"), "# Nenjo\n\nKnowledge content.").unwrap();

        let pack = PackageKnowledgePack::load(&dir.join("manifest.yaml"), "0.1.0").unwrap();
        let doc = pack.read_doc("domain.nenjo").unwrap();

        assert_eq!(doc.manifest.selector, "domain.nenjo");
        assert_eq!(doc.manifest.id, "nenjo.core.domain.nenjo");
        assert!(doc.content.contains("Knowledge content"));

        // Cleanup only runs when the assertions above pass.
        std::fs::remove_dir_all(dir).unwrap();
    }
}