Skip to main content

nenjo_knowledge/
lib.rs

1//! Shared knowledge pack primitives.
2//!
//! Knowledge packs expose a common metadata/search/read API for package,
//! filesystem, or remote document sets.
5
6use std::borrow::Cow;
7use std::collections::BTreeSet;
8use std::path::{Path, PathBuf};
9
10use anyhow::{Context, Result};
11use serde::{Deserialize, Deserializer, Serialize, Serializer};
12
13pub mod tools;
14
15/// Shared read-only metadata contract for any knowledge pack manifest.
16///
17/// This trait intentionally covers only pack identity and document metadata.
18/// Concrete pack manifests, such as project or remote manifests, should expose
19/// their own sync/cache mutation methods on their concrete types.
20pub trait KnowledgePackManifest: Send + Sync {
21    fn pack_id(&self) -> &str;
22    fn version(&self) -> &str;
23    fn schema_version(&self) -> u32;
24    fn root_uri(&self) -> &str;
25    fn content_hash(&self) -> &str;
26    fn docs(&self) -> &[KnowledgeDocManifest];
27
28    fn read_doc_manifest(&self, selector: &str) -> Option<&KnowledgeDocManifest> {
29        self.docs().iter().find(|doc| {
30            doc.id == selector || doc.selector == selector || doc.source_path == selector
31        })
32    }
33}
34
35/// Serializable base manifest used by read-only packs and generic consumers.
36///
37/// Project and remote packs may deserialize into richer concrete types, but
38/// their document entries should still use [`KnowledgeDocManifest`] so agents
39/// and MCP tools see one metadata schema across all pack sources.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct KnowledgePackManifestData {
42    #[serde(alias = "pack_slug")]
43    pub pack_id: String,
44    #[serde(default = "default_knowledge_pack_version")]
45    pub version: String,
46    #[serde(default = "default_knowledge_schema_version")]
47    pub schema_version: u32,
48    #[serde(default)]
49    pub root_uri: String,
50    #[serde(default)]
51    pub content_hash: String,
52    #[serde(default)]
53    pub docs: Vec<KnowledgeDocManifest>,
54}
55
56impl KnowledgePackManifest for KnowledgePackManifestData {
57    fn pack_id(&self) -> &str {
58        &self.pack_id
59    }
60
61    fn version(&self) -> &str {
62        &self.version
63    }
64
65    fn schema_version(&self) -> u32 {
66        self.schema_version
67    }
68
69    fn root_uri(&self) -> &str {
70        &self.root_uri
71    }
72
73    fn content_hash(&self) -> &str {
74        &self.content_hash
75    }
76
77    fn docs(&self) -> &[KnowledgeDocManifest] {
78        &self.docs
79    }
80}
81
/// Serde default for a manifest that omits `version`: the string `"1"`.
fn default_knowledge_pack_version() -> String {
    String::from("1")
}
85
/// Serde default for a manifest that omits `schema_version`: `1`.
fn default_knowledge_schema_version() -> u32 {
    1_u32
}
89
90/// Stored metadata for one knowledge document.
91///
92/// Tool responses expose a slimmer projection of this type. `source_path` and
93/// `updated_at` are retained for pack hydration and local sync, not for agent
94/// selection.
95#[derive(Debug, Clone, Serialize, Deserialize)]
96pub struct KnowledgeDocManifest {
97    /// Stable document identifier within the pack.
98    pub id: String,
99    /// Agent-visible selector used for lookup and graph traversal.
100    pub selector: String,
101    /// Pack-local file path used to load the document body.
102    pub source_path: String,
103    /// Human-readable title.
104    pub title: String,
105    /// Short summary used for search and selection.
106    pub summary: String,
107    /// Open-ended document category normalized to a slug.
108    pub kind: KnowledgeDocKind,
109    /// Lightweight classification labels.
110    pub tags: Vec<String>,
111    /// Outbound graph edges authored on this document.
112    pub related: Vec<KnowledgeDocEdge>,
113    /// Sync timestamp for local library packs.
114    #[serde(default)]
115    pub updated_at: String,
116}
117
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
119#[serde(rename_all = "snake_case")]
120pub enum KnowledgeDocEdgeType {
121    PartOf,
122    Defines,
123    Governs,
124    Classifies,
125    References,
126    DependsOn,
127    Extends,
128    RelatedTo,
129}
130
/// Open-ended document category held as a slug string.
///
/// The inner field is private, so values outside this module are created
/// through the type's constructors and conversions.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct KnowledgeDocKind(String);
133
134/// Authored outbound edge from one document to another document.
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub struct KnowledgeDocEdge {
137    #[serde(rename = "type", alias = "edge_type")]
138    pub edge_type: KnowledgeDocEdgeType,
139    /// Target document id or path.
140    pub target: String,
141    /// Optional authoring note. Tool metadata omits this to keep traversal compact.
142    pub description: Option<String>,
143}
144
145#[derive(Debug, Clone, Default, Serialize, Deserialize)]
146pub struct KnowledgeDocFilter {
147    pub tags: Vec<String>,
148    pub kind: Option<KnowledgeDocKind>,
149    pub selector_prefix: Option<String>,
150    pub related_to: Option<String>,
151    pub edge_type: Option<KnowledgeDocEdgeType>,
152}
153
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct KnowledgeDocRead {
156    pub manifest: KnowledgeDocManifest,
157    pub content: String,
158}
159
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct KnowledgeDocNeighbor {
162    /// Source document for the neighbor request.
163    pub document: KnowledgeDocManifest,
164    /// Resolved outbound edges from the source document.
165    pub edges: Vec<KnowledgeDocNeighborEdge>,
166}
167
168#[derive(Debug, Clone, Serialize, Deserialize)]
169pub struct KnowledgeDocNeighborEdge {
170    #[serde(rename = "type")]
171    pub edge_type: KnowledgeDocEdgeType,
172    /// Resolved target document metadata.
173    pub target: KnowledgeDocManifest,
174}
175
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct KnowledgeDocSearchHit {
178    /// Matched document metadata.
179    pub document: KnowledgeDocManifest,
180    /// Simple relevance score derived from metadata matches.
181    pub score: usize,
182    /// Metadata fields that matched the query.
183    pub matched: Vec<String>,
184}
185
186/// Runtime access to a knowledge pack's metadata and lazy document content.
187pub trait KnowledgePack: Send + Sync {
188    fn manifest(&self) -> &dyn KnowledgePackManifest;
189
190    fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>>;
191
192    fn list_docs(&self, filter: KnowledgeDocFilter) -> Vec<&KnowledgeDocManifest> {
193        self.manifest()
194            .docs()
195            .iter()
196            .filter(|doc| matches_filter(self, doc, &filter))
197            .collect()
198    }
199
200    fn read_manifest(&self, path: &str) -> Option<&KnowledgeDocManifest> {
201        self.manifest().read_doc_manifest(path)
202    }
203
204    fn read_doc(&self, path: &str) -> Option<KnowledgeDocRead> {
205        let manifest = self.read_manifest(path)?.clone();
206        let content = self.doc_content(&manifest)?.into_owned();
207        Some(KnowledgeDocRead { manifest, content })
208    }
209
210    fn search(&self, query: &str, filter: KnowledgeDocFilter) -> Vec<KnowledgeDocSearchHit> {
211        search_pack(self, query, filter)
212    }
213
214    fn neighbors(
215        &self,
216        path: &str,
217        edge_type: Option<KnowledgeDocEdgeType>,
218    ) -> Option<KnowledgeDocNeighbor> {
219        let source = self.read_manifest(path)?;
220
221        let mut edges = Vec::new();
222
223        for edge in &source.related {
224            if let Some(expected) = edge_type
225                && edge.edge_type != expected
226            {
227                continue;
228            }
229            if let Some(target) = self.read_manifest(&edge.target) {
230                edges.push(KnowledgeDocNeighborEdge {
231                    edge_type: edge.edge_type,
232                    target: target.clone(),
233                });
234            }
235        }
236
237        edges.sort_by(|left, right| {
238            left.target
239                .selector
240                .cmp(&right.target.selector)
241                .then_with(|| left.edge_type.as_str().cmp(right.edge_type.as_str()))
242        });
243        edges.dedup_by(|left, right| {
244            left.edge_type == right.edge_type && left.target.selector == right.target.selector
245        });
246
247        Some(KnowledgeDocNeighbor {
248            document: source.clone(),
249            edges,
250        })
251    }
252}
253
254/// Filesystem-backed package knowledge pack loaded from an installed package
255/// knowledge manifest.
256#[derive(Debug, Clone)]
257pub struct PackageKnowledgePack {
258    content_root: PathBuf,
259    selector: Option<String>,
260    manifest: KnowledgePackManifestData,
261}
262
263impl PackageKnowledgePack {
264    pub fn load(path: &Path, package_version: &str) -> Result<Self> {
265        let content = std::fs::read_to_string(path)
266            .with_context(|| format!("failed to read knowledge manifest {}", path.display()))?;
267        let file: PackageKnowledgeManifestFile =
268            serde_yaml::from_str(&content).context("invalid package knowledge manifest")?;
269        let root_uri = file
270            .manifest
271            .root_uri
272            .or(file.root_uri)
273            .unwrap_or_else(|| format!("pkg://{}/", file.manifest.pack_id));
274        let pack_id = file.manifest.pack_id;
275        let docs = file
276            .manifest
277            .docs
278            .into_iter()
279            .map(|doc| doc.into_manifest(&pack_id))
280            .collect();
281        Ok(Self {
282            content_root: path.parent().unwrap_or_else(|| Path::new("")).to_path_buf(),
283            selector: file.manifest.selector.or(file.selector),
284            manifest: KnowledgePackManifestData {
285                pack_id,
286                version: file
287                    .manifest
288                    .version
289                    .unwrap_or_else(|| package_version.to_string()),
290                schema_version: file.manifest.schema_version.unwrap_or(1),
291                root_uri,
292                content_hash: file.manifest.content_hash.unwrap_or_default(),
293                docs,
294            },
295        })
296    }
297
298    pub fn selector(&self) -> Option<&str> {
299        self.selector.as_deref()
300    }
301}
302
303impl KnowledgePack for PackageKnowledgePack {
304    fn manifest(&self) -> &dyn KnowledgePackManifest {
305        &self.manifest
306    }
307
308    fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>> {
309        let content =
310            std::fs::read_to_string(self.content_root.join(&manifest.source_path)).ok()?;
311        Some(Cow::Owned(content))
312    }
313}
314
315/// Filesystem-backed knowledge pack loaded from a local cache directory.
316///
317/// The manifest is loaded eagerly, while document bodies are read lazily from
318/// `source_path` when a tool asks for a document.
319#[derive(Debug, Clone)]
320pub struct FilesystemKnowledgePack {
321    content_root: PathBuf,
322    manifest: KnowledgePackManifestData,
323}
324
325impl FilesystemKnowledgePack {
326    pub const MANIFEST_FILENAME: &'static str = "manifest.json";
327
328    pub fn load(root: &Path) -> Option<Self> {
329        let manifest_path = root.join(Self::MANIFEST_FILENAME);
330        let content = std::fs::read_to_string(&manifest_path).ok()?;
331        let mut manifest: KnowledgePackManifestData = serde_json::from_str(&content).ok()?;
332        if manifest.root_uri.trim().is_empty() {
333            manifest.root_uri = format!("file://{}/", root.display());
334        }
335        Some(Self {
336            content_root: root.to_path_buf(),
337            manifest,
338        })
339    }
340
341    fn doc_path(&self, source_path: &str) -> Option<PathBuf> {
342        let relative = Path::new(source_path);
343        if relative.is_absolute()
344            || relative
345                .components()
346                .any(|component| !matches!(component, std::path::Component::Normal(_)))
347        {
348            return None;
349        }
350        Some(self.content_root.join(relative))
351    }
352}
353
354impl KnowledgePack for FilesystemKnowledgePack {
355    fn manifest(&self) -> &dyn KnowledgePackManifest {
356        &self.manifest
357    }
358
359    fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>> {
360        let content = std::fs::read_to_string(self.doc_path(&manifest.source_path)?).ok()?;
361        Some(Cow::Owned(content))
362    }
363}
364
365#[derive(Debug, Deserialize)]
366struct PackageKnowledgeManifestFile {
367    selector: Option<String>,
368    root_uri: Option<String>,
369    manifest: PackageKnowledgeManifestBody,
370}
371
372#[derive(Debug, Deserialize)]
373struct PackageKnowledgeManifestBody {
374    pack_id: String,
375    selector: Option<String>,
376    version: Option<String>,
377    schema_version: Option<u32>,
378    root_uri: Option<String>,
379    content_hash: Option<String>,
380    #[serde(default)]
381    docs: Vec<PackageKnowledgeDoc>,
382}
383
384#[derive(Debug, Deserialize)]
385struct PackageKnowledgeDoc {
386    id: Option<String>,
387    selector: Option<String>,
388    source_path: String,
389    title: String,
390    summary: String,
391    #[serde(default)]
392    kind: KnowledgeDocKind,
393    #[serde(default)]
394    tags: Vec<String>,
395    #[serde(default)]
396    related: Vec<KnowledgeDocEdge>,
397    #[serde(default)]
398    updated_at: String,
399}
400
401impl PackageKnowledgeDoc {
402    fn into_manifest(self, pack_id: &str) -> KnowledgeDocManifest {
403        let id_hint = self.id.as_deref().unwrap_or_default();
404        let selector = self
405            .selector
406            .unwrap_or_else(|| selector_from_source_path(&self.source_path, pack_id, id_hint));
407        let id = self.id.unwrap_or_else(|| format!("{pack_id}.{selector}"));
408        KnowledgeDocManifest {
409            id,
410            selector,
411            source_path: self.source_path,
412            title: self.title,
413            summary: self.summary,
414            kind: self.kind,
415            tags: self.tags,
416            related: self.related,
417            updated_at: self.updated_at,
418        }
419    }
420}
421
/// Derives a dotted selector from a document's `source_path`.
///
/// A leading `docs/` and a trailing `.md` are removed and every `/` becomes
/// `.`. If nothing is left, the selector falls back to `id` with a leading
/// `<pack_id>.` removed when present.
fn selector_from_source_path(source_path: &str, pack_id: &str, id: &str) -> String {
    let without_root = source_path.strip_prefix("docs/").unwrap_or(source_path);
    let stem = without_root.strip_suffix(".md").unwrap_or(without_root);
    if !stem.is_empty() {
        return stem.replace('/', ".");
    }

    // Nothing usable in the path: reuse the id, minus the pack prefix.
    let unprefixed = id
        .strip_prefix(pack_id)
        .and_then(|rest| rest.strip_prefix('.'))
        .unwrap_or(id);
    unprefixed.to_string()
}
434
435impl KnowledgeDocEdgeType {
436    pub fn as_str(self) -> &'static str {
437        match self {
438            KnowledgeDocEdgeType::PartOf => "part_of",
439            KnowledgeDocEdgeType::Defines => "defines",
440            KnowledgeDocEdgeType::Governs => "governs",
441            KnowledgeDocEdgeType::Classifies => "classifies",
442            KnowledgeDocEdgeType::References => "references",
443            KnowledgeDocEdgeType::DependsOn => "depends_on",
444            KnowledgeDocEdgeType::Extends => "extends",
445            KnowledgeDocEdgeType::RelatedTo => "related_to",
446        }
447    }
448}
449
450impl KnowledgeDocKind {
451    pub fn new(value: impl AsRef<str>) -> Self {
452        let value = value.as_ref().trim().to_ascii_lowercase();
453        let mut slug = String::new();
454        let mut last_was_separator = false;
455        for ch in value.chars() {
456            if ch.is_ascii_alphanumeric() {
457                slug.push(ch);
458                last_was_separator = false;
459            } else if !last_was_separator {
460                slug.push('_');
461                last_was_separator = true;
462            }
463        }
464        let slug = slug.trim_matches('_');
465        if slug.is_empty() {
466            Self("reference".to_string())
467        } else {
468            Self(slug.to_string())
469        }
470    }
471
472    pub fn as_str(&self) -> &str {
473        &self.0
474    }
475}
476
477impl Default for KnowledgeDocKind {
478    fn default() -> Self {
479        Self::new("reference")
480    }
481}
482
483impl From<&str> for KnowledgeDocKind {
484    fn from(value: &str) -> Self {
485        Self::new(value)
486    }
487}
488
489impl From<String> for KnowledgeDocKind {
490    fn from(value: String) -> Self {
491        Self::new(value)
492    }
493}
494
495impl Serialize for KnowledgeDocKind {
496    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
497    where
498        S: Serializer,
499    {
500        serializer.serialize_str(self.as_str())
501    }
502}
503
504impl<'de> Deserialize<'de> for KnowledgeDocKind {
505    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
506    where
507        D: Deserializer<'de>,
508    {
509        String::deserialize(deserializer).map(Self::new)
510    }
511}
512
513fn search_pack<P: KnowledgePack + ?Sized>(
514    pack: &P,
515    query: &str,
516    filter: KnowledgeDocFilter,
517) -> Vec<KnowledgeDocSearchHit> {
518    let needle = normalize(query);
519    let mut hits = Vec::new();
520
521    for manifest in pack.list_docs(filter) {
522        let mut score = 0;
523        let mut matched = BTreeSet::new();
524
525        score += score_field(&needle, &manifest.id, 100, "id", &mut matched);
526        score += score_field(&needle, &manifest.selector, 90, "selector", &mut matched);
527        score += score_field(&needle, &manifest.title, 80, "title", &mut matched);
528        score += score_field(&needle, &manifest.summary, 60, "summary", &mut matched);
529
530        for tag in &manifest.tags {
531            score += score_field(&needle, tag, 70, "tag", &mut matched);
532        }
533
534        if score > 0 || needle.is_empty() {
535            hits.push(KnowledgeDocSearchHit {
536                document: manifest.clone(),
537                score,
538                matched: matched.into_iter().collect(),
539            });
540        }
541    }
542
543    hits.sort_by(|a, b| {
544        b.score
545            .cmp(&a.score)
546            .then_with(|| a.document.selector.cmp(&b.document.selector))
547    });
548    hits
549}
550
551fn matches_filter<P: KnowledgePack + ?Sized>(
552    pack: &P,
553    doc: &KnowledgeDocManifest,
554    filter: &KnowledgeDocFilter,
555) -> bool {
556    if let Some(kind) = &filter.kind
557        && doc.kind != *kind
558    {
559        return false;
560    }
561    if let Some(prefix) = &filter.selector_prefix
562        && !doc.selector.starts_with(prefix)
563    {
564        return false;
565    }
566    if !filter.tags.is_empty()
567        && !filter
568            .tags
569            .iter()
570            .all(|tag| doc.tags.iter().any(|doc_tag| doc_tag == tag))
571    {
572        return false;
573    }
574    if let Some(target) = &filter.related_to {
575        let has_edge = doc.related.iter().any(|edge| {
576            let edge_matches_target = edge.target == *target
577                || pack
578                    .read_manifest(&edge.target)
579                    .map(|edge_target| edge_target.id == *target || edge_target.selector == *target)
580                    .unwrap_or(false);
581            edge_matches_target
582                && filter
583                    .edge_type
584                    .as_ref()
585                    .map(|expected| edge.edge_type == *expected)
586                    .unwrap_or(true)
587        });
588        if !has_edge {
589            return false;
590        }
591    }
592    true
593}
594
595fn score_field(
596    needle: &str,
597    haystack: &str,
598    weight: usize,
599    label: &str,
600    matched: &mut BTreeSet<String>,
601) -> usize {
602    if needle.is_empty() {
603        return 1;
604    }
605    let haystack = normalize(haystack);
606    if haystack == needle {
607        matched.insert(label.to_string());
608        weight * 2
609    } else if haystack.contains(needle) {
610        matched.insert(label.to_string());
611        weight
612    } else {
613        0
614    }
615}
616
/// Prepares text for comparison: surrounding whitespace is removed and the
/// rest is lowercased.
fn normalize(value: &str) -> String {
    let trimmed = value.trim();
    trimmed.to_lowercase()
}
620
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Manifest whose single document declares a selector but no id.
    const MANIFEST_YAML: &str = r#"
schema: nenjo.knowledge.v1
manifest:
  pack_id: nenjo.core
  version: 0.1.0
  docs:
    - selector: domain.nenjo
      source_path: docs/domain/nenjo.md
      title: Nenjo
      summary: Platform overview.
      kind: domain
      tags: [domain:nenjo]
      related: []
"#;

    /// A document without an explicit id gets `<pack_id>.<selector>` as its
    /// id, and its body can be read by selector.
    #[test]
    fn package_knowledge_manifest_accepts_selector_without_doc_id() {
        // Process id plus a nanosecond timestamp keeps the scratch directory
        // distinct between runs.
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let scratch = std::env::temp_dir().join(format!(
            "nenjo-knowledge-package-manifest-{pid}-{unique}",
            pid = std::process::id(),
            unique = nanos
        ));
        let manifest_path = scratch.join("manifest.yaml");
        let domain_dir = scratch.join("docs/domain");

        std::fs::create_dir_all(&domain_dir).unwrap();
        std::fs::write(&manifest_path, MANIFEST_YAML).unwrap();
        std::fs::write(domain_dir.join("nenjo.md"), "# Nenjo\n\nKnowledge content.").unwrap();

        let pack = PackageKnowledgePack::load(&manifest_path, "0.1.0").unwrap();
        let read = pack.read_doc("domain.nenjo").unwrap();

        assert_eq!(read.manifest.selector, "domain.nenjo");
        assert_eq!(read.manifest.id, "nenjo.core.domain.nenjo");
        assert!(read.content.contains("Knowledge content"));

        std::fs::remove_dir_all(scratch).unwrap();
    }
}