Skip to main content

nenjo_knowledge/
lib.rs

//! Shared knowledge pack primitives.
//!
//! Knowledge packs expose a common metadata/search/read API for
//! filesystem or remote document sets.
5
6use std::borrow::Cow;
7use std::collections::{BTreeMap, BTreeSet};
8
9use serde::{Deserialize, Serialize};
10
11pub mod tools;
12
13/// Shared read-only metadata contract for any knowledge pack manifest.
14///
15/// This trait intentionally covers only pack identity and document metadata.
16/// Concrete pack manifests, such as project or remote manifests, should expose
17/// their own sync/cache mutation methods on their concrete types.
18pub trait KnowledgePackManifest: Send + Sync {
19    fn pack_id(&self) -> &str;
20    fn pack_version(&self) -> &str;
21    fn schema_version(&self) -> u32;
22    fn root_uri(&self) -> &str;
23    fn content_hash(&self) -> &str;
24    fn docs(&self) -> &[KnowledgeDocManifest];
25
26    fn read_doc_manifest(&self, path: &str) -> Option<&KnowledgeDocManifest> {
27        self.docs()
28            .iter()
29            .find(|doc| doc.id == path || doc.virtual_path == path || doc.source_path == path)
30    }
31}
32
33/// Serializable base manifest used by read-only packs and generic consumers.
34///
35/// Project and remote packs may deserialize into richer concrete types, but
36/// their document entries should still use [`KnowledgeDocManifest`] so agents
37/// and MCP tools see one metadata schema across all pack sources.
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct KnowledgePackManifestData {
40    pub pack_id: String,
41    pub pack_version: String,
42    pub schema_version: u32,
43    pub root_uri: String,
44    #[serde(default)]
45    pub content_hash: String,
46    pub docs: Vec<KnowledgeDocManifest>,
47}
48
49impl KnowledgePackManifest for KnowledgePackManifestData {
50    fn pack_id(&self) -> &str {
51        &self.pack_id
52    }
53
54    fn pack_version(&self) -> &str {
55        &self.pack_version
56    }
57
58    fn schema_version(&self) -> u32 {
59        self.schema_version
60    }
61
62    fn root_uri(&self) -> &str {
63        &self.root_uri
64    }
65
66    fn content_hash(&self) -> &str {
67        &self.content_hash
68    }
69
70    fn docs(&self) -> &[KnowledgeDocManifest] {
71        &self.docs
72    }
73}
74
75/// Shared document metadata visible through knowledge pack APIs.
76///
77/// `size_bytes` and `updated_at` are sync hints used by local project caches.
78/// Builtin and remote manifests may leave them empty/defaulted.
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct KnowledgeDocManifest {
81    pub id: String,
82    pub virtual_path: String,
83    pub source_path: String,
84    pub title: String,
85    pub summary: String,
86    pub description: Option<String>,
87    pub kind: KnowledgeDocKind,
88    pub authority: KnowledgeDocAuthority,
89    pub status: KnowledgeDocStatus,
90    pub tags: Vec<String>,
91    pub aliases: Vec<String>,
92    pub keywords: Vec<String>,
93    pub related: Vec<KnowledgeDocEdge>,
94    #[serde(default)]
95    pub size_bytes: i64,
96    #[serde(default)]
97    pub updated_at: String,
98}
99
100#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
101#[serde(rename_all = "snake_case")]
102pub enum KnowledgeDocEdgeType {
103    PartOf,
104    Defines,
105    Governs,
106    Classifies,
107    References,
108    DependsOn,
109    Extends,
110    RelatedTo,
111}
112
113#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
114#[serde(rename_all = "snake_case")]
115pub enum KnowledgeDocKind {
116    Guide,
117    Reference,
118    Taxonomy,
119    Domain,
120    Entity,
121    Policy,
122}
123
124#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
125#[serde(rename_all = "snake_case")]
126pub enum KnowledgeDocAuthority {
127    Canonical,
128    Supporting,
129    Pattern,
130    Reference,
131    Advisory,
132    Example,
133    Draft,
134    Deprecated,
135}
136
137#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
138#[serde(rename_all = "snake_case")]
139pub enum KnowledgeDocStatus {
140    Stable,
141    Draft,
142    Deprecated,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize)]
146pub struct KnowledgeDocEdge {
147    #[serde(rename = "type", alias = "edge_type")]
148    pub edge_type: KnowledgeDocEdgeType,
149    pub target: String,
150    pub description: Option<String>,
151}
152
153#[derive(Debug, Clone, Default, Serialize, Deserialize)]
154pub struct KnowledgeDocFilter {
155    pub tags: Vec<String>,
156    pub kind: Option<KnowledgeDocKind>,
157    pub authority: Option<KnowledgeDocAuthority>,
158    pub status: Option<KnowledgeDocStatus>,
159    pub path_prefix: Option<String>,
160    pub related_to: Option<String>,
161    pub edge_type: Option<KnowledgeDocEdgeType>,
162}
163
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct KnowledgeDocRead {
166    pub manifest: KnowledgeDocManifest,
167    pub content: String,
168}
169
170#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
171pub struct KnowledgeDocNeighbor {
172    pub target: String,
173    pub edges: Vec<KnowledgeDocNeighborEdge>,
174}
175
176#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
177pub struct KnowledgeDocNeighborEdge {
178    pub edge_type: KnowledgeDocEdgeType,
179    pub source: String,
180    pub target: String,
181    #[serde(skip_serializing_if = "Option::is_none")]
182    pub note: Option<String>,
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct KnowledgeDocSearchHit {
187    pub id: String,
188    pub virtual_path: String,
189    pub title: String,
190    pub summary: String,
191    pub kind: KnowledgeDocKind,
192    pub authority: KnowledgeDocAuthority,
193    pub tags: Vec<String>,
194    pub score: usize,
195    pub matched: Vec<String>,
196    #[serde(skip_serializing_if = "Option::is_none")]
197    pub content: Option<String>,
198}
199
200#[derive(Debug, Clone, Serialize, Deserialize)]
201pub struct KnowledgeDocTree {
202    pub root_uri: String,
203    pub entries: Vec<KnowledgeDocTreeEntry>,
204}
205
206#[derive(Debug, Clone, Serialize, Deserialize)]
207pub struct KnowledgeDocTreeEntry {
208    pub path: String,
209    pub title: String,
210    pub kind: KnowledgeDocKind,
211    pub tags: Vec<String>,
212}
213
/// Selects how much of each document a search inspects.
enum SearchMode {
    /// Score manifest metadata only.
    MetadataOnly,
    /// Score manifest metadata and the document content.
    FullText,
}
218
219/// Runtime access to a knowledge pack's metadata and lazy document content.
220pub trait KnowledgePack: Send + Sync {
221    fn manifest(&self) -> &dyn KnowledgePackManifest;
222
223    fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>>;
224
225    fn list_tree(&self, prefix: Option<&str>) -> KnowledgeDocTree {
226        let mut entries: Vec<_> = self
227            .manifest()
228            .docs()
229            .iter()
230            .filter(|doc| {
231                prefix
232                    .map(|prefix| doc.virtual_path.starts_with(prefix))
233                    .unwrap_or(true)
234            })
235            .map(|doc| KnowledgeDocTreeEntry {
236                path: doc.virtual_path.clone(),
237                title: doc.title.clone(),
238                kind: doc.kind,
239                tags: doc.tags.clone(),
240            })
241            .collect();
242        entries.sort_by(|a, b| a.path.cmp(&b.path));
243        KnowledgeDocTree {
244            root_uri: self.manifest().root_uri().to_string(),
245            entries,
246        }
247    }
248
249    fn list_docs(&self, filter: KnowledgeDocFilter) -> Vec<&KnowledgeDocManifest> {
250        self.manifest()
251            .docs()
252            .iter()
253            .filter(|doc| matches_filter(self, doc, &filter))
254            .collect()
255    }
256
257    fn read_manifest(&self, path: &str) -> Option<&KnowledgeDocManifest> {
258        self.manifest().read_doc_manifest(path)
259    }
260
261    fn read_doc(&self, path: &str) -> Option<KnowledgeDocRead> {
262        let manifest = self.read_manifest(path)?.clone();
263        let content = self.doc_content(&manifest)?.into_owned();
264        Some(KnowledgeDocRead { manifest, content })
265    }
266
267    fn search_paths(&self, query: &str, filter: KnowledgeDocFilter) -> Vec<KnowledgeDocSearchHit> {
268        search_pack(self, query, filter, SearchMode::MetadataOnly)
269    }
270
271    fn search_docs(&self, query: &str, filter: KnowledgeDocFilter) -> Vec<KnowledgeDocSearchHit> {
272        search_pack(self, query, filter, SearchMode::FullText)
273    }
274
275    fn neighbors(
276        &self,
277        path: &str,
278        edge_type: Option<KnowledgeDocEdgeType>,
279    ) -> Vec<KnowledgeDocNeighbor> {
280        let Some(source) = self.read_manifest(path) else {
281            return Vec::new();
282        };
283
284        let mut neighbors: BTreeMap<String, KnowledgeDocNeighbor> = BTreeMap::new();
285
286        for edge in &source.related {
287            if let Some(expected) = edge_type
288                && edge.edge_type != expected
289            {
290                continue;
291            }
292            if let Some(target) = self.read_manifest(&edge.target) {
293                push_neighbor_edge(
294                    &mut neighbors,
295                    target.virtual_path.clone(),
296                    KnowledgeDocNeighborEdge {
297                        edge_type: edge.edge_type,
298                        source: source.virtual_path.clone(),
299                        target: target.virtual_path.clone(),
300                        note: edge.description.clone(),
301                    },
302                );
303            }
304        }
305
306        for candidate in self.manifest().docs() {
307            for edge in &candidate.related {
308                let points_to_source = self
309                    .read_manifest(&edge.target)
310                    .map(|target| {
311                        target.id == source.id || target.virtual_path == source.virtual_path
312                    })
313                    .unwrap_or_else(|| {
314                        edge.target == source.id || edge.target == source.virtual_path
315                    });
316                if !points_to_source {
317                    continue;
318                }
319                if let Some(expected) = edge_type
320                    && edge.edge_type != expected
321                {
322                    continue;
323                }
324                push_neighbor_edge(
325                    &mut neighbors,
326                    candidate.virtual_path.clone(),
327                    KnowledgeDocNeighborEdge {
328                        edge_type: edge.edge_type,
329                        source: candidate.virtual_path.clone(),
330                        target: source.virtual_path.clone(),
331                        note: edge.description.clone(),
332                    },
333                );
334            }
335        }
336
337        neighbors.into_values().collect()
338    }
339}
340
341impl KnowledgeDocEdgeType {
342    pub fn as_str(self) -> &'static str {
343        match self {
344            KnowledgeDocEdgeType::PartOf => "part_of",
345            KnowledgeDocEdgeType::Defines => "defines",
346            KnowledgeDocEdgeType::Governs => "governs",
347            KnowledgeDocEdgeType::Classifies => "classifies",
348            KnowledgeDocEdgeType::References => "references",
349            KnowledgeDocEdgeType::DependsOn => "depends_on",
350            KnowledgeDocEdgeType::Extends => "extends",
351            KnowledgeDocEdgeType::RelatedTo => "related_to",
352        }
353    }
354}
355
356impl KnowledgeDocKind {
357    pub fn as_str(self) -> &'static str {
358        match self {
359            KnowledgeDocKind::Guide => "guide",
360            KnowledgeDocKind::Reference => "reference",
361            KnowledgeDocKind::Taxonomy => "taxonomy",
362            KnowledgeDocKind::Domain => "domain",
363            KnowledgeDocKind::Entity => "entity",
364            KnowledgeDocKind::Policy => "policy",
365        }
366    }
367}
368
369impl KnowledgeDocAuthority {
370    pub fn as_str(self) -> &'static str {
371        match self {
372            KnowledgeDocAuthority::Canonical => "canonical",
373            KnowledgeDocAuthority::Supporting => "supporting",
374            KnowledgeDocAuthority::Pattern => "pattern",
375            KnowledgeDocAuthority::Reference => "reference",
376            KnowledgeDocAuthority::Advisory => "advisory",
377            KnowledgeDocAuthority::Example => "example",
378            KnowledgeDocAuthority::Draft => "draft",
379            KnowledgeDocAuthority::Deprecated => "deprecated",
380        }
381    }
382}
383
384impl KnowledgeDocStatus {
385    pub fn as_str(self) -> &'static str {
386        match self {
387            KnowledgeDocStatus::Stable => "stable",
388            KnowledgeDocStatus::Draft => "draft",
389            KnowledgeDocStatus::Deprecated => "deprecated",
390        }
391    }
392}
393
394fn search_pack<P: KnowledgePack + ?Sized>(
395    pack: &P,
396    query: &str,
397    filter: KnowledgeDocFilter,
398    mode: SearchMode,
399) -> Vec<KnowledgeDocSearchHit> {
400    let needle = normalize(query);
401    let mut hits = Vec::new();
402
403    for manifest in pack.list_docs(filter) {
404        let mut score = 0;
405        let mut matched = BTreeSet::new();
406
407        score += score_field(&needle, &manifest.id, 100, "id", &mut matched);
408        score += score_field(
409            &needle,
410            &manifest.virtual_path,
411            90,
412            "virtual_path",
413            &mut matched,
414        );
415        score += score_field(&needle, &manifest.title, 80, "title", &mut matched);
416        score += score_field(&needle, &manifest.summary, 60, "summary", &mut matched);
417
418        for alias in &manifest.aliases {
419            score += score_field(&needle, alias, 75, "alias", &mut matched);
420        }
421        for tag in &manifest.tags {
422            score += score_field(&needle, tag, 70, "tag", &mut matched);
423        }
424        for keyword in &manifest.keywords {
425            score += score_field(&needle, keyword, 65, "keyword", &mut matched);
426        }
427
428        let content = match mode {
429            SearchMode::MetadataOnly => None,
430            SearchMode::FullText => pack.doc_content(manifest),
431        };
432        if let Some(content) = content.as_ref() {
433            score += score_field(&needle, content, 20, "content", &mut matched);
434        }
435
436        if score > 0 || needle.is_empty() {
437            hits.push(KnowledgeDocSearchHit {
438                id: manifest.id.clone(),
439                virtual_path: manifest.virtual_path.clone(),
440                title: manifest.title.clone(),
441                summary: manifest.summary.clone(),
442                kind: manifest.kind,
443                authority: manifest.authority,
444                tags: manifest.tags.clone(),
445                score,
446                matched: matched.into_iter().collect(),
447                content: matches!(mode, SearchMode::FullText)
448                    .then(|| content.map(Cow::into_owned).unwrap_or_default()),
449            });
450        }
451    }
452
453    hits.sort_by(|a, b| {
454        b.score
455            .cmp(&a.score)
456            .then_with(|| a.virtual_path.cmp(&b.virtual_path))
457    });
458    hits
459}
460
461fn matches_filter<P: KnowledgePack + ?Sized>(
462    pack: &P,
463    doc: &KnowledgeDocManifest,
464    filter: &KnowledgeDocFilter,
465) -> bool {
466    if let Some(kind) = filter.kind
467        && doc.kind != kind
468    {
469        return false;
470    }
471    if let Some(authority) = filter.authority
472        && doc.authority != authority
473    {
474        return false;
475    }
476    if let Some(status) = filter.status
477        && doc.status != status
478    {
479        return false;
480    }
481    if let Some(prefix) = &filter.path_prefix
482        && !doc.virtual_path.starts_with(prefix)
483    {
484        return false;
485    }
486    if !filter.tags.is_empty()
487        && !filter
488            .tags
489            .iter()
490            .all(|tag| doc.tags.iter().any(|doc_tag| doc_tag == tag))
491    {
492        return false;
493    }
494    if let Some(target) = &filter.related_to {
495        let has_edge = doc.related.iter().any(|edge| {
496            let edge_matches_target = edge.target == *target
497                || pack
498                    .read_manifest(&edge.target)
499                    .map(|edge_target| {
500                        edge_target.id == *target || edge_target.virtual_path == *target
501                    })
502                    .unwrap_or(false);
503            edge_matches_target
504                && filter
505                    .edge_type
506                    .as_ref()
507                    .map(|expected| edge.edge_type == *expected)
508                    .unwrap_or(true)
509        });
510        if !has_edge {
511            return false;
512        }
513    }
514    true
515}
516
517fn push_neighbor_edge(
518    neighbors: &mut BTreeMap<String, KnowledgeDocNeighbor>,
519    neighbor_target: String,
520    edge: KnowledgeDocNeighborEdge,
521) {
522    let neighbor =
523        neighbors
524            .entry(neighbor_target.clone())
525            .or_insert_with(|| KnowledgeDocNeighbor {
526                target: neighbor_target,
527                edges: Vec::new(),
528            });
529    if !neighbor.edges.contains(&edge) {
530        neighbor.edges.push(edge);
531        neighbor.edges.sort_by(|left, right| {
532            left.source
533                .cmp(&right.source)
534                .then_with(|| left.target.cmp(&right.target))
535                .then_with(|| left.edge_type.as_str().cmp(right.edge_type.as_str()))
536                .then_with(|| left.note.cmp(&right.note))
537        });
538    }
539}
540
541fn score_field(
542    needle: &str,
543    haystack: &str,
544    weight: usize,
545    label: &str,
546    matched: &mut BTreeSet<String>,
547) -> usize {
548    if needle.is_empty() {
549        return 1;
550    }
551    let haystack = normalize(haystack);
552    if haystack == needle {
553        matched.insert(label.to_string());
554        weight * 2
555    } else if haystack.contains(needle) {
556        matched.insert(label.to_string());
557        weight
558    } else {
559        0
560    }
561}
562
/// Canonical form used for comparisons: surrounding whitespace removed and
/// the remaining text lowercased.
fn normalize(value: &str) -> String {
    let trimmed = value.trim();
    trimmed.to_lowercase()
}