1use std::borrow::Cow;
7use std::collections::BTreeSet;
8use std::path::{Path, PathBuf};
9
10use anyhow::{Context, Result};
11use serde::{Deserialize, Deserializer, Serialize, Serializer};
12
13pub mod tools;
14
15pub trait KnowledgePackManifest: Send + Sync {
21 fn pack_id(&self) -> &str;
22 fn version(&self) -> &str;
23 fn schema_version(&self) -> u32;
24 fn root_uri(&self) -> &str;
25 fn content_hash(&self) -> &str;
26 fn docs(&self) -> &[KnowledgeDocManifest];
27
28 fn read_doc_manifest(&self, selector: &str) -> Option<&KnowledgeDocManifest> {
29 self.docs().iter().find(|doc| {
30 doc.id == selector || doc.selector == selector || doc.source_path == selector
31 })
32 }
33}
34
/// Owned, serde-(de)serializable pack manifest.
///
/// This is the concrete data carrier behind [`KnowledgePackManifest`]; the
/// trait implementation simply exposes these fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgePackManifestData {
    /// Pack identifier. The key `pack_slug` is also accepted on input.
    #[serde(alias = "pack_slug")]
    pub pack_id: String,
    /// Pack version; `"1"` when the key is absent from the input.
    #[serde(default = "default_knowledge_pack_version")]
    pub version: String,
    /// Manifest schema version; `1` when the key is absent from the input.
    #[serde(default = "default_knowledge_schema_version")]
    pub schema_version: u32,
    /// Root URI of the pack; empty when absent from the input.
    #[serde(default)]
    pub root_uri: String,
    /// Content hash of the pack; empty when absent from the input.
    #[serde(default)]
    pub content_hash: String,
    /// Document manifests contained in the pack; empty when absent.
    #[serde(default)]
    pub docs: Vec<KnowledgeDocManifest>,
}
55
56impl KnowledgePackManifest for KnowledgePackManifestData {
57 fn pack_id(&self) -> &str {
58 &self.pack_id
59 }
60
61 fn version(&self) -> &str {
62 &self.version
63 }
64
65 fn schema_version(&self) -> u32 {
66 self.schema_version
67 }
68
69 fn root_uri(&self) -> &str {
70 &self.root_uri
71 }
72
73 fn content_hash(&self) -> &str {
74 &self.content_hash
75 }
76
77 fn docs(&self) -> &[KnowledgeDocManifest] {
78 &self.docs
79 }
80}
81
/// Serde default for a pack manifest's `version` field: the string `"1"`.
fn default_knowledge_pack_version() -> String {
    String::from("1")
}
85
/// Serde default for a pack manifest's `schema_version` field: `1`.
fn default_knowledge_schema_version() -> u32 {
    1_u32
}
89
/// Metadata describing one document inside a knowledge pack.
///
/// `id`, `selector`, and `source_path` are all accepted as lookup keys by
/// `KnowledgePackManifest::read_doc_manifest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocManifest {
    /// Document identifier; scored with the highest weight by pack search.
    pub id: String,
    /// Selector of the document; used for prefix filtering and result ordering.
    pub selector: String,
    /// Path of the document body, joined onto the pack's content root when read.
    pub source_path: String,
    /// Document title; included in search scoring.
    pub title: String,
    /// Short summary; included in search scoring.
    pub summary: String,
    /// Normalized document kind, compared for equality by the `kind` filter.
    pub kind: KnowledgeDocKind,
    /// Tags of the document; used by the `tags` filter and search scoring.
    pub tags: Vec<String>,
    /// Outgoing edges from this document to other documents.
    pub related: Vec<KnowledgeDocEdge>,
    /// Last-update marker; empty when absent from the input. The expected
    /// format is not established by this file.
    #[serde(default)]
    pub updated_at: String,
}
117
/// Type of a relationship edge between two documents.
///
/// Variants are (de)serialized in `snake_case`, e.g. `PartOf` as `part_of`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum KnowledgeDocEdgeType {
    /// Serialized as `part_of`.
    PartOf,
    /// Serialized as `defines`.
    Defines,
    /// Serialized as `governs`.
    Governs,
    /// Serialized as `classifies`.
    Classifies,
    /// Serialized as `references`.
    References,
    /// Serialized as `depends_on`.
    DependsOn,
    /// Serialized as `extends`.
    Extends,
    /// Serialized as `related_to`.
    RelatedTo,
}
130
/// Document kind stored as a string slug.
///
/// The inner string is private; values are built through
/// `KnowledgeDocKind::new` (also used by the `From` and `Deserialize`
/// implementations), which normalizes the input into a lowercase ASCII slug.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct KnowledgeDocKind(String);
133
/// A typed, directed edge from one document to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocEdge {
    /// Relationship type; written as `type`, and `edge_type` is also accepted on input.
    #[serde(rename = "type", alias = "edge_type")]
    pub edge_type: KnowledgeDocEdgeType,
    /// Key of the target document, resolved through `read_manifest`
    /// (so an id, selector, or source path all work).
    pub target: String,
    /// Optional free-text description of the relationship.
    pub description: Option<String>,
}
144
/// Criteria used to narrow a document listing or search.
///
/// All criteria that are set must hold for a document to pass; the default
/// value matches every document.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct KnowledgeDocFilter {
    /// Tags the document must carry; every listed tag has to be present.
    pub tags: Vec<String>,
    /// Required document kind (exact match).
    pub kind: Option<KnowledgeDocKind>,
    /// Required prefix of the document selector.
    pub selector_prefix: Option<String>,
    /// Requires an outgoing edge whose target equals this value, or resolves
    /// to a document whose id or selector equals it.
    pub related_to: Option<String>,
    /// Required type of the `related_to` edge; only consulted when
    /// `related_to` is set.
    pub edge_type: Option<KnowledgeDocEdgeType>,
}
153
/// Result of reading a document: its manifest together with its body text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocRead {
    /// Manifest of the document that was read.
    pub manifest: KnowledgeDocManifest,
    /// Full text content of the document.
    pub content: String,
}
159
/// A document together with its resolved outgoing edges.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocNeighbor {
    /// The document the neighborhood was requested for.
    pub document: KnowledgeDocManifest,
    /// Outgoing edges whose targets could be resolved within the pack.
    pub edges: Vec<KnowledgeDocNeighborEdge>,
}
167
/// One resolved edge in a [`KnowledgeDocNeighbor`] result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocNeighborEdge {
    /// Relationship type; (de)serialized under the key `type`.
    #[serde(rename = "type")]
    pub edge_type: KnowledgeDocEdgeType,
    /// Manifest of the document the edge points to.
    pub target: KnowledgeDocManifest,
}
175
/// One search result produced by `KnowledgePack::search`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeDocSearchHit {
    /// Manifest of the matching document.
    pub document: KnowledgeDocManifest,
    /// Sum of the weights contributed by every scored field; higher ranks first.
    pub score: usize,
    /// Labels of the fields that matched the query, in sorted order.
    pub matched: Vec<String>,
}
185
186pub trait KnowledgePack: Send + Sync {
188 fn manifest(&self) -> &dyn KnowledgePackManifest;
189
190 fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>>;
191
192 fn list_docs(&self, filter: KnowledgeDocFilter) -> Vec<&KnowledgeDocManifest> {
193 self.manifest()
194 .docs()
195 .iter()
196 .filter(|doc| matches_filter(self, doc, &filter))
197 .collect()
198 }
199
200 fn read_manifest(&self, path: &str) -> Option<&KnowledgeDocManifest> {
201 self.manifest().read_doc_manifest(path)
202 }
203
204 fn read_doc(&self, path: &str) -> Option<KnowledgeDocRead> {
205 let manifest = self.read_manifest(path)?.clone();
206 let content = self.doc_content(&manifest)?.into_owned();
207 Some(KnowledgeDocRead { manifest, content })
208 }
209
210 fn search(&self, query: &str, filter: KnowledgeDocFilter) -> Vec<KnowledgeDocSearchHit> {
211 search_pack(self, query, filter)
212 }
213
214 fn neighbors(
215 &self,
216 path: &str,
217 edge_type: Option<KnowledgeDocEdgeType>,
218 ) -> Option<KnowledgeDocNeighbor> {
219 let source = self.read_manifest(path)?;
220
221 let mut edges = Vec::new();
222
223 for edge in &source.related {
224 if let Some(expected) = edge_type
225 && edge.edge_type != expected
226 {
227 continue;
228 }
229 if let Some(target) = self.read_manifest(&edge.target) {
230 edges.push(KnowledgeDocNeighborEdge {
231 edge_type: edge.edge_type,
232 target: target.clone(),
233 });
234 }
235 }
236
237 edges.sort_by(|left, right| {
238 left.target
239 .selector
240 .cmp(&right.target.selector)
241 .then_with(|| left.edge_type.as_str().cmp(right.edge_type.as_str()))
242 });
243 edges.dedup_by(|left, right| {
244 left.edge_type == right.edge_type && left.target.selector == right.target.selector
245 });
246
247 Some(KnowledgeDocNeighbor {
248 document: source.clone(),
249 edges,
250 })
251 }
252}
253
/// Knowledge pack loaded from a package's YAML knowledge manifest.
#[derive(Debug, Clone)]
pub struct PackageKnowledgePack {
    // Directory containing the manifest file; document paths are joined onto it.
    content_root: PathBuf,
    // Optional selector declared by the manifest (body value wins over top level).
    selector: Option<String>,
    // Fully resolved manifest with all defaults applied.
    manifest: KnowledgePackManifestData,
}
262
263impl PackageKnowledgePack {
264 pub fn load(path: &Path, package_version: &str) -> Result<Self> {
265 let content = std::fs::read_to_string(path)
266 .with_context(|| format!("failed to read knowledge manifest {}", path.display()))?;
267 let file: PackageKnowledgeManifestFile =
268 serde_yaml::from_str(&content).context("invalid package knowledge manifest")?;
269 let root_uri = file
270 .manifest
271 .root_uri
272 .or(file.root_uri)
273 .unwrap_or_else(|| format!("pkg://{}/", file.manifest.pack_id));
274 let pack_id = file.manifest.pack_id;
275 let docs = file
276 .manifest
277 .docs
278 .into_iter()
279 .map(|doc| doc.into_manifest(&pack_id))
280 .collect();
281 Ok(Self {
282 content_root: path.parent().unwrap_or_else(|| Path::new("")).to_path_buf(),
283 selector: file.manifest.selector.or(file.selector),
284 manifest: KnowledgePackManifestData {
285 pack_id,
286 version: file
287 .manifest
288 .version
289 .unwrap_or_else(|| package_version.to_string()),
290 schema_version: file.manifest.schema_version.unwrap_or(1),
291 root_uri,
292 content_hash: file.manifest.content_hash.unwrap_or_default(),
293 docs,
294 },
295 })
296 }
297
298 pub fn selector(&self) -> Option<&str> {
299 self.selector.as_deref()
300 }
301}
302
303impl KnowledgePack for PackageKnowledgePack {
304 fn manifest(&self) -> &dyn KnowledgePackManifest {
305 &self.manifest
306 }
307
308 fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>> {
309 let content =
310 std::fs::read_to_string(self.content_root.join(&manifest.source_path)).ok()?;
311 Some(Cow::Owned(content))
312 }
313}
314
/// Knowledge pack backed by a directory that contains a `manifest.json`.
#[derive(Debug, Clone)]
pub struct FilesystemKnowledgePack {
    // Directory the pack was loaded from; document paths are resolved under it.
    content_root: PathBuf,
    // Manifest parsed from the directory's manifest file.
    manifest: KnowledgePackManifestData,
}
324
325impl FilesystemKnowledgePack {
326 pub const MANIFEST_FILENAME: &'static str = "manifest.json";
327
328 pub fn load(root: &Path) -> Option<Self> {
329 let manifest_path = root.join(Self::MANIFEST_FILENAME);
330 let content = std::fs::read_to_string(&manifest_path).ok()?;
331 let mut manifest: KnowledgePackManifestData = serde_json::from_str(&content).ok()?;
332 if manifest.root_uri.trim().is_empty() {
333 manifest.root_uri = format!("file://{}/", root.display());
334 }
335 Some(Self {
336 content_root: root.to_path_buf(),
337 manifest,
338 })
339 }
340
341 fn doc_path(&self, source_path: &str) -> Option<PathBuf> {
342 let relative = Path::new(source_path);
343 if relative.is_absolute()
344 || relative
345 .components()
346 .any(|component| !matches!(component, std::path::Component::Normal(_)))
347 {
348 return None;
349 }
350 Some(self.content_root.join(relative))
351 }
352}
353
354impl KnowledgePack for FilesystemKnowledgePack {
355 fn manifest(&self) -> &dyn KnowledgePackManifest {
356 &self.manifest
357 }
358
359 fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>> {
360 let content = std::fs::read_to_string(self.doc_path(&manifest.source_path)?).ok()?;
361 Some(Cow::Owned(content))
362 }
363}
364
/// Top-level shape of a package knowledge manifest file.
#[derive(Debug, Deserialize)]
struct PackageKnowledgeManifestFile {
    // Top-level selector; used only when the manifest body declares none.
    selector: Option<String>,
    // Top-level root URI; used only when the manifest body declares none.
    root_uri: Option<String>,
    // The manifest body proper.
    manifest: PackageKnowledgeManifestBody,
}
371
/// The `manifest` section of a package knowledge manifest file.
///
/// Optional fields are filled in by `PackageKnowledgePack::load`.
#[derive(Debug, Deserialize)]
struct PackageKnowledgeManifestBody {
    // Pack identifier (required).
    pack_id: String,
    // Pack selector; takes precedence over the file-level selector.
    selector: Option<String>,
    // Pack version; the loader falls back to the package version.
    version: Option<String>,
    // Schema version; the loader falls back to 1.
    schema_version: Option<u32>,
    // Root URI; takes precedence over the file-level root URI.
    root_uri: Option<String>,
    // Content hash; the loader falls back to the empty string.
    content_hash: Option<String>,
    // Document entries; empty when absent.
    #[serde(default)]
    docs: Vec<PackageKnowledgeDoc>,
}
383
/// One document entry as written in a package knowledge manifest.
///
/// Converted to a [`KnowledgeDocManifest`] by `into_manifest`, which derives
/// a missing `id` or `selector`.
#[derive(Debug, Deserialize)]
struct PackageKnowledgeDoc {
    // Explicit document id; derived as `<pack_id>.<selector>` when absent.
    id: Option<String>,
    // Explicit selector; derived from `source_path` (or `id`) when absent.
    selector: Option<String>,
    // Path of the document body relative to the manifest's directory.
    source_path: String,
    // Document title.
    title: String,
    // Short document summary.
    summary: String,
    // Document kind; defaults to `reference` when absent.
    #[serde(default)]
    kind: KnowledgeDocKind,
    // Tags; empty when absent.
    #[serde(default)]
    tags: Vec<String>,
    // Outgoing edges; empty when absent.
    #[serde(default)]
    related: Vec<KnowledgeDocEdge>,
    // Last-update marker; empty when absent.
    #[serde(default)]
    updated_at: String,
}
400
401impl PackageKnowledgeDoc {
402 fn into_manifest(self, pack_id: &str) -> KnowledgeDocManifest {
403 let id_hint = self.id.as_deref().unwrap_or_default();
404 let selector = self
405 .selector
406 .unwrap_or_else(|| selector_from_source_path(&self.source_path, pack_id, id_hint));
407 let id = self.id.unwrap_or_else(|| format!("{pack_id}.{selector}"));
408 KnowledgeDocManifest {
409 id,
410 selector,
411 source_path: self.source_path,
412 title: self.title,
413 summary: self.summary,
414 kind: self.kind,
415 tags: self.tags,
416 related: self.related,
417 updated_at: self.updated_at,
418 }
419 }
420}
421
/// Derives a dotted selector from a document's source path.
///
/// A leading `docs/` and a trailing `.md` are removed and every `/` becomes
/// `.`. When nothing is left, the selector falls back to `id` with a leading
/// `<pack_id>.` removed (or `id` unchanged when it lacks that prefix).
fn selector_from_source_path(source_path: &str, pack_id: &str, id: &str) -> String {
    let without_dir = match source_path.strip_prefix("docs/") {
        Some(rest) => rest,
        None => source_path,
    };
    let stem = match without_dir.strip_suffix(".md") {
        Some(rest) => rest,
        None => without_dir,
    };
    if !stem.is_empty() {
        return stem.split('/').collect::<Vec<_>>().join(".");
    }

    let pack_prefix = format!("{pack_id}.");
    match id.strip_prefix(pack_prefix.as_str()) {
        Some(local) => local.to_string(),
        None => id.to_string(),
    }
}
434
435impl KnowledgeDocEdgeType {
436 pub fn as_str(self) -> &'static str {
437 match self {
438 KnowledgeDocEdgeType::PartOf => "part_of",
439 KnowledgeDocEdgeType::Defines => "defines",
440 KnowledgeDocEdgeType::Governs => "governs",
441 KnowledgeDocEdgeType::Classifies => "classifies",
442 KnowledgeDocEdgeType::References => "references",
443 KnowledgeDocEdgeType::DependsOn => "depends_on",
444 KnowledgeDocEdgeType::Extends => "extends",
445 KnowledgeDocEdgeType::RelatedTo => "related_to",
446 }
447 }
448}
449
450impl KnowledgeDocKind {
451 pub fn new(value: impl AsRef<str>) -> Self {
452 let value = value.as_ref().trim().to_ascii_lowercase();
453 let mut slug = String::new();
454 let mut last_was_separator = false;
455 for ch in value.chars() {
456 if ch.is_ascii_alphanumeric() {
457 slug.push(ch);
458 last_was_separator = false;
459 } else if !last_was_separator {
460 slug.push('_');
461 last_was_separator = true;
462 }
463 }
464 let slug = slug.trim_matches('_');
465 if slug.is_empty() {
466 Self("reference".to_string())
467 } else {
468 Self(slug.to_string())
469 }
470 }
471
472 pub fn as_str(&self) -> &str {
473 &self.0
474 }
475}
476
477impl Default for KnowledgeDocKind {
478 fn default() -> Self {
479 Self::new("reference")
480 }
481}
482
483impl From<&str> for KnowledgeDocKind {
484 fn from(value: &str) -> Self {
485 Self::new(value)
486 }
487}
488
489impl From<String> for KnowledgeDocKind {
490 fn from(value: String) -> Self {
491 Self::new(value)
492 }
493}
494
495impl Serialize for KnowledgeDocKind {
496 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
497 where
498 S: Serializer,
499 {
500 serializer.serialize_str(self.as_str())
501 }
502}
503
504impl<'de> Deserialize<'de> for KnowledgeDocKind {
505 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
506 where
507 D: Deserializer<'de>,
508 {
509 String::deserialize(deserializer).map(Self::new)
510 }
511}
512
513fn search_pack<P: KnowledgePack + ?Sized>(
514 pack: &P,
515 query: &str,
516 filter: KnowledgeDocFilter,
517) -> Vec<KnowledgeDocSearchHit> {
518 let needle = normalize(query);
519 let mut hits = Vec::new();
520
521 for manifest in pack.list_docs(filter) {
522 let mut score = 0;
523 let mut matched = BTreeSet::new();
524
525 score += score_field(&needle, &manifest.id, 100, "id", &mut matched);
526 score += score_field(&needle, &manifest.selector, 90, "selector", &mut matched);
527 score += score_field(&needle, &manifest.title, 80, "title", &mut matched);
528 score += score_field(&needle, &manifest.summary, 60, "summary", &mut matched);
529
530 for tag in &manifest.tags {
531 score += score_field(&needle, tag, 70, "tag", &mut matched);
532 }
533
534 if score > 0 || needle.is_empty() {
535 hits.push(KnowledgeDocSearchHit {
536 document: manifest.clone(),
537 score,
538 matched: matched.into_iter().collect(),
539 });
540 }
541 }
542
543 hits.sort_by(|a, b| {
544 b.score
545 .cmp(&a.score)
546 .then_with(|| a.document.selector.cmp(&b.document.selector))
547 });
548 hits
549}
550
551fn matches_filter<P: KnowledgePack + ?Sized>(
552 pack: &P,
553 doc: &KnowledgeDocManifest,
554 filter: &KnowledgeDocFilter,
555) -> bool {
556 if let Some(kind) = &filter.kind
557 && doc.kind != *kind
558 {
559 return false;
560 }
561 if let Some(prefix) = &filter.selector_prefix
562 && !doc.selector.starts_with(prefix)
563 {
564 return false;
565 }
566 if !filter.tags.is_empty()
567 && !filter
568 .tags
569 .iter()
570 .all(|tag| doc.tags.iter().any(|doc_tag| doc_tag == tag))
571 {
572 return false;
573 }
574 if let Some(target) = &filter.related_to {
575 let has_edge = doc.related.iter().any(|edge| {
576 let edge_matches_target = edge.target == *target
577 || pack
578 .read_manifest(&edge.target)
579 .map(|edge_target| edge_target.id == *target || edge_target.selector == *target)
580 .unwrap_or(false);
581 edge_matches_target
582 && filter
583 .edge_type
584 .as_ref()
585 .map(|expected| edge.edge_type == *expected)
586 .unwrap_or(true)
587 });
588 if !has_edge {
589 return false;
590 }
591 }
592 true
593}
594
595fn score_field(
596 needle: &str,
597 haystack: &str,
598 weight: usize,
599 label: &str,
600 matched: &mut BTreeSet<String>,
601) -> usize {
602 if needle.is_empty() {
603 return 1;
604 }
605 let haystack = normalize(haystack);
606 if haystack == needle {
607 matched.insert(label.to_string());
608 weight * 2
609 } else if haystack.contains(needle) {
610 matched.insert(label.to_string());
611 weight
612 } else {
613 0
614 }
615}
616
/// Canonical form used for search comparisons: surrounding whitespace
/// removed and the remainder lowercased.
fn normalize(value: &str) -> String {
    let trimmed = value.trim();
    trimmed.to_lowercase()
}
620
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// A package manifest entry that declares only a selector still loads,
    /// and its id is derived as `<pack_id>.<selector>`.
    #[test]
    fn package_knowledge_manifest_accepts_selector_without_doc_id() {
        const MANIFEST_YAML: &str = r#"
schema: nenjo.knowledge.v1
manifest:
  pack_id: nenjo.core
  version: 0.1.0
  docs:
    - selector: domain.nenjo
      source_path: docs/domain/nenjo.md
      title: Nenjo
      summary: Platform overview.
      kind: domain
      tags: [domain:nenjo]
      related: []
"#;

        // Process id plus a nanosecond timestamp keeps the scratch directory unique.
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let dir = std::env::temp_dir().join(format!(
            "nenjo-knowledge-package-manifest-{pid}-{unique}",
            pid = std::process::id()
        ));
        let manifest_path = dir.join("manifest.yaml");
        let docs_dir = dir.join("docs/domain");

        std::fs::create_dir_all(&docs_dir).unwrap();
        std::fs::write(&manifest_path, MANIFEST_YAML).unwrap();
        std::fs::write(docs_dir.join("nenjo.md"), "# Nenjo\n\nKnowledge content.").unwrap();

        let pack = PackageKnowledgePack::load(&manifest_path, "0.1.0").unwrap();
        let KnowledgeDocRead { manifest, content } = pack.read_doc("domain.nenjo").unwrap();

        assert_eq!(manifest.selector, "domain.nenjo");
        assert_eq!(manifest.id, "nenjo.core.domain.nenjo");
        assert!(content.contains("Knowledge content"));

        std::fs::remove_dir_all(&dir).unwrap();
    }
}