1use std::borrow::Cow;
7use std::collections::BTreeSet;
8use std::path::{Path, PathBuf};
9
10use anyhow::{Context, Result};
11use serde::{Deserialize, Deserializer, Serialize, Serializer};
12
13pub mod tools;
14
15pub trait KnowledgePackManifest: Send + Sync {
21 fn pack_id(&self) -> &str;
22 fn version(&self) -> &str;
23 fn schema_version(&self) -> u32;
24 fn root_uri(&self) -> &str;
25 fn content_hash(&self) -> &str;
26 fn docs(&self) -> &[KnowledgeDocManifest];
27
28 fn read_doc_manifest(&self, selector: &str) -> Option<&KnowledgeDocManifest> {
29 self.docs().iter().find(|doc| {
30 doc.id == selector || doc.selector == selector || doc.source_path == selector
31 })
32 }
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct KnowledgePackManifestData {
42 pub pack_id: String,
43 pub version: String,
44 pub schema_version: u32,
45 pub root_uri: String,
46 #[serde(default)]
47 pub content_hash: String,
48 pub docs: Vec<KnowledgeDocManifest>,
49}
50
51impl KnowledgePackManifest for KnowledgePackManifestData {
52 fn pack_id(&self) -> &str {
53 &self.pack_id
54 }
55
56 fn version(&self) -> &str {
57 &self.version
58 }
59
60 fn schema_version(&self) -> u32 {
61 self.schema_version
62 }
63
64 fn root_uri(&self) -> &str {
65 &self.root_uri
66 }
67
68 fn content_hash(&self) -> &str {
69 &self.content_hash
70 }
71
72 fn docs(&self) -> &[KnowledgeDocManifest] {
73 &self.docs
74 }
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct KnowledgeDocManifest {
84 pub id: String,
86 pub selector: String,
88 pub source_path: String,
90 pub title: String,
92 pub summary: String,
94 pub kind: KnowledgeDocKind,
96 pub tags: Vec<String>,
98 pub related: Vec<KnowledgeDocEdge>,
100 #[serde(default)]
102 pub updated_at: String,
103}
104
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
106#[serde(rename_all = "snake_case")]
107pub enum KnowledgeDocEdgeType {
108 PartOf,
109 Defines,
110 Governs,
111 Classifies,
112 References,
113 DependsOn,
114 Extends,
115 RelatedTo,
116}
117
/// Document kind stored as a string newtype.
///
/// The inner field is private, so values outside this module are created
/// through the type's constructors.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct KnowledgeDocKind(String);
120
121#[derive(Debug, Clone, Serialize, Deserialize)]
123pub struct KnowledgeDocEdge {
124 #[serde(rename = "type", alias = "edge_type")]
125 pub edge_type: KnowledgeDocEdgeType,
126 pub target: String,
128 pub description: Option<String>,
130}
131
132#[derive(Debug, Clone, Default, Serialize, Deserialize)]
133pub struct KnowledgeDocFilter {
134 pub tags: Vec<String>,
135 pub kind: Option<KnowledgeDocKind>,
136 pub selector_prefix: Option<String>,
137 pub related_to: Option<String>,
138 pub edge_type: Option<KnowledgeDocEdgeType>,
139}
140
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct KnowledgeDocRead {
143 pub manifest: KnowledgeDocManifest,
144 pub content: String,
145}
146
147#[derive(Debug, Clone, Serialize, Deserialize)]
148pub struct KnowledgeDocNeighbor {
149 pub document: KnowledgeDocManifest,
151 pub edges: Vec<KnowledgeDocNeighborEdge>,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct KnowledgeDocNeighborEdge {
157 #[serde(rename = "type")]
158 pub edge_type: KnowledgeDocEdgeType,
159 pub target: KnowledgeDocManifest,
161}
162
163#[derive(Debug, Clone, Serialize, Deserialize)]
164pub struct KnowledgeDocSearchHit {
165 pub document: KnowledgeDocManifest,
167 pub score: usize,
169 pub matched: Vec<String>,
171}
172
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct KnowledgeDocTree {
175 pub root_uri: String,
176 pub entries: Vec<KnowledgeDocTreeEntry>,
177}
178
179#[derive(Debug, Clone, Serialize, Deserialize)]
180pub struct KnowledgeDocTreeEntry {
181 pub selector: String,
182 pub title: String,
183 pub kind: KnowledgeDocKind,
184 pub tags: Vec<String>,
185}
186
187pub trait KnowledgePack: Send + Sync {
189 fn manifest(&self) -> &dyn KnowledgePackManifest;
190
191 fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>>;
192
193 fn list_docs(&self, filter: KnowledgeDocFilter) -> Vec<&KnowledgeDocManifest> {
194 self.manifest()
195 .docs()
196 .iter()
197 .filter(|doc| matches_filter(self, doc, &filter))
198 .collect()
199 }
200
201 fn read_manifest(&self, path: &str) -> Option<&KnowledgeDocManifest> {
202 self.manifest().read_doc_manifest(path)
203 }
204
205 fn read_doc(&self, path: &str) -> Option<KnowledgeDocRead> {
206 let manifest = self.read_manifest(path)?.clone();
207 let content = self.doc_content(&manifest)?.into_owned();
208 Some(KnowledgeDocRead { manifest, content })
209 }
210
211 fn search(&self, query: &str, filter: KnowledgeDocFilter) -> Vec<KnowledgeDocSearchHit> {
212 search_pack(self, query, filter)
213 }
214
215 fn neighbors(
216 &self,
217 path: &str,
218 edge_type: Option<KnowledgeDocEdgeType>,
219 ) -> Option<KnowledgeDocNeighbor> {
220 let source = self.read_manifest(path)?;
221
222 let mut edges = Vec::new();
223
224 for edge in &source.related {
225 if let Some(expected) = edge_type
226 && edge.edge_type != expected
227 {
228 continue;
229 }
230 if let Some(target) = self.read_manifest(&edge.target) {
231 edges.push(KnowledgeDocNeighborEdge {
232 edge_type: edge.edge_type,
233 target: target.clone(),
234 });
235 }
236 }
237
238 edges.sort_by(|left, right| {
239 left.target
240 .selector
241 .cmp(&right.target.selector)
242 .then_with(|| left.edge_type.as_str().cmp(right.edge_type.as_str()))
243 });
244 edges.dedup_by(|left, right| {
245 left.edge_type == right.edge_type && left.target.selector == right.target.selector
246 });
247
248 Some(KnowledgeDocNeighbor {
249 document: source.clone(),
250 edges,
251 })
252 }
253}
254
255#[derive(Debug, Clone)]
258pub struct PackageKnowledgePack {
259 content_root: PathBuf,
260 selector: Option<String>,
261 manifest: KnowledgePackManifestData,
262}
263
264impl PackageKnowledgePack {
265 pub fn load(path: &Path, package_version: &str) -> Result<Self> {
266 let content = std::fs::read_to_string(path)
267 .with_context(|| format!("failed to read knowledge manifest {}", path.display()))?;
268 let file: PackageKnowledgeManifestFile =
269 serde_yaml::from_str(&content).context("invalid package knowledge manifest")?;
270 let root_uri = file
271 .manifest
272 .root_uri
273 .or(file.root_uri)
274 .unwrap_or_else(|| format!("pkg://{}/", file.manifest.pack_id));
275 let pack_id = file.manifest.pack_id;
276 let docs = file
277 .manifest
278 .docs
279 .into_iter()
280 .map(|doc| doc.into_manifest(&pack_id))
281 .collect();
282 Ok(Self {
283 content_root: path.parent().unwrap_or_else(|| Path::new("")).to_path_buf(),
284 selector: file.manifest.selector.or(file.selector),
285 manifest: KnowledgePackManifestData {
286 pack_id,
287 version: file
288 .manifest
289 .version
290 .unwrap_or_else(|| package_version.to_string()),
291 schema_version: file.manifest.schema_version.unwrap_or(1),
292 root_uri,
293 content_hash: file.manifest.content_hash.unwrap_or_default(),
294 docs,
295 },
296 })
297 }
298
299 pub fn selector(&self) -> Option<&str> {
300 self.selector.as_deref()
301 }
302}
303
304impl KnowledgePack for PackageKnowledgePack {
305 fn manifest(&self) -> &dyn KnowledgePackManifest {
306 &self.manifest
307 }
308
309 fn doc_content(&self, manifest: &KnowledgeDocManifest) -> Option<Cow<'_, str>> {
310 let content =
311 std::fs::read_to_string(self.content_root.join(&manifest.source_path)).ok()?;
312 Some(Cow::Owned(content))
313 }
314}
315
316#[derive(Debug, Deserialize)]
317struct PackageKnowledgeManifestFile {
318 selector: Option<String>,
319 root_uri: Option<String>,
320 manifest: PackageKnowledgeManifestBody,
321}
322
323#[derive(Debug, Deserialize)]
324struct PackageKnowledgeManifestBody {
325 pack_id: String,
326 selector: Option<String>,
327 version: Option<String>,
328 schema_version: Option<u32>,
329 root_uri: Option<String>,
330 content_hash: Option<String>,
331 #[serde(default)]
332 docs: Vec<PackageKnowledgeDoc>,
333}
334
335#[derive(Debug, Deserialize)]
336struct PackageKnowledgeDoc {
337 id: Option<String>,
338 selector: Option<String>,
339 source_path: String,
340 title: String,
341 summary: String,
342 #[serde(default)]
343 kind: KnowledgeDocKind,
344 #[serde(default)]
345 tags: Vec<String>,
346 #[serde(default)]
347 related: Vec<KnowledgeDocEdge>,
348 #[serde(default)]
349 updated_at: String,
350}
351
352impl PackageKnowledgeDoc {
353 fn into_manifest(self, pack_id: &str) -> KnowledgeDocManifest {
354 let id_hint = self.id.as_deref().unwrap_or_default();
355 let selector = self
356 .selector
357 .unwrap_or_else(|| selector_from_source_path(&self.source_path, pack_id, id_hint));
358 let id = self.id.unwrap_or_else(|| format!("{pack_id}.{selector}"));
359 KnowledgeDocManifest {
360 id,
361 selector,
362 source_path: self.source_path,
363 title: self.title,
364 summary: self.summary,
365 kind: self.kind,
366 tags: self.tags,
367 related: self.related,
368 updated_at: self.updated_at,
369 }
370 }
371}
372
/// Derives a dotted selector from a document source path.
///
/// Strips a leading `docs/` and a trailing `.md`, then turns every `/` into
/// `.`. When nothing is left, falls back to `id` with a leading
/// `{pack_id}.` removed (or `id` unchanged if it lacks that prefix).
fn selector_from_source_path(source_path: &str, pack_id: &str, id: &str) -> String {
    let without_prefix = source_path.strip_prefix("docs/").unwrap_or(source_path);
    let stem = without_prefix.strip_suffix(".md").unwrap_or(without_prefix);
    if !stem.is_empty() {
        return stem.replace('/', ".");
    }

    let pack_prefix = format!("{pack_id}.");
    match id.strip_prefix(pack_prefix.as_str()) {
        Some(rest) => rest.to_string(),
        None => id.to_string(),
    }
}
385
386impl KnowledgeDocEdgeType {
387 pub fn as_str(self) -> &'static str {
388 match self {
389 KnowledgeDocEdgeType::PartOf => "part_of",
390 KnowledgeDocEdgeType::Defines => "defines",
391 KnowledgeDocEdgeType::Governs => "governs",
392 KnowledgeDocEdgeType::Classifies => "classifies",
393 KnowledgeDocEdgeType::References => "references",
394 KnowledgeDocEdgeType::DependsOn => "depends_on",
395 KnowledgeDocEdgeType::Extends => "extends",
396 KnowledgeDocEdgeType::RelatedTo => "related_to",
397 }
398 }
399}
400
401impl KnowledgeDocKind {
402 pub fn new(value: impl AsRef<str>) -> Self {
403 let value = value.as_ref().trim().to_ascii_lowercase();
404 let mut slug = String::new();
405 let mut last_was_separator = false;
406 for ch in value.chars() {
407 if ch.is_ascii_alphanumeric() {
408 slug.push(ch);
409 last_was_separator = false;
410 } else if !last_was_separator {
411 slug.push('_');
412 last_was_separator = true;
413 }
414 }
415 let slug = slug.trim_matches('_');
416 if slug.is_empty() {
417 Self("reference".to_string())
418 } else {
419 Self(slug.to_string())
420 }
421 }
422
423 pub fn as_str(&self) -> &str {
424 &self.0
425 }
426}
427
428impl Default for KnowledgeDocKind {
429 fn default() -> Self {
430 Self::new("reference")
431 }
432}
433
434impl From<&str> for KnowledgeDocKind {
435 fn from(value: &str) -> Self {
436 Self::new(value)
437 }
438}
439
440impl From<String> for KnowledgeDocKind {
441 fn from(value: String) -> Self {
442 Self::new(value)
443 }
444}
445
446impl Serialize for KnowledgeDocKind {
447 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
448 where
449 S: Serializer,
450 {
451 serializer.serialize_str(self.as_str())
452 }
453}
454
455impl<'de> Deserialize<'de> for KnowledgeDocKind {
456 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
457 where
458 D: Deserializer<'de>,
459 {
460 String::deserialize(deserializer).map(Self::new)
461 }
462}
463
464fn search_pack<P: KnowledgePack + ?Sized>(
465 pack: &P,
466 query: &str,
467 filter: KnowledgeDocFilter,
468) -> Vec<KnowledgeDocSearchHit> {
469 let needle = normalize(query);
470 let mut hits = Vec::new();
471
472 for manifest in pack.list_docs(filter) {
473 let mut score = 0;
474 let mut matched = BTreeSet::new();
475
476 score += score_field(&needle, &manifest.id, 100, "id", &mut matched);
477 score += score_field(&needle, &manifest.selector, 90, "selector", &mut matched);
478 score += score_field(&needle, &manifest.title, 80, "title", &mut matched);
479 score += score_field(&needle, &manifest.summary, 60, "summary", &mut matched);
480
481 for tag in &manifest.tags {
482 score += score_field(&needle, tag, 70, "tag", &mut matched);
483 }
484
485 if score > 0 || needle.is_empty() {
486 hits.push(KnowledgeDocSearchHit {
487 document: manifest.clone(),
488 score,
489 matched: matched.into_iter().collect(),
490 });
491 }
492 }
493
494 hits.sort_by(|a, b| {
495 b.score
496 .cmp(&a.score)
497 .then_with(|| a.document.selector.cmp(&b.document.selector))
498 });
499 hits
500}
501
502fn matches_filter<P: KnowledgePack + ?Sized>(
503 pack: &P,
504 doc: &KnowledgeDocManifest,
505 filter: &KnowledgeDocFilter,
506) -> bool {
507 if let Some(kind) = &filter.kind
508 && doc.kind != *kind
509 {
510 return false;
511 }
512 if let Some(prefix) = &filter.selector_prefix
513 && !doc.selector.starts_with(prefix)
514 {
515 return false;
516 }
517 if !filter.tags.is_empty()
518 && !filter
519 .tags
520 .iter()
521 .all(|tag| doc.tags.iter().any(|doc_tag| doc_tag == tag))
522 {
523 return false;
524 }
525 if let Some(target) = &filter.related_to {
526 let has_edge = doc.related.iter().any(|edge| {
527 let edge_matches_target = edge.target == *target
528 || pack
529 .read_manifest(&edge.target)
530 .map(|edge_target| edge_target.id == *target || edge_target.selector == *target)
531 .unwrap_or(false);
532 edge_matches_target
533 && filter
534 .edge_type
535 .as_ref()
536 .map(|expected| edge.edge_type == *expected)
537 .unwrap_or(true)
538 });
539 if !has_edge {
540 return false;
541 }
542 }
543 true
544}
545
546fn score_field(
547 needle: &str,
548 haystack: &str,
549 weight: usize,
550 label: &str,
551 matched: &mut BTreeSet<String>,
552) -> usize {
553 if needle.is_empty() {
554 return 1;
555 }
556 let haystack = normalize(haystack);
557 if haystack == needle {
558 matched.insert(label.to_string());
559 weight * 2
560 } else if haystack.contains(needle) {
561 matched.insert(label.to_string());
562 weight
563 } else {
564 0
565 }
566}
567
/// Trims surrounding whitespace and lowercases `value` for comparison.
fn normalize(value: &str) -> String {
    let trimmed = value.trim();
    trimmed.to_lowercase()
}
571
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Deletes the wrapped directory tree when dropped, so the fixture is
    /// removed even if an assertion or `unwrap` in the test panics.
    struct TempDirGuard(std::path::PathBuf);

    impl Drop for TempDirGuard {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask the test result.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// A manifest entry with a selector but no id loads, and its id is
    /// derived as `{pack_id}.{selector}`.
    #[test]
    fn package_knowledge_manifest_accepts_selector_without_doc_id() {
        // pid + nanosecond timestamp keeps concurrent runs from colliding.
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let dir = std::env::temp_dir().join(format!(
            "nenjo-knowledge-package-manifest-{pid}-{unique}",
            pid = std::process::id()
        ));
        let _cleanup = TempDirGuard(dir.clone());
        let docs_dir = dir.join("docs/domain");
        std::fs::create_dir_all(&docs_dir).unwrap();
        std::fs::write(
            dir.join("manifest.yaml"),
            r#"
schema: nenjo.knowledge.v1
manifest:
  pack_id: nenjo.core
  version: 0.1.0
  docs:
    - selector: domain.nenjo
      source_path: docs/domain/nenjo.md
      title: Nenjo
      summary: Platform overview.
      kind: domain
      tags: [domain:nenjo]
      related: []
"#,
        )
        .unwrap();
        std::fs::write(docs_dir.join("nenjo.md"), "# Nenjo\n\nKnowledge content.").unwrap();

        let pack = PackageKnowledgePack::load(&dir.join("manifest.yaml"), "0.1.0").unwrap();
        let doc = pack.read_doc("domain.nenjo").unwrap();

        assert_eq!(doc.manifest.selector, "domain.nenjo");
        assert_eq!(doc.manifest.id, "nenjo.core.domain.nenjo");
        assert!(doc.content.contains("Knowledge content"));
    }
}