1use std::collections::HashMap;
39use std::fs::File;
40use std::io::BufReader;
41use std::path::{Component, Path, PathBuf};
42
43use anyhow::{Context, Result};
44use chrono::{DateTime, FixedOffset};
45use serde::Deserialize;
46use serde_json::Value;
47use thiserror::Error;
48use walkdir::WalkDir;
49
50pub mod plugin;
51
/// Expands to a `main` function for a plugin binary.
///
/// The given handler path is handed to `$crate::plugin::plugin_loop`, whose
/// `std::io::Result` is returned directly from the generated `main`.
#[macro_export]
macro_rules! lepiter_plugin_main {
    ($handler:path) => {
        fn main() -> std::io::Result<()> {
            $crate::plugin::plugin_loop($handler)
        }
    };
}
60
/// Identifier of a page: the UID stored in the file, or the file stem as a fallback.
pub type PageId = String;
63
/// Lightweight metadata for one `.lepiter` page file, built without parsing
/// the page's content snippets.
#[derive(Debug, Clone)]
pub struct PageMeta {
    /// Page identifier (UID from the file, or the file stem as a fallback).
    pub id: PageId,
    /// Page title as stored in the file (falls back to `id` when empty).
    pub title: String,
    /// Lowercased copy of `title`, precomputed for case-insensitive matching.
    pub title_lower: String,
    /// Filesystem path of the `.lepiter` file this metadata was read from.
    pub path: PathBuf,
    /// Last edit time parsed from the file's `editTime`, when present and valid.
    pub updated_at: Option<DateTime<FixedOffset>>,
    /// Tag names associated with the page (empty when none are stored).
    pub tags: Vec<String>,
}
80
/// A fully parsed page: metadata plus the flattened snippet content.
#[derive(Debug, Clone)]
pub struct Page {
    /// Page identifier (same as the corresponding [`PageMeta`]'s `id`).
    pub id: PageId,
    /// Page title.
    pub title: String,
    /// Last edit time, when available.
    pub updated_at: Option<DateTime<FixedOffset>>,
    /// Tag names associated with the page.
    pub tags: Vec<String>,
    /// Parsed content nodes in document order (children flattened inline).
    pub content: Vec<Node>,
}
95
/// A parsed content snippet, normalized to a markdown-like node model.
#[derive(Debug, Clone)]
pub enum Node {
    /// Markdown-style heading; the parser clamps `level` to at most 6.
    Heading { level: u8, text: String },
    /// Plain paragraph of text.
    Paragraph { text: String },
    /// Raw (possibly blank) text line.
    Text { text: String },
    /// List; each entry is the node sequence for one list item.
    List { items: Vec<Vec<Node>> },
    /// Fenced code block with an optional language tag.
    Code {
        language: Option<String>,
        code: String,
    },
    /// Hyperlink; also used for picture and video snippets.
    Link { text: String, url: String },
    /// Block quote.
    Quote { text: String },
    /// A Pharo rewrite rule: a search/replace pattern pair with optional scope.
    Rewrite {
        language: Option<String>,
        search: String,
        replace: String,
        scope: Option<String>,
        is_method_pattern: Option<bool>,
    },
    /// Unrecognized snippet type, kept with its raw JSON for inspection.
    Unknown { typ: String, raw: Value },
}
127
/// Parses a single raw JSON snippet value into a [`Node`].
///
/// Public entry point wrapping the private `parse_node`; it does not recurse
/// into the snippet's children.
pub fn parse_node_from_raw(item: &Value) -> Node {
    parse_node(item)
}
136
/// A file that failed to parse while indexing.
#[derive(Debug, Clone)]
pub struct ParseIssue {
    /// Path of the offending file.
    pub path: PathBuf,
    /// Human-readable rendering of the error chain.
    pub message: String,
}
145
/// Where a search query matched a page.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchMatchKind {
    /// Matched the page's metadata (title, id, or tags).
    Meta,
    /// Matched only the page's rendered content.
    Content,
}
154
/// A single search result: which page matched and how.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchHit {
    /// Id of the matching page.
    pub id: PageId,
    /// Whether the match was in metadata or in content.
    pub kind: SearchMatchKind,
}
163
/// Classification of a raw link target found in page content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkTargetKind {
    /// Link to another page in this knowledge base, by id.
    InternalPage(PageId),
    /// Link to an attachment file under the knowledge-base root.
    AttachmentPath(PathBuf),
    /// Link to an external URL (e.g. `https://…`, `mailto:…`).
    ExternalUrl(String),
    /// Target could not be classified; the original text is preserved.
    Unknown(String),
}
176
/// An attachment target resolved to a path under the knowledge-base root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedAttachment {
    /// Full path of the attachment (root joined with the relative part).
    pub path: PathBuf,
    /// Whether the file existed on disk at resolution time.
    pub exists: bool,
}
185
/// Failure modes when resolving an attachment link target.
#[derive(Debug, Error)]
pub enum AttachmentError {
    /// The raw target was empty or whitespace-only.
    #[error("attachment target was empty")]
    Empty,
    /// The target does not contain an `attachments/` path segment.
    #[error("attachment target not recognized: {0}")]
    NotAttachment(String),
    /// The target is absolute or contains `..`/root components.
    #[error("attachment path escapes knowledge base root: {0}")]
    EscapesRoot(String),
    /// The resolved file does not exist on disk.
    #[error("attachment not found: {0}")]
    Missing(PathBuf),
}
198
/// Result alias used by attachment resolution.
type AttachmentResult<T> = std::result::Result<T, AttachmentError>;
200
/// Resolves attachment link targets to paths under a knowledge-base root.
#[derive(Debug, Clone)]
pub struct AttachmentResolver {
    // Knowledge-base root directory that relative attachment paths join onto.
    root: PathBuf,
}
206
207impl AttachmentResolver {
208 pub fn new(root: impl AsRef<Path>) -> Self {
210 Self {
211 root: root.as_ref().to_path_buf(),
212 }
213 }
214
215 pub fn resolve(&self, raw: &str) -> AttachmentResult<ResolvedAttachment> {
217 let target = raw.trim();
218 if target.is_empty() {
219 return Err(AttachmentError::Empty);
220 }
221 let rel = extract_attachment_relative(target)
222 .ok_or_else(|| AttachmentError::NotAttachment(target.to_string()))?;
223 let rel = sanitize_relative_path(rel)?;
224 let path = self.root.join(rel);
225 let exists = path.exists();
226 Ok(ResolvedAttachment { path, exists })
227 }
228
229 pub fn resolve_path(&self, raw: &str) -> Option<PathBuf> {
231 self.resolve(raw).ok().map(|resolved| resolved.path)
232 }
233
234 pub fn resolve_existing(&self, raw: &str) -> AttachmentResult<PathBuf> {
236 let resolved = self.resolve(raw)?;
237 if resolved.exists {
238 Ok(resolved.path)
239 } else {
240 Err(AttachmentError::Missing(resolved.path))
241 }
242 }
243
244 pub fn root(&self) -> &Path {
246 &self.root
247 }
248}
249
/// Outcome of looking up a page id by title.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TitleResolution {
    /// Exactly one page matched.
    Unique(PageId),
    /// No page matched.
    NotFound,
    /// Several pages matched; all candidate ids are listed.
    Ambiguous(Vec<PageId>),
}
260
/// In-memory index of a knowledge-base directory: page metadata keyed by id,
/// a title-sorted id list, and any per-file parse failures.
#[derive(Debug, Clone)]
pub struct KnowledgeBaseIndex {
    // Directory the index was built from; used to resolve attachments.
    root: PathBuf,
    /// Page metadata keyed by page id.
    pub pages: HashMap<PageId, PageMeta>,
    /// Page ids ordered by lowercased title.
    pub sorted_ids: Vec<PageId>,
    /// Files that could not be parsed while building the index.
    pub index_issues: Vec<ParseIssue>,
}
272
/// Entry point for opening a knowledge-base directory; see [`KnowledgeBase::open`].
pub struct KnowledgeBase;
275
impl KnowledgeBase {
    /// Builds a [`KnowledgeBaseIndex`] by scanning the top level of `path`
    /// for `*.lepiter` files and reading each one's metadata.
    ///
    /// Files whose metadata fails to parse are recorded in `index_issues`
    /// rather than aborting the scan; directory-walk errors are skipped.
    pub fn open(path: impl AsRef<Path>) -> Result<KnowledgeBaseIndex> {
        let root = path.as_ref().to_path_buf();
        let mut pages = HashMap::new();
        let mut issues = Vec::new();

        // Only direct children of the root are considered (depth exactly 1).
        for entry in WalkDir::new(&root)
            .min_depth(1)
            .max_depth(1)
            .into_iter()
            .filter_map(|e| e.ok())
        {
            let file_type = entry.file_type();
            let file_path = entry.path();
            if !file_type.is_file()
                || file_path.extension().and_then(|e| e.to_str()) != Some("lepiter")
            {
                continue;
            }

            match parse_page_meta(file_path) {
                Ok(mut meta) => {
                    // A page without a UID falls back to the file stem as its id.
                    if meta.id.is_empty()
                        && let Some(stem) = file_path.file_stem().and_then(|s| s.to_str())
                    {
                        meta.id = stem.to_string();
                    }
                    // Untitled pages display their id instead.
                    if meta.title.is_empty() {
                        meta.title = meta.id.clone();
                    }
                    pages.insert(meta.id.clone(), meta);
                }
                Err(err) => issues.push(ParseIssue {
                    path: file_path.to_path_buf(),
                    message: format!("{err:#}"),
                }),
            }
        }

        let sorted_ids = compute_sorted_ids(&pages);

        Ok(KnowledgeBaseIndex {
            root,
            pages,
            sorted_ids,
            index_issues: issues,
        })
    }
}
329
impl KnowledgeBaseIndex {
    /// Loads and fully parses the page with the given id.
    ///
    /// # Errors
    /// Fails when the id is unknown, the file cannot be opened, or its JSON
    /// cannot be decoded.
    pub fn load_page(&self, id: &str) -> Result<Page> {
        let meta = self
            .pages
            .get(id)
            .with_context(|| format!("page id not found: {id}"))?;

        let file = File::open(&meta.path)
            .with_context(|| format!("failed to open page file {}", meta.path.display()))?;
        let reader = BufReader::new(file);
        let raw: Value =
            serde_json::from_reader(reader).with_context(|| "failed to decode page JSON")?;

        // Top-level snippets live under `children.items`; each one (with its
        // descendants) is flattened into the page's content list.
        let mut content = Vec::new();
        if let Some(items) = raw
            .get("children")
            .and_then(|v| v.get("items"))
            .and_then(Value::as_array)
        {
            for item in items {
                parse_item_recursive(item, &mut content);
            }
        }

        Ok(Page {
            id: meta.id.clone(),
            title: meta.title.clone(),
            updated_at: meta.updated_at,
            tags: meta.tags.clone(),
            content,
        })
    }

    /// All page metadata in title order; ids missing from `pages` are skipped.
    pub fn sorted_pages(&self) -> Vec<&PageMeta> {
        self.sorted_ids
            .iter()
            .filter_map(|id| self.pages.get(id))
            .collect()
    }

    /// Ids of pages whose metadata matches `query` (case-insensitive), in
    /// title order. An empty or whitespace-only query returns every page id.
    pub fn filter_page_ids(&self, query: &str) -> Vec<PageId> {
        let needle = query.trim().to_lowercase();
        let mut metas = self.sorted_pages();
        if !needle.is_empty() {
            metas.retain(|m| page_meta_matches(m, &needle));
        }
        metas.into_iter().map(|m| m.id.clone()).collect()
    }

    /// Searches pages for `query`, case-insensitively.
    ///
    /// Metadata (title/id/tags) is always checked; when `include_content` is
    /// set, pages without a metadata match are additionally loaded and their
    /// rendered text searched. Each page appears at most once, in title
    /// order, with metadata matches taking precedence over content matches.
    pub fn search_hits(&self, query: &str, include_content: bool) -> Vec<SearchHit> {
        let needle = query.trim().to_lowercase();
        if needle.is_empty() {
            return Vec::new();
        }

        let mut by_id: HashMap<PageId, SearchMatchKind> = HashMap::new();
        let metas = self.sorted_pages();

        for meta in &metas {
            if page_meta_matches(meta, &needle) {
                by_id.insert(meta.id.clone(), SearchMatchKind::Meta);
            }
        }

        if include_content {
            for meta in &metas {
                if by_id.contains_key(&meta.id) {
                    continue;
                }
                // Pages that fail to load are silently skipped here; load
                // failures surface elsewhere (e.g. via `load_page`).
                let Ok(page) = self.load_page(&meta.id) else {
                    continue;
                };
                if render_page_to_text(&page).to_lowercase().contains(&needle) {
                    by_id.insert(meta.id.clone(), SearchMatchKind::Content);
                }
            }
        }

        // Emit hits in title order by re-walking the sorted metadata.
        let mut hits = Vec::new();
        for meta in metas {
            if let Some(kind) = by_id.get(&meta.id) {
                hits.push(SearchHit {
                    id: meta.id.clone(),
                    kind: *kind,
                });
            }
        }
        hits
    }

    /// Resolves a page id from a title, case-insensitively.
    ///
    /// Exact title matches are preferred; only when there is no exact match
    /// are substring matches considered. Multiple candidates at either stage
    /// yield [`TitleResolution::Ambiguous`].
    pub fn resolve_page_id_by_title(&self, title: &str) -> TitleResolution {
        let needle = title.trim().to_lowercase();
        if needle.is_empty() {
            return TitleResolution::NotFound;
        }

        let sorted = self.sorted_pages();

        let exact = sorted
            .iter()
            .filter(|m| m.title_lower == needle)
            .map(|m| m.id.clone())
            .collect::<Vec<_>>();
        match exact.len() {
            1 => return TitleResolution::Unique(exact[0].clone()),
            n if n > 1 => return TitleResolution::Ambiguous(exact),
            _ => {}
        }

        let partial = sorted
            .iter()
            .filter(|m| m.title_lower.contains(&needle))
            .map(|m| m.id.clone())
            .collect::<Vec<_>>();
        match partial.len() {
            1 => TitleResolution::Unique(partial[0].clone()),
            0 => TitleResolution::NotFound,
            _ => TitleResolution::Ambiguous(partial),
        }
    }

    /// Determines what a raw link target points at, trying in order: a
    /// literal page id, a `page:`/`title:` prefix, an embedded UUID, an
    /// external URL scheme, an attachment path, and finally a title lookup.
    pub fn classify_link_target(&self, raw: &str) -> LinkTargetKind {
        let target = raw.trim();
        if target.is_empty() {
            return LinkTargetKind::Unknown(raw.to_string());
        }

        // Fast path: the target is already a known page id.
        if self.pages.contains_key(target) {
            return LinkTargetKind::InternalPage(target.to_string());
        }

        // `page:` accepts either an id or (failing that) a unique title.
        if let Some(rest) = target.strip_prefix("page:") {
            let id = rest.trim();
            if self.pages.contains_key(id) {
                return LinkTargetKind::InternalPage(id.to_string());
            }
            if let TitleResolution::Unique(resolved) = self.resolve_page_id_by_title(id) {
                return LinkTargetKind::InternalPage(resolved);
            }
        }
        // `title:` only accepts an unambiguous title.
        if let Some(rest) = target.strip_prefix("title:") {
            return match self.resolve_page_id_by_title(rest.trim()) {
                TitleResolution::Unique(id) => LinkTargetKind::InternalPage(id),
                _ => LinkTargetKind::Unknown(target.to_string()),
            };
        }

        // A UUID embedded anywhere in the target may name a page directly.
        if let Some(uuid) = extract_uuid_like(target)
            && self.pages.contains_key(uuid)
        {
            return LinkTargetKind::InternalPage(uuid.to_string());
        }

        if is_external_target(target) {
            return LinkTargetKind::ExternalUrl(target.to_string());
        }

        if let Some(path) = self.attachment_resolver().resolve_path(target) {
            return LinkTargetKind::AttachmentPath(path);
        }

        // Last resort: treat the whole target as a page title.
        match self.resolve_page_id_by_title(target) {
            TitleResolution::Unique(id) => LinkTargetKind::InternalPage(id),
            _ => LinkTargetKind::Unknown(target.to_string()),
        }
    }

    /// The directory this index was built from.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// A fresh attachment resolver rooted at this index's directory.
    pub fn attachment_resolver(&self) -> AttachmentResolver {
        AttachmentResolver::new(&self.root)
    }
}
515
516fn compute_sorted_ids(pages: &HashMap<PageId, PageMeta>) -> Vec<PageId> {
517 let mut entries: Vec<_> = pages.values().collect();
518 entries.sort_by(|a, b| a.title_lower.cmp(&b.title_lower));
519 entries.into_iter().map(|m| m.id.clone()).collect()
520}
521
522fn page_meta_matches(meta: &PageMeta, needle: &str) -> bool {
523 meta.title_lower.contains(needle)
524 || meta.id.to_lowercase().contains(needle)
525 || meta.tags.iter().any(|t| t.to_lowercase().contains(needle))
526}
527
/// True when the target looks like an external URL: a known scheme prefix
/// (including scheme-only `mailto:`) or any `://` separator.
fn is_external_target(target: &str) -> bool {
    let lower = target.to_lowercase();
    let known_prefix = ["http://", "https://", "mailto:", "file://"]
        .iter()
        .any(|prefix| lower.starts_with(prefix));
    known_prefix || lower.contains("://")
}
536
/// Extracts the `attachments/…`-relative part of a link target.
///
/// The returned slice always begins with `attachments/`:
/// - a target already starting with `attachments/` is returned whole;
/// - otherwise the part after the last-free `/` before an `/attachments/`
///   segment is returned;
/// - otherwise the suffix starting at the first `attachments/` occurrence.
///
/// Returns `None` when the target contains no `attachments/` segment.
///
/// (Replaces the previous `Some(rest).map(|_| target)` construction, which
/// bound `rest` only to discard it — same behavior, stated directly.)
fn extract_attachment_relative(target: &str) -> Option<&str> {
    if target.starts_with("attachments/") {
        return Some(target);
    }
    if let Some(pos) = target.find("/attachments/") {
        // Skip the leading '/' so the result starts at "attachments/".
        return target.get(pos + 1..);
    }
    target.find("attachments/").and_then(|pos| target.get(pos..))
}
550
551fn sanitize_relative_path(rel: &str) -> AttachmentResult<PathBuf> {
552 let rel = rel.trim();
553 if rel.is_empty() {
554 return Err(AttachmentError::Empty);
555 }
556 let path = Path::new(rel);
557 if path.is_absolute() {
558 return Err(AttachmentError::EscapesRoot(rel.to_string()));
559 }
560 for comp in path.components() {
561 match comp {
562 Component::Prefix(_) | Component::RootDir | Component::ParentDir => {
563 return Err(AttachmentError::EscapesRoot(rel.to_string()));
564 }
565 _ => {}
566 }
567 }
568 Ok(path.to_path_buf())
569}
570
/// Finds the first UUID-shaped substring (8-4-4-4-12 hex groups separated
/// by dashes) anywhere in `input` and returns it as a slice.
///
/// Fix: the previous version sliced `&input[i..i + 36]` at raw byte offsets
/// before validating, which panics when `input` contains a multibyte
/// character (the offset may fall inside a UTF-8 sequence). Scanning the
/// bytes first is panic-free: a window only matches when all 36 bytes are
/// ASCII (`-` or hex digits), which also guarantees both slice bounds are
/// valid char boundaries.
fn extract_uuid_like(input: &str) -> Option<&str> {
    const UUID_LEN: usize = 36;
    let bytes = input.as_bytes();
    if bytes.len() < UUID_LEN {
        return None;
    }

    for start in 0..=bytes.len() - UUID_LEN {
        let window = &bytes[start..start + UUID_LEN];
        let looks_like_uuid = window.iter().enumerate().all(|(idx, &b)| match idx {
            // Group separators at the canonical dash positions.
            8 | 13 | 18 | 23 => b == b'-',
            _ => b.is_ascii_hexdigit(),
        });
        if looks_like_uuid {
            // All 36 bytes are ASCII, so both bounds are char boundaries
            // and `get` cannot return None here.
            return input.get(start..start + UUID_LEN);
        }
    }
    None
}
589
/// Serde mirror of the top-level fields of a `.lepiter` file needed to
/// build a [`PageMeta`]; all other fields in the file are ignored.
#[derive(Debug, Deserialize)]
struct RawMeta {
    #[serde(default)]
    uid: Option<RawUid>,
    #[serde(default)]
    #[serde(rename = "pageType")]
    page_type: Option<RawPageType>,
    #[serde(default)]
    title: Option<String>,
    #[serde(default)]
    #[serde(rename = "editTime")]
    edit_time: Option<RawEditTime>,
    // Kept as a raw `Value` because tag shapes vary; see `parse_tags`.
    #[serde(default)]
    tags: Option<Value>,
}

/// Mirror of the `uid` object; either `uuid` or `uidString` may carry the id.
#[derive(Debug, Deserialize)]
struct RawUid {
    #[serde(default)]
    uuid: Option<String>,
    #[serde(default)]
    #[serde(rename = "uidString")]
    uid_string: Option<String>,
}

/// Mirror of the `pageType` object; its `title` takes precedence over the
/// file's top-level `title` (see `parse_page_meta`).
#[derive(Debug, Deserialize)]
struct RawPageType {
    #[serde(default)]
    title: Option<String>,
}

/// Mirror of the `editTime` object wrapping the nested time value.
#[derive(Debug, Deserialize)]
struct RawEditTime {
    #[serde(default)]
    time: Option<RawTimeValue>,
}

/// Mirror of the nested time object; the timestamp is an RFC 3339 string.
#[derive(Debug, Deserialize)]
struct RawTimeValue {
    #[serde(default)]
    #[serde(rename = "dateAndTimeString")]
    date_and_time_string: Option<String>,
}
633
634fn parse_page_meta(path: &Path) -> Result<PageMeta> {
635 let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
636 let reader = BufReader::new(file);
637 let raw: RawMeta =
638 serde_json::from_reader(reader).with_context(|| "failed to decode page metadata")?;
639
640 let id = raw
641 .uid
642 .as_ref()
643 .and_then(|u| u.uuid.clone().or_else(|| u.uid_string.clone()))
644 .unwrap_or_default();
645 let title = raw
646 .page_type
647 .and_then(|pt| pt.title)
648 .or(raw.title)
649 .unwrap_or_default();
650 let updated_at = raw
651 .edit_time
652 .and_then(|e| e.time)
653 .and_then(|t| t.date_and_time_string)
654 .and_then(|s| DateTime::parse_from_rfc3339(&s).ok());
655 let tags = parse_tags(raw.tags.as_ref());
656
657 let title_lower = title.to_lowercase();
658 Ok(PageMeta {
659 id,
660 title,
661 title_lower,
662 path: path.to_path_buf(),
663 updated_at,
664 tags,
665 })
666}
667
668fn parse_tags(value: Option<&Value>) -> Vec<String> {
669 let Some(value) = value else {
670 return Vec::new();
671 };
672 match value {
673 Value::Array(items) => items
674 .iter()
675 .filter_map(|item| {
676 item.as_str()
677 .map(ToOwned::to_owned)
678 .or_else(|| {
679 item.get("name")
680 .and_then(Value::as_str)
681 .map(ToOwned::to_owned)
682 })
683 .or_else(|| {
684 item.get("title")
685 .and_then(Value::as_str)
686 .map(ToOwned::to_owned)
687 })
688 })
689 .collect(),
690 Value::Object(obj) => obj
691 .get("items")
692 .and_then(Value::as_array)
693 .map(|items| {
694 items
695 .iter()
696 .filter_map(|i| {
697 i.get("title")
698 .and_then(Value::as_str)
699 .map(ToOwned::to_owned)
700 })
701 .collect::<Vec<_>>()
702 })
703 .unwrap_or_default(),
704 _ => Vec::new(),
705 }
706}
707
708fn parse_item_recursive(item: &Value, out: &mut Vec<Node>) {
709 let typ = extract_type(item);
710 out.push(parse_node(item));
711 if matches!(typ.as_deref(), Some("listSnippet")) {
712 return;
714 }
715 if let Some(children) = item
716 .get("children")
717 .and_then(|v| v.get("items"))
718 .and_then(Value::as_array)
719 {
720 for child in children {
721 parse_item_recursive(child, out);
722 }
723 }
724}
725
/// Converts one raw snippet into a [`Node`] based on its declared type.
///
/// Unrecognized or untyped snippets are preserved as [`Node::Unknown`] so
/// no content is silently dropped.
fn parse_node(item: &Value) -> Node {
    let typ = extract_type(item);

    match typ.as_deref() {
        Some("textSnippet") => parse_text_like_node(item),
        Some("quoteSnippet") | Some("blockQuoteSnippet") | Some("commentSnippet") => Node::Quote {
            text: extract_text(item).unwrap_or_default(),
        },
        Some("listSnippet") => parse_list_node(item),
        Some("pictureSnippet") => parse_picture_node(item),
        Some("youtubeSnippet") => parse_youtube_node(item),
        Some("elementSnippet") => parse_element_node(item),
        Some("pharoRewrite") => parse_rewrite_node(item),
        Some("wordSnippet") => parse_word_node(item),
        // All code-like snippet types share one handling path: infer the
        // language from the type name and take the code (or text) body.
        Some(
            t @ ("pharoSnippet"
            | "pythonSnippet"
            | "javascriptSnippet"
            | "shellCommandSnippet"
            | "gemstoneSnippet"
            | "exampleSnippet"
            | "changesSnippet"
            | "robocoderMetamodelSnippet"),
        ) => Node::Code {
            language: infer_language(Some(t)),
            code: extract_code(item)
                .or_else(|| extract_text(item))
                .unwrap_or_default(),
        },
        // Link snippets only become links when they actually carry a URL;
        // without one they fall through to the `Unknown` arm below.
        Some(t @ "pharoLinkSnippet") if has_link(item) => Node::Link {
            text: extract_text(item).unwrap_or_else(|| t.to_string()),
            url: extract_link(item).unwrap_or_default(),
        },
        Some("linkSnippet") if has_link(item) => Node::Link {
            text: extract_text(item).unwrap_or_else(|| "link".to_string()),
            url: extract_link(item).unwrap_or_default(),
        },
        Some(t) => Node::Unknown {
            typ: t.to_string(),
            raw: item.clone(),
        },
        None => Node::Unknown {
            typ: "<missing-type>".to_string(),
            raw: item.clone(),
        },
    }
}
773
774fn parse_text_like_node(item: &Value) -> Node {
775 let text = extract_text(item).unwrap_or_default();
776 if let Some((level, heading)) = parse_heading(&text) {
777 Node::Heading {
778 level,
779 text: heading,
780 }
781 } else if let Some(stripped) = text.strip_prefix("> ") {
782 Node::Quote {
783 text: stripped.to_string(),
784 }
785 } else if text.trim().is_empty() {
786 Node::Text { text }
787 } else {
788 Node::Paragraph { text }
789 }
790}
791
792fn parse_list_node(item: &Value) -> Node {
793 let mut items = Vec::new();
794 if let Some(children) = item
795 .get("children")
796 .and_then(|v| v.get("items"))
797 .and_then(Value::as_array)
798 {
799 for child in children {
800 items.push(vec![parse_node(child)]);
801 }
802 }
803 Node::List { items }
804}
805
806fn parse_picture_node(item: &Value) -> Node {
807 let url = item
808 .get("url")
809 .and_then(Value::as_str)
810 .map(ToOwned::to_owned)
811 .or_else(|| extract_link(item))
812 .unwrap_or_default();
813 let text = item
814 .get("caption")
815 .and_then(Value::as_str)
816 .map(ToOwned::to_owned)
817 .or_else(|| extract_text(item))
818 .unwrap_or_else(|| "picture".to_string());
819
820 if url.is_empty() {
821 Node::Unknown {
822 typ: "pictureSnippet".to_string(),
823 raw: item.clone(),
824 }
825 } else {
826 Node::Link { text, url }
827 }
828}
829
830fn parse_youtube_node(item: &Value) -> Node {
831 let url = item
832 .get("youtubeUrl")
833 .and_then(Value::as_str)
834 .map(ToOwned::to_owned)
835 .or_else(|| extract_link(item))
836 .unwrap_or_default();
837 let text = extract_text(item).unwrap_or_else(|| "youtube".to_string());
838
839 if url.is_empty() {
840 Node::Unknown {
841 typ: "youtubeSnippet".to_string(),
842 raw: item.clone(),
843 }
844 } else {
845 Node::Link { text, url }
846 }
847}
848
849fn parse_element_node(item: &Value) -> Node {
850 let code = extract_code(item).or_else(|| extract_text(item));
851 if let Some(code) = code.filter(|c| !c.trim().is_empty()) {
852 Node::Code {
853 language: Some("element".to_string()),
854 code,
855 }
856 } else {
857 Node::Unknown {
858 typ: "elementSnippet".to_string(),
859 raw: item.clone(),
860 }
861 }
862}
863
864fn parse_rewrite_node(item: &Value) -> Node {
865 let search = item
866 .get("search")
867 .and_then(Value::as_str)
868 .map(ToOwned::to_owned)
869 .unwrap_or_default();
870 let replace = item
871 .get("replace")
872 .and_then(Value::as_str)
873 .map(ToOwned::to_owned)
874 .unwrap_or_default();
875 let scope = item
876 .get("scope")
877 .and_then(Value::as_str)
878 .map(ToOwned::to_owned);
879 let is_method_pattern = item.get("isMethodPattern").and_then(Value::as_bool);
880
881 if search.is_empty() && replace.is_empty() {
882 Node::Unknown {
883 typ: "pharoRewrite".to_string(),
884 raw: item.clone(),
885 }
886 } else {
887 Node::Rewrite {
888 language: Some("pharo".to_string()),
889 search,
890 replace,
891 scope,
892 is_method_pattern,
893 }
894 }
895}
896
/// Summarizes a word snippet as a single paragraph node.
///
/// Prefers the `wordString` and `explanationAttachmentNameString` fields;
/// when both are absent, scavenges up to 12 loose text fragments from the
/// raw JSON. Output is capped at 8 lines and 1200 characters. Snippets
/// that yield no text at all are kept as `Node::Unknown`.
fn parse_word_node(item: &Value) -> Node {
    let mut lines = Vec::new();

    if let Some(word) = item
        .get("wordString")
        .and_then(Value::as_str)
        .map(str::trim)
        .filter(|s| !s.is_empty())
    {
        lines.push(word.to_string());
    }

    if let Some(explanation) = item
        .get("explanationAttachmentNameString")
        .and_then(Value::as_str)
        .map(str::trim)
        .filter(|s| !s.is_empty())
    {
        lines.push(format!("explanation: {explanation}"));
    }

    // Fallback: no structured fields found, scrape loose strings instead.
    if lines.is_empty() {
        collect_text_fragments(item, &mut lines, 0, 12);
    }

    lines.retain(|s| !s.trim().is_empty());
    lines.truncate(8);

    if lines.is_empty() {
        return Node::Unknown {
            typ: "wordSnippet".to_string(),
            raw: item.clone(),
        };
    }

    let mut text = lines.join("\n");
    // Truncate very long summaries, marking the cut with an ellipsis.
    if text.chars().count() > 1200 {
        text = text.chars().take(1199).collect::<String>();
        text.push('…');
    }

    Node::Paragraph { text }
}
940
/// Gathers non-blank string fragments from a JSON value into `out`.
///
/// NOTE(review): despite its name, `remaining` acts as a cap on the total
/// length of `out` (it is never decremented), and recursion stops once
/// `depth` exceeds 4. Bookkeeping object keys are skipped so only
/// content-bearing strings are collected.
fn collect_text_fragments(value: &Value, out: &mut Vec<String>, depth: usize, remaining: usize) {
    if remaining == 0 || out.len() >= remaining || depth > 4 {
        return;
    }

    match value {
        Value::String(s) => {
            let trimmed = s.trim();
            if !trimmed.is_empty() {
                out.push(trimmed.to_string());
            }
        }
        Value::Array(items) => {
            for item in items {
                if out.len() >= remaining {
                    break;
                }
                collect_text_fragments(item, out, depth + 1, remaining);
            }
        }
        Value::Object(map) => {
            for (key, item) in map {
                // Skip metadata fields that never hold display text.
                if matches!(
                    key.as_str(),
                    "__type"
                        | "children"
                        | "uid"
                        | "createEmail"
                        | "createTime"
                        | "editEmail"
                        | "editTime"
                        | "paragraphStyle"
                ) {
                    continue;
                }
                if out.len() >= remaining {
                    break;
                }
                collect_text_fragments(item, out, depth + 1, remaining);
            }
        }
        _ => {}
    }
}
985
/// Parses a markdown-style `#` heading from `input`.
///
/// Returns the heading level (clamped to 6) and the text after the hashes,
/// or `None` when there are no leading hashes or no text follows them.
fn parse_heading(input: &str) -> Option<(u8, String)> {
    let trimmed = input.trim();
    // '#' is ASCII, so the byte count equals the char count for the prefix.
    let level = trimmed.bytes().take_while(|b| *b == b'#').count();
    if level == 0 {
        return None;
    }
    let body = trimmed[level..].trim_start();
    if body.is_empty() {
        None
    } else {
        Some((level.min(6) as u8, body.to_string()))
    }
}
998
999fn extract_type(item: &Value) -> Option<String> {
1000 item.get("type")
1001 .and_then(Value::as_str)
1002 .map(ToOwned::to_owned)
1003 .or_else(|| {
1004 item.get("__type")
1005 .and_then(Value::as_str)
1006 .map(ToOwned::to_owned)
1007 })
1008}
1009
1010fn extract_text(item: &Value) -> Option<String> {
1011 item.get("string")
1012 .and_then(Value::as_str)
1013 .map(ToOwned::to_owned)
1014 .or_else(|| {
1015 item.get("text")
1016 .and_then(Value::as_str)
1017 .map(ToOwned::to_owned)
1018 })
1019 .or_else(|| {
1020 item.get("content")
1021 .and_then(Value::as_str)
1022 .map(ToOwned::to_owned)
1023 })
1024}
1025
1026fn extract_code(item: &Value) -> Option<String> {
1027 item.get("code")
1028 .and_then(Value::as_str)
1029 .map(ToOwned::to_owned)
1030 .or_else(|| {
1031 item.get("source")
1032 .and_then(Value::as_str)
1033 .map(ToOwned::to_owned)
1034 })
1035}
1036
1037fn extract_link(item: &Value) -> Option<String> {
1038 item.get("url")
1039 .and_then(Value::as_str)
1040 .map(ToOwned::to_owned)
1041 .or_else(|| {
1042 item.get("href")
1043 .and_then(Value::as_str)
1044 .map(ToOwned::to_owned)
1045 })
1046}
1047
1048fn has_link(item: &Value) -> bool {
1049 item.get("url").and_then(Value::as_str).is_some()
1050 || item.get("href").and_then(Value::as_str).is_some()
1051}
1052
/// Maps a snippet type name to a language tag for code fences.
///
/// Known snippet types get an explicit mapping; any other name ending in
/// `Snippet` falls back to the lowercased remainder (with every trailing
/// `Snippet` suffix stripped); everything else yields `None`.
fn infer_language(typ: Option<&str>) -> Option<String> {
    match typ? {
        "pharoSnippet" => Some("pharo".to_string()),
        "pythonSnippet" => Some("python".to_string()),
        "javascriptSnippet" => Some("javascript".to_string()),
        "jsonSnippet" => Some("json".to_string()),
        "yamlSnippet" => Some("yaml".to_string()),
        other if other.ends_with("Snippet") => {
            Some(other.trim_end_matches("Snippet").to_lowercase())
        }
        _ => None,
    }
}
1070
1071pub fn render_page_to_text(page: &Page) -> String {
1073 render_nodes_to_text(&page.content)
1074}
1075
/// Renders a node list to markdown-like plain text.
///
/// Block-level nodes are followed by a blank line, except `Text`, which
/// emits a single newline. List items are rendered recursively and
/// trimmed onto `- ` bullet lines.
pub fn render_nodes_to_text(nodes: &[Node]) -> String {
    let mut out = String::new();
    for node in nodes {
        match node {
            Node::Heading { level, text } => {
                // Guard against a zero level: always emit at least one '#'.
                out.push_str(&"#".repeat((*level).max(1) as usize));
                out.push(' ');
                out.push_str(text);
                out.push_str("\n\n");
            }
            Node::Paragraph { text } => {
                out.push_str(text);
                out.push_str("\n\n");
            }
            Node::Text { text } => {
                out.push_str(text);
                out.push('\n');
            }
            Node::List { items } => {
                for item in items {
                    out.push_str("- ");
                    out.push_str(render_nodes_to_text(item).trim());
                    out.push('\n');
                }
                out.push('\n');
            }
            Node::Code { language, code } => {
                out.push_str("```");
                if let Some(lang) = language {
                    out.push_str(lang);
                }
                out.push('\n');
                out.push_str(code);
                out.push_str("\n```\n\n");
            }
            Node::Link { text, url } => {
                out.push_str(&format!("[{text}]({url})\n\n"));
            }
            Node::Quote { text } => {
                out.push_str(&format!("> {text}\n\n"));
            }
            // Rewrites become a diff-style fenced block: scope/flag header
            // comments, search lines prefixed '-', replace lines prefixed '+'.
            Node::Rewrite {
                language,
                search,
                replace,
                scope,
                is_method_pattern,
            } => {
                let lang = language.clone().unwrap_or_else(|| "rewrite".to_string());
                out.push_str(&format!("```diff {lang}\n"));
                if let Some(scope) = scope {
                    out.push_str(&format!("# scope: {scope}\n"));
                }
                if let Some(is_method_pattern) = is_method_pattern {
                    out.push_str(&format!("# method_pattern: {is_method_pattern}\n"));
                }
                for line in normalize_text(search).lines() {
                    out.push('-');
                    out.push_str(line);
                    out.push('\n');
                }
                for line in normalize_text(replace).lines() {
                    out.push('+');
                    out.push_str(line);
                    out.push('\n');
                }
                out.push_str("```\n\n");
            }
            Node::Unknown { typ, .. } => {
                out.push_str(&format!("[[unknown: {typ}]]\n\n"));
            }
        }
    }
    out
}
1152
/// Normalizes line endings: `\r\n` and lone `\r` both become `\n`.
pub fn normalize_text(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\r' {
            // Consume the '\n' of a CRLF pair so it is not doubled.
            if chars.peek() == Some(&'\n') {
                chars.next();
            }
            out.push('\n');
        } else {
            out.push(c);
        }
    }
    out
}
1156
1157pub fn collect_node_types_in_file(path: &Path) -> Result<HashMap<String, usize>> {
1159 let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
1160 let reader = BufReader::new(file);
1161 let raw: Value = serde_json::from_reader(reader).with_context(|| "failed to decode JSON")?;
1162
1163 let mut out = HashMap::new();
1164 collect_node_types_value(&raw, &mut out);
1165 Ok(out)
1166}
1167
1168fn collect_node_types_value(value: &Value, out: &mut HashMap<String, usize>) {
1169 match value {
1170 Value::Object(map) => {
1171 if let Some(typ) = map
1172 .get("type")
1173 .and_then(Value::as_str)
1174 .or_else(|| map.get("__type").and_then(Value::as_str))
1175 {
1176 *out.entry(typ.to_string()).or_insert(0) += 1;
1177 }
1178 for v in map.values() {
1179 collect_node_types_value(v, out);
1180 }
1181 }
1182 Value::Array(items) => {
1183 for item in items {
1184 collect_node_types_value(item, out);
1185 }
1186 }
1187 _ => {}
1188 }
1189}
1190
1191#[cfg(test)]
1192mod tests {
1193 use super::*;
1194 use serde_json::json;
1195 use std::fs;
1196 use std::time::{SystemTime, UNIX_EPOCH};
1197
1198 fn temp_file_path(name: &str) -> PathBuf {
1199 let ts = SystemTime::now()
1200 .duration_since(UNIX_EPOCH)
1201 .expect("time")
1202 .as_nanos();
1203 std::env::temp_dir().join(format!("lepiter-core-{name}-{ts}.lepiter"))
1204 }
1205
1206 fn temp_dir_path(name: &str) -> PathBuf {
1207 let ts = SystemTime::now()
1208 .duration_since(UNIX_EPOCH)
1209 .expect("time")
1210 .as_nanos();
1211 std::env::temp_dir().join(format!("lepiter-core-{name}-{ts}"))
1212 }
1213
1214 #[test]
1215 fn parse_heading_detects_markdown_style() {
1216 assert_eq!(
1217 parse_heading("## Heading"),
1218 Some((2, "Heading".to_string()))
1219 );
1220 assert_eq!(parse_heading("No heading"), None);
1221 }
1222
1223 #[test]
1224 fn parse_tags_supports_array_and_object_items() {
1225 let arr = json!(["a", {"name": "b"}, {"title": "c"}]);
1226 assert_eq!(parse_tags(Some(&arr)), vec!["a", "b", "c"]);
1227
1228 let obj = json!({"items": [{"title":"x"}, {"title":"y"}]});
1229 assert_eq!(parse_tags(Some(&obj)), vec!["x", "y"]);
1230 }
1231
1232 #[test]
1233 fn parse_node_covers_known_and_unknown_types() {
1234 let heading = json!({"__type":"textSnippet","string":"# Title"});
1235 assert!(matches!(parse_node(&heading), Node::Heading { .. }));
1236
1237 let quote = json!({"__type":"blockQuoteSnippet","string":"quoted"});
1238 assert!(matches!(parse_node("e), Node::Quote { .. }));
1239
1240 let code = json!({"__type":"pythonSnippet","code":"print(1)"});
1241 assert!(matches!(parse_node(&code), Node::Code { .. }));
1242
1243 let link = json!({"__type":"pharoLinkSnippet","string":"link","url":"page:abc"});
1244 assert!(matches!(parse_node(&link), Node::Link { .. }));
1245
1246 let picture = json!({"__type":"pictureSnippet","url":"attachments/x.png","caption":"img"});
1247 assert!(matches!(parse_node(&picture), Node::Link { .. }));
1248
1249 let youtube = json!({"__type":"youtubeSnippet","youtubeUrl":"https://youtu.be/abc"});
1250 assert!(matches!(parse_node(&youtube), Node::Link { .. }));
1251
1252 let element = json!({"__type":"elementSnippet","code":"GtInspector newOn: 42"});
1253 assert!(matches!(parse_node(&element), Node::Code { .. }));
1254
1255 let rewrite =
1256 json!({"__type":"pharoRewrite","search":"a","replace":"b","isMethodPattern":true});
1257 assert!(matches!(parse_node(&rewrite), Node::Rewrite { .. }));
1258
1259 let word = json!({"__type":"wordSnippet","wordString":"refactoring"});
1260 assert!(matches!(parse_node(&word), Node::Paragraph { .. }));
1261
1262 let list = json!({
1263 "__type":"listSnippet",
1264 "children":{"items":[{"__type":"textSnippet","string":"item"}]}
1265 });
1266 assert!(matches!(parse_node(&list), Node::List { .. }));
1267
1268 let unknown = json!({"__type":"mysterySnippet","x":1});
1269 assert!(matches!(parse_node(&unknown), Node::Unknown { .. }));
1270
1271 let missing = json!({"x":1});
1272 assert!(matches!(parse_node(&missing), Node::Unknown { .. }));
1273 }
1274
1275 #[test]
1276 fn infer_language_maps_common_snippet_types() {
1277 assert_eq!(
1278 infer_language(Some("pharoSnippet")),
1279 Some("pharo".to_string())
1280 );
1281 assert_eq!(
1282 infer_language(Some("javascriptSnippet")),
1283 Some("javascript".to_string())
1284 );
1285 assert_eq!(
1286 infer_language(Some("yamlSnippet")),
1287 Some("yaml".to_string())
1288 );
1289 assert_eq!(
1290 infer_language(Some("customSnippet")),
1291 Some("custom".to_string())
1292 );
1293 assert_eq!(infer_language(None), None);
1294 }
1295
1296 #[test]
1297 fn render_nodes_outputs_unknown_placeholder() {
1298 let text = render_nodes_to_text(&[
1299 Node::Paragraph {
1300 text: "para".to_string(),
1301 },
1302 Node::Rewrite {
1303 language: Some("pharo".to_string()),
1304 search: "a".to_string(),
1305 replace: "b".to_string(),
1306 scope: None,
1307 is_method_pattern: Some(true),
1308 },
1309 Node::Unknown {
1310 typ: "weird".to_string(),
1311 raw: json!({"a":1}),
1312 },
1313 ]);
1314 assert!(text.contains("para"));
1315 assert!(text.contains("```diff pharo"));
1316 assert!(text.contains("-a"));
1317 assert!(text.contains("+b"));
1318 assert!(text.contains("[[unknown: weird]]"));
1319 }
1320
1321 #[test]
1322 fn collect_node_types_counts_nested_values() -> Result<()> {
1323 let path = temp_file_path("types");
1324 let content = json!({
1325 "__type":"page",
1326 "children":{"__type":"snippets","items":[
1327 {"__type":"textSnippet","children":{"__type":"snippets","items":[]}},
1328 {"__type":"pythonSnippet","code":"print(1)"}
1329 ]}
1330 });
1331 fs::write(&path, serde_json::to_vec(&content)?)?;
1332 let counts = collect_node_types_in_file(&path)?;
1333 fs::remove_file(&path)?;
1334
1335 assert_eq!(counts.get("page"), Some(&1));
1336 assert_eq!(counts.get("textSnippet"), Some(&1));
1337 assert_eq!(counts.get("pythonSnippet"), Some(&1));
1338 Ok(())
1339 }
1340
1341 #[test]
1342 fn parse_page_meta_extracts_core_fields() -> Result<()> {
1343 let path = temp_file_path("meta");
1344 let content = json!({
1345 "uid":{"uuid":"id-123"},
1346 "pageType":{"title":"Title"},
1347 "editTime":{"time":{"dateAndTimeString":"2024-01-01T00:00:00+00:00"}},
1348 "tags":["t1","t2"]
1349 });
1350 fs::write(&path, serde_json::to_vec(&content)?)?;
1351 let meta = parse_page_meta(&path)?;
1352 fs::remove_file(&path)?;
1353
1354 assert_eq!(meta.id, "id-123");
1355 assert_eq!(meta.title, "Title");
1356 assert_eq!(meta.tags, vec!["t1", "t2"]);
1357 assert!(meta.updated_at.is_some());
1358 Ok(())
1359 }
1360
1361 #[test]
1362 fn parse_item_recursive_includes_children() {
1363 let root = json!({
1364 "__type":"textSnippet",
1365 "string":"parent",
1366 "children":{"items":[
1367 {"__type":"textSnippet","string":"child"}
1368 ]}
1369 });
1370 let mut out = Vec::new();
1371 parse_item_recursive(&root, &mut out);
1372 assert_eq!(out.len(), 2);
1373 }
1374
1375 #[test]
1376 fn filter_page_ids_matches_title_id_and_tags() {
1377 let mut pages = HashMap::new();
1378 pages.insert(
1379 "id-1".to_string(),
1380 PageMeta {
1381 id: "id-1".to_string(),
1382 title: "Alpha".to_string(),
1383 title_lower: "alpha".to_string(),
1384 path: PathBuf::from("/tmp/a"),
1385 updated_at: None,
1386 tags: vec!["rust".to_string()],
1387 },
1388 );
1389 pages.insert(
1390 "id-2".to_string(),
1391 PageMeta {
1392 id: "id-2".to_string(),
1393 title: "Beta".to_string(),
1394 title_lower: "beta".to_string(),
1395 path: PathBuf::from("/tmp/b"),
1396 updated_at: None,
1397 tags: vec!["pharo".to_string()],
1398 },
1399 );
1400 let sorted_ids = compute_sorted_ids(&pages);
1401 let index = KnowledgeBaseIndex {
1402 root: PathBuf::from("/tmp"),
1403 pages,
1404 sorted_ids,
1405 index_issues: Vec::new(),
1406 };
1407
1408 assert_eq!(index.filter_page_ids("alpha"), vec!["id-1".to_string()]);
1409 assert_eq!(index.filter_page_ids("id-2"), vec!["id-2".to_string()]);
1410 assert_eq!(index.filter_page_ids("pharo"), vec!["id-2".to_string()]);
1411 assert_eq!(
1412 index.filter_page_ids(""),
1413 vec!["id-1".to_string(), "id-2".to_string()]
1414 );
1415 }
1416
1417 #[test]
1418 fn resolve_page_id_by_title_handles_unique_ambiguous_and_missing() {
1419 let mut pages = HashMap::new();
1420 pages.insert(
1421 "id-1".to_string(),
1422 PageMeta {
1423 id: "id-1".to_string(),
1424 title: "Alpha".to_string(),
1425 title_lower: "alpha".to_string(),
1426 path: PathBuf::from("/tmp/a"),
1427 updated_at: None,
1428 tags: Vec::new(),
1429 },
1430 );
1431 pages.insert(
1432 "id-2".to_string(),
1433 PageMeta {
1434 id: "id-2".to_string(),
1435 title: "Alphabet".to_string(),
1436 title_lower: "alphabet".to_string(),
1437 path: PathBuf::from("/tmp/b"),
1438 updated_at: None,
1439 tags: Vec::new(),
1440 },
1441 );
1442 let sorted_ids = compute_sorted_ids(&pages);
1443 let index = KnowledgeBaseIndex {
1444 root: PathBuf::from("/tmp"),
1445 pages,
1446 sorted_ids,
1447 index_issues: Vec::new(),
1448 };
1449
1450 assert_eq!(
1451 index.resolve_page_id_by_title("Alpha"),
1452 TitleResolution::Unique("id-1".to_string())
1453 );
1454 assert!(matches!(
1455 index.resolve_page_id_by_title("alp"),
1456 TitleResolution::Ambiguous(_)
1457 ));
1458 assert_eq!(
1459 index.resolve_page_id_by_title("zzz"),
1460 TitleResolution::NotFound
1461 );
1462 }
1463
1464 #[test]
1465 fn classify_link_target_covers_internal_attachment_external_unknown() {
1466 let mut pages = HashMap::new();
1467 pages.insert(
1468 "8a505fa0-2222-3333-4444-555555555555".to_string(),
1469 PageMeta {
1470 id: "8a505fa0-2222-3333-4444-555555555555".to_string(),
1471 title: "Alpha".to_string(),
1472 title_lower: "alpha".to_string(),
1473 path: PathBuf::from("/tmp/a"),
1474 updated_at: None,
1475 tags: Vec::new(),
1476 },
1477 );
1478 let sorted_ids = compute_sorted_ids(&pages);
1479 let index = KnowledgeBaseIndex {
1480 root: PathBuf::from("/kb"),
1481 pages,
1482 sorted_ids,
1483 index_issues: Vec::new(),
1484 };
1485
1486 assert!(matches!(
1487 index.classify_link_target("8a505fa0-2222-3333-4444-555555555555"),
1488 LinkTargetKind::InternalPage(_)
1489 ));
1490 assert!(matches!(
1491 index.classify_link_target("title:alpha"),
1492 LinkTargetKind::InternalPage(_)
1493 ));
1494 assert!(matches!(
1495 index.classify_link_target("go to 8a505fa0-2222-3333-4444-555555555555 now"),
1496 LinkTargetKind::InternalPage(_)
1497 ));
1498 assert!(matches!(
1499 index.classify_link_target("attachments/image.png"),
1500 LinkTargetKind::AttachmentPath(_)
1501 ));
1502 assert!(matches!(
1503 index.classify_link_target("https://example.com"),
1504 LinkTargetKind::ExternalUrl(_)
1505 ));
1506 assert!(matches!(
1507 index.classify_link_target("not a thing"),
1508 LinkTargetKind::Unknown(_)
1509 ));
1510 assert!(matches!(
1512 index.classify_link_target("page:Alpha"),
1513 LinkTargetKind::InternalPage(_)
1514 ));
1515 assert!(matches!(
1517 index.classify_link_target("page:Nonexistent"),
1518 LinkTargetKind::Unknown(_)
1519 ));
1520 }
1521
1522 #[test]
1523 fn attachment_resolver_reports_missing_files() -> Result<()> {
1524 let root = temp_dir_path("attachments");
1525 let attachments = root.join("attachments");
1526 fs::create_dir_all(&attachments)?;
1527 fs::write(attachments.join("ok.txt"), b"ok")?;
1528
1529 let resolver = AttachmentResolver::new(&root);
1530 let resolved = resolver.resolve("attachments/ok.txt")?;
1531 assert!(resolved.exists);
1532
1533 let missing = resolver.resolve_existing("attachments/missing.txt");
1534 assert!(matches!(missing, Err(AttachmentError::Missing(_))));
1535
1536 fs::remove_dir_all(&root)?;
1537 Ok(())
1538 }
1539
1540 fn make_kb_on_disk(pages: &[(&str, &str, &[&str], &str)]) -> (PathBuf, KnowledgeBaseIndex) {
1541 let dir = temp_dir_path("kb");
1542 fs::create_dir_all(&dir).unwrap();
1543 for (id, title, tags, body_text) in pages {
1544 let tags_json: Vec<Value> = tags.iter().map(|t| json!(t)).collect();
1545 let content = json!({
1546 "uid": {"uuid": id},
1547 "pageType": {"title": title},
1548 "tags": tags_json,
1549 "children": {"items": [
1550 {"__type": "textSnippet", "string": body_text}
1551 ]}
1552 });
1553 let file_path = dir.join(format!("{id}.lepiter"));
1554 fs::write(&file_path, serde_json::to_vec(&content).unwrap()).unwrap();
1555 }
1556 let index = KnowledgeBase::open(&dir).unwrap();
1557 (dir, index)
1558 }
1559
1560 #[test]
1561 fn search_hits_empty_query_returns_nothing() {
1562 let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "hello world")]);
1563 assert!(index.search_hits("", false).is_empty());
1564 assert!(index.search_hits(" ", true).is_empty());
1565 fs::remove_dir_all(&dir).unwrap();
1566 }
1567
1568 #[test]
1569 fn search_hits_matches_title_case_insensitively() {
1570 let (dir, index) = make_kb_on_disk(&[
1571 ("p1", "Alpha Guide", &[], "nothing special"),
1572 ("p2", "Beta Notes", &[], "nothing special"),
1573 ]);
1574 let hits = index.search_hits("alpha", false);
1575 assert_eq!(hits.len(), 1);
1576 assert_eq!(hits[0].id, "p1");
1577 assert_eq!(hits[0].kind, SearchMatchKind::Meta);
1578 fs::remove_dir_all(&dir).unwrap();
1579 }
1580
1581 #[test]
1582 fn search_hits_matches_tags() {
1583 let (dir, index) = make_kb_on_disk(&[
1584 ("p1", "Page One", &["rust", "cli"], "body"),
1585 ("p2", "Page Two", &["pharo"], "body"),
1586 ]);
1587 let hits = index.search_hits("rust", false);
1588 assert_eq!(hits.len(), 1);
1589 assert_eq!(hits[0].id, "p1");
1590 assert_eq!(hits[0].kind, SearchMatchKind::Meta);
1591 fs::remove_dir_all(&dir).unwrap();
1592 }
1593
1594 #[test]
1595 fn search_hits_content_flag_searches_page_body() {
1596 let (dir, index) = make_kb_on_disk(&[
1597 ("p1", "Alpha", &[], "the quick brown fox"),
1598 ("p2", "Beta", &[], "lazy dog sleeps"),
1599 ]);
1600
1601 let no_content = index.search_hits("fox", false);
1602 assert!(no_content.is_empty());
1603
1604 let with_content = index.search_hits("fox", true);
1605 assert_eq!(with_content.len(), 1);
1606 assert_eq!(with_content[0].id, "p1");
1607 assert_eq!(with_content[0].kind, SearchMatchKind::Content);
1608 fs::remove_dir_all(&dir).unwrap();
1609 }
1610
1611 #[test]
1612 fn search_hits_meta_match_takes_priority_over_content() {
1613 let (dir, index) = make_kb_on_disk(&[("p1", "Fox Guide", &[], "the fox jumps")]);
1614 let hits = index.search_hits("fox", true);
1615 assert_eq!(hits.len(), 1);
1616 assert_eq!(hits[0].kind, SearchMatchKind::Meta);
1617 fs::remove_dir_all(&dir).unwrap();
1618 }
1619
1620 #[test]
1621 fn search_hits_returns_results_sorted_by_title() {
1622 let (dir, index) = make_kb_on_disk(&[
1623 ("p1", "Zebra", &["common"], "body"),
1624 ("p2", "Alpha", &["common"], "body"),
1625 ("p3", "Middle", &["common"], "body"),
1626 ]);
1627 let hits = index.search_hits("common", false);
1628 let ids: Vec<&str> = hits.iter().map(|h| h.id.as_str()).collect();
1629 assert_eq!(ids, vec!["p2", "p3", "p1"]);
1630 fs::remove_dir_all(&dir).unwrap();
1631 }
1632
1633 #[test]
1634 fn classify_link_target_page_prefix() {
1635 let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
1636 assert!(matches!(
1637 index.classify_link_target("page:p1"),
1638 LinkTargetKind::InternalPage(id) if id == "p1"
1639 ));
1640 assert!(matches!(
1641 index.classify_link_target("page:nonexistent"),
1642 LinkTargetKind::Unknown(_)
1643 ));
1644 fs::remove_dir_all(&dir).unwrap();
1645 }
1646
1647 #[test]
1648 fn classify_link_target_empty_is_unknown() {
1649 let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
1650 assert!(matches!(
1651 index.classify_link_target(""),
1652 LinkTargetKind::Unknown(_)
1653 ));
1654 fs::remove_dir_all(&dir).unwrap();
1655 }
1656
1657 #[test]
1658 fn classify_link_target_title_fallback() {
1659 let (dir, index) = make_kb_on_disk(&[("p1", "My Special Page", &[], "body")]);
1660 assert!(matches!(
1661 index.classify_link_target("My Special Page"),
1662 LinkTargetKind::InternalPage(id) if id == "p1"
1663 ));
1664 fs::remove_dir_all(&dir).unwrap();
1665 }
1666
1667 #[test]
1668 fn resolve_page_id_by_title_empty_and_whitespace() {
1669 let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
1670 assert_eq!(
1671 index.resolve_page_id_by_title(""),
1672 TitleResolution::NotFound
1673 );
1674 assert_eq!(
1675 index.resolve_page_id_by_title(" "),
1676 TitleResolution::NotFound
1677 );
1678 fs::remove_dir_all(&dir).unwrap();
1679 }
1680
1681 #[test]
1682 fn resolve_page_id_by_title_case_insensitive_exact() {
1683 let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
1684 assert_eq!(
1685 index.resolve_page_id_by_title("ALPHA"),
1686 TitleResolution::Unique("p1".to_string())
1687 );
1688 fs::remove_dir_all(&dir).unwrap();
1689 }
1690
1691 #[test]
1692 fn filter_page_ids_no_match_returns_empty() {
1693 let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
1694 assert!(index.filter_page_ids("zzzzz").is_empty());
1695 fs::remove_dir_all(&dir).unwrap();
1696 }
1697
1698 #[test]
1699 fn parse_word_node_extracts_primary_fields() {
1700 let item = json!({
1701 "__type":"wordSnippet",
1702 "wordString":"refactoring",
1703 "explanationAttachmentNameString":"attachments/x/explanation.json"
1704 });
1705 let node = parse_node(&item);
1706 match node {
1707 Node::Paragraph { text } => {
1708 assert!(text.contains("refactoring"));
1709 assert!(text.contains("attachments/x/explanation.json"));
1710 }
1711 other => panic!("expected paragraph, got {other:?}"),
1712 }
1713 }
1714
1715 #[test]
1716 fn parse_page_meta_missing_uid_uses_empty_id() -> Result<()> {
1717 let path = temp_file_path("no-uid");
1718 let content = json!({"pageType": {"title": "Some Title"}});
1719 fs::write(&path, serde_json::to_vec(&content)?)?;
1720 let meta = parse_page_meta(&path)?;
1721 fs::remove_file(&path)?;
1722
1723 assert!(meta.id.is_empty());
1724 assert_eq!(meta.title, "Some Title");
1725 Ok(())
1726 }
1727
1728 #[test]
1729 fn parse_page_meta_missing_page_type_uses_empty_title() -> Result<()> {
1730 let path = temp_file_path("no-pt");
1731 let content = json!({"uid": {"uuid": "abc-123"}});
1732 fs::write(&path, serde_json::to_vec(&content)?)?;
1733 let meta = parse_page_meta(&path)?;
1734 fs::remove_file(&path)?;
1735
1736 assert_eq!(meta.id, "abc-123");
1737 assert!(meta.title.is_empty());
1738 Ok(())
1739 }
1740
1741 #[test]
1742 fn parse_page_meta_invalid_date_string_yields_none() -> Result<()> {
1743 let path = temp_file_path("bad-date");
1744 let content = json!({
1745 "uid": {"uuid": "id-1"},
1746 "editTime": {"time": {"dateAndTimeString": "not-a-date"}}
1747 });
1748 fs::write(&path, serde_json::to_vec(&content)?)?;
1749 let meta = parse_page_meta(&path)?;
1750 fs::remove_file(&path)?;
1751
1752 assert!(meta.updated_at.is_none());
1753 Ok(())
1754 }
1755
1756 #[test]
1757 fn open_empty_directory_returns_empty_index() -> Result<()> {
1758 let dir = temp_dir_path("empty-kb");
1759 fs::create_dir_all(&dir)?;
1760 let index = KnowledgeBase::open(&dir)?;
1761 fs::remove_dir_all(&dir)?;
1762
1763 assert!(index.pages.is_empty());
1764 assert!(index.index_issues.is_empty());
1765 Ok(())
1766 }
1767
1768 #[test]
1769 fn open_skips_non_lepiter_files() -> Result<()> {
1770 let dir = temp_dir_path("non-lepiter");
1771 fs::create_dir_all(&dir)?;
1772 fs::write(dir.join("readme.txt"), b"hello")?;
1773 fs::write(dir.join("data.json"), b"{}")?;
1774 let index = KnowledgeBase::open(&dir)?;
1775 fs::remove_dir_all(&dir)?;
1776
1777 assert!(index.pages.is_empty());
1778 assert!(index.index_issues.is_empty());
1779 Ok(())
1780 }
1781
1782 #[test]
1783 fn open_reports_invalid_json_as_issue() -> Result<()> {
1784 let dir = temp_dir_path("bad-json");
1785 fs::create_dir_all(&dir)?;
1786 fs::write(dir.join("broken.lepiter"), b"not json at all")?;
1787 let index = KnowledgeBase::open(&dir)?;
1788 fs::remove_dir_all(&dir)?;
1789
1790 assert!(index.pages.is_empty());
1791 assert_eq!(index.index_issues.len(), 1);
1792 assert!(index.index_issues[0].message.contains("failed to decode"));
1793 Ok(())
1794 }
1795
1796 #[test]
1797 fn open_reports_wrong_json_structure_as_issue() -> Result<()> {
1798 let dir = temp_dir_path("wrong-shape");
1799 fs::create_dir_all(&dir)?;
1800 fs::write(dir.join("array.lepiter"), b"[1, 2, 3]")?;
1801 let index = KnowledgeBase::open(&dir)?;
1802 fs::remove_dir_all(&dir)?;
1803
1804 assert!(index.pages.is_empty());
1805 assert_eq!(index.index_issues.len(), 1);
1806 Ok(())
1807 }
1808
1809 #[test]
1810 fn open_fills_in_defaults_for_minimal_page() -> Result<()> {
1811 let dir = temp_dir_path("minimal");
1812 fs::create_dir_all(&dir)?;
1813 fs::write(dir.join("mypage.lepiter"), b"{}")?;
1814 let index = KnowledgeBase::open(&dir)?;
1815 fs::remove_dir_all(&dir)?;
1816
1817 assert_eq!(index.pages.len(), 1);
1818 let meta = index.pages.values().next().unwrap();
1819 assert_eq!(meta.id, "mypage");
1820 assert_eq!(meta.title, "mypage");
1821 Ok(())
1822 }
1823
1824 #[test]
1825 fn load_page_nonexistent_id_errors() -> Result<()> {
1826 let dir = temp_dir_path("no-such-id");
1827 fs::create_dir_all(&dir)?;
1828 let index = KnowledgeBase::open(&dir)?;
1829 fs::remove_dir_all(&dir)?;
1830
1831 let err = index.load_page("does-not-exist");
1832 assert!(err.is_err());
1833 assert!(format!("{:#}", err.unwrap_err()).contains("page id not found"));
1834 Ok(())
1835 }
1836
1837 #[test]
1838 fn load_page_missing_children_yields_empty_content() -> Result<()> {
1839 let dir = temp_dir_path("no-children");
1840 fs::create_dir_all(&dir)?;
1841 let content = json!({"uid": {"uuid": "pg-1"}, "pageType": {"title": "T"}});
1842 fs::write(dir.join("pg-1.lepiter"), serde_json::to_vec(&content)?)?;
1843 let index = KnowledgeBase::open(&dir)?;
1844 let page = index.load_page("pg-1")?;
1845 fs::remove_dir_all(&dir)?;
1846
1847 assert!(page.content.is_empty());
1848 Ok(())
1849 }
1850}