//! Core data model and parsing for a Lepiter-style knowledge base: a
//! directory of `.lepiter` JSON pages that can be indexed, searched, and
//! rendered to plain text.

use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};

use anyhow::{Context, Result};
use chrono::{DateTime, FixedOffset};
use serde::Deserialize;
use serde_json::Value;
use walkdir::WalkDir;
/// Identifier of a page: the page UUID when available, otherwise the file stem.
pub type PageId = String;

/// Lightweight metadata gathered for each page while indexing a knowledge base.
#[derive(Debug, Clone)]
pub struct PageMeta {
    /// Stable identifier used to look the page up in the index.
    pub id: PageId,
    /// Human-readable page title.
    pub title: String,
    /// Path of the backing `.lepiter` file.
    pub path: PathBuf,
    /// Last edit time, when the file records one.
    pub updated_at: Option<DateTime<FixedOffset>>,
    /// Tags attached to the page.
    pub tags: Vec<String>,
}

/// A fully loaded page: metadata plus parsed content nodes.
#[derive(Debug, Clone)]
pub struct Page {
    /// Stable identifier of the page.
    pub id: PageId,
    /// Human-readable page title.
    pub title: String,
    /// Last edit time, when the file records one.
    pub updated_at: Option<DateTime<FixedOffset>>,
    /// Tags attached to the page.
    pub tags: Vec<String>,
    /// Parsed snippets in document order.
    pub content: Vec<Node>,
}

/// A single content node parsed from a page snippet.
#[derive(Debug, Clone)]
pub enum Node {
    /// Markdown-style heading (levels 1–6).
    Heading { level: u8, text: String },
    /// Plain paragraph of text.
    Paragraph { text: String },
    /// Bare text line (currently produced for blank text snippets).
    Text { text: String },
    /// List whose items are themselves sequences of nodes.
    List { items: Vec<Vec<Node>> },
    /// Code block with an optional language hint.
    Code {
        language: Option<String>,
        code: String,
    },
    /// Link with display text and target.
    Link { text: String, url: String },
    /// Block quote.
    Quote { text: String },
    /// Pharo rewrite rule: a search/replace pattern with optional scope.
    Rewrite {
        language: Option<String>,
        search: String,
        replace: String,
        scope: Option<String>,
        is_method_pattern: Option<bool>,
    },
    /// Snippet type that is not understood yet; the raw JSON is preserved.
    Unknown { typ: String, raw: Value },
}

/// A problem encountered while indexing or parsing a page file.
#[derive(Debug, Clone)]
pub struct ParseIssue {
    /// File that triggered the issue.
    pub path: PathBuf,
    /// Human-readable description of what went wrong.
    pub message: String,
}

/// Where a search query matched a page.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchMatchKind {
    /// Matched the title, id, or tags.
    Meta,
    /// Matched the rendered page content.
    Content,
}

/// A single search result.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchHit {
    /// Id of the matching page.
    pub id: PageId,
    /// Whether the match came from metadata or content.
    pub kind: SearchMatchKind,
}

/// Classification of a link target found in page content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkTargetKind {
    /// Link to another page in the same knowledge base.
    InternalPage(PageId),
    /// Path to an attachment resolved under the knowledge-base root.
    AttachmentPath(PathBuf),
    /// External URL (http, https, mailto, file, ...).
    ExternalUrl(String),
    /// Target that could not be resolved.
    Unknown(String),
}

/// Outcome of resolving a page title to an id.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TitleResolution {
    /// Exactly one page matched.
    Unique(PageId),
    /// No page matched.
    NotFound,
    /// Several pages matched; all candidate ids are returned.
    Ambiguous(Vec<PageId>),
}

/// Index over a knowledge-base directory: page metadata keyed by id, plus any
/// issues encountered while scanning.
#[derive(Debug, Clone)]
pub struct KnowledgeBaseIndex {
    root: PathBuf,
    /// Page metadata keyed by page id.
    pub pages: HashMap<PageId, PageMeta>,
    /// Files that could not be indexed.
    pub index_issues: Vec<ParseIssue>,
}

/// Entry point for opening a knowledge base from disk.
pub struct KnowledgeBase;

impl KnowledgeBase {
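    /// Scans `path` (one level deep) for `*.lepiter` files and builds an index
    /// of page metadata. Files that fail to parse are recorded in
    /// `index_issues` rather than aborting the scan.
    ///
    /// A minimal usage sketch (marked `ignore`: the crate name `lepiter_core`
    /// and the `notes/` directory are assumptions, not part of this file):
    ///
    /// ```ignore
    /// use lepiter_core::KnowledgeBase;
    ///
    /// let index = KnowledgeBase::open("notes/").expect("open knowledge base");
    /// for meta in index.sorted_pages_by_title() {
    ///     println!("{}  {}", meta.id, meta.title);
    /// }
    /// ```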
    pub fn open(path: impl AsRef<Path>) -> Result<KnowledgeBaseIndex> {
        let root = path.as_ref().to_path_buf();
        let mut pages = HashMap::new();
        let mut issues = Vec::new();

        for entry in WalkDir::new(&root)
            .min_depth(1)
            .max_depth(1)
            .into_iter()
            .filter_map(|e| e.ok())
        {
            let file_type = entry.file_type();
            let file_path = entry.path();
            if !file_type.is_file()
                || file_path.extension().and_then(|e| e.to_str()) != Some("lepiter")
            {
                continue;
            }

            match parse_page_meta(file_path) {
                Ok(mut meta) => {
                    if meta.id.is_empty()
                        && let Some(stem) = file_path.file_stem().and_then(|s| s.to_str())
                    {
                        meta.id = stem.to_string();
                    }
                    if meta.title.is_empty() {
                        meta.title = meta.id.clone();
                    }
                    pages.insert(meta.id.clone(), meta);
                }
                Err(err) => issues.push(ParseIssue {
                    path: file_path.to_path_buf(),
                    message: format!("{err:#}"),
                }),
            }
        }

        Ok(KnowledgeBaseIndex {
            root,
            pages,
            index_issues: issues,
        })
    }
}

impl KnowledgeBaseIndex {
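    /// Loads and parses the full content of a page by id.
    ///
    /// A minimal sketch (marked `ignore`: the crate name and the page id are
    /// assumptions):
    ///
    /// ```ignore
    /// use lepiter_core::{render_page_to_text, KnowledgeBase};
    ///
    /// let index = KnowledgeBase::open("notes/").expect("open knowledge base");
    /// let page = index
    ///     .load_page("8a505fa0-2222-3333-4444-555555555555")
    ///     .expect("load page");
    /// println!("{}", render_page_to_text(&page));
    /// ```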
    pub fn load_page(&self, id: &str) -> Result<Page> {
        let meta = self
            .pages
            .get(id)
            .with_context(|| format!("page id not found: {id}"))?;

        let file = File::open(&meta.path)
            .with_context(|| format!("failed to open page file {}", meta.path.display()))?;
        let reader = BufReader::new(file);
        let raw: Value =
            serde_json::from_reader(reader).with_context(|| "failed to decode page JSON")?;

        let mut content = Vec::new();
        if let Some(items) = raw
            .get("children")
            .and_then(|v| v.get("items"))
            .and_then(Value::as_array)
        {
            for item in items {
                parse_item_recursive(item, &mut content);
            }
        }

        Ok(Page {
            id: meta.id.clone(),
            title: meta.title.clone(),
            updated_at: meta.updated_at,
            tags: meta.tags.clone(),
            content,
        })
    }

    /// Returns all page metadata sorted case-insensitively by title.
    pub fn sorted_pages_by_title(&self) -> Vec<&PageMeta> {
        let mut pages = self.pages.values().collect::<Vec<_>>();
        pages.sort_by(|a, b| a.title.to_lowercase().cmp(&b.title.to_lowercase()));
        pages
    }

    /// Returns ids of pages whose title, id, or tags contain `query`
    /// (case-insensitive). An empty query returns every page.
    pub fn filter_page_ids(&self, query: &str) -> Vec<PageId> {
        let needle = query.trim().to_lowercase();
        let mut metas = self.sorted_pages_by_title();
        if !needle.is_empty() {
            metas.retain(|m| page_meta_matches(m, &needle));
        }
        metas.into_iter().map(|m| m.id.clone()).collect()
    }

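    /// Matches `query` case-insensitively against page metadata and, when
    /// `include_content` is set, against rendered page content. Metadata
    /// matches take precedence and results follow the title-sorted order.
    ///
    /// A small sketch (marked `ignore`: the crate name and directory are
    /// assumptions):
    ///
    /// ```ignore
    /// use lepiter_core::{KnowledgeBase, SearchMatchKind};
    ///
    /// let index = KnowledgeBase::open("notes/").expect("open knowledge base");
    /// // Cheap search over titles, ids, and tags only.
    /// let quick = index.search_hits("refactoring", false);
    /// // Also scan page bodies; slower, since every page is loaded and rendered.
    /// let deep = index.search_hits("refactoring", true);
    /// for hit in deep {
    ///     if hit.kind == SearchMatchKind::Content {
    ///         println!("content match: {}", hit.id);
    ///     }
    /// }
    /// ```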
    pub fn search_hits(&self, query: &str, include_content: bool) -> Vec<SearchHit> {
        let needle = query.trim().to_lowercase();
        if needle.is_empty() {
            return Vec::new();
        }

        let mut by_id: HashMap<PageId, SearchMatchKind> = HashMap::new();
        let metas = self.sorted_pages_by_title();

        for meta in &metas {
            if page_meta_matches(meta, &needle) {
                by_id.insert(meta.id.clone(), SearchMatchKind::Meta);
            }
        }

        if include_content {
            for meta in &metas {
                if by_id.contains_key(&meta.id) {
                    continue;
                }
                let Ok(page) = self.load_page(&meta.id) else {
                    continue;
                };
                if render_page_to_text(&page).to_lowercase().contains(&needle) {
                    by_id.insert(meta.id.clone(), SearchMatchKind::Content);
                }
            }
        }

        let mut hits = Vec::new();
        for meta in metas {
            if let Some(kind) = by_id.get(&meta.id) {
                hits.push(SearchHit {
                    id: meta.id.clone(),
                    kind: *kind,
                });
            }
        }
        hits
    }

    /// Resolves a title to a page id, preferring exact (case-insensitive)
    /// matches and falling back to substring matches.
    pub fn resolve_page_id_by_title(&self, title: &str) -> TitleResolution {
        let needle = title.trim().to_lowercase();
        if needle.is_empty() {
            return TitleResolution::NotFound;
        }

        let exact = self
            .sorted_pages_by_title()
            .into_iter()
            .filter(|m| m.title.to_lowercase() == needle)
            .map(|m| m.id.clone())
            .collect::<Vec<_>>();
        match exact.len() {
            1 => return TitleResolution::Unique(exact[0].clone()),
            n if n > 1 => return TitleResolution::Ambiguous(exact),
            _ => {}
        }

        let partial = self
            .sorted_pages_by_title()
            .into_iter()
            .filter(|m| m.title.to_lowercase().contains(&needle))
            .map(|m| m.id.clone())
            .collect::<Vec<_>>();
        match partial.len() {
            1 => TitleResolution::Unique(partial[0].clone()),
            0 => TitleResolution::NotFound,
            _ => TitleResolution::Ambiguous(partial),
        }
    }

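    /// Classifies a raw link target as an internal page, an attachment path
    /// under the knowledge-base root, an external URL, or an unknown target.
    /// Resolution tries, in order: an exact page id, `page:` / `title:`
    /// prefixes, an embedded UUID, external URL schemes, attachment paths,
    /// and finally a title lookup.
    ///
    /// A small sketch (marked `ignore`: the crate name, directory, and
    /// targets are assumptions):
    ///
    /// ```ignore
    /// use lepiter_core::{KnowledgeBase, LinkTargetKind};
    ///
    /// let index = KnowledgeBase::open("notes/").expect("open knowledge base");
    /// match index.classify_link_target("attachments/diagram.png") {
    ///     LinkTargetKind::InternalPage(id) => println!("links to page {id}"),
    ///     LinkTargetKind::AttachmentPath(path) => println!("attachment at {}", path.display()),
    ///     LinkTargetKind::ExternalUrl(url) => println!("external link to {url}"),
    ///     LinkTargetKind::Unknown(raw) => println!("unresolved target: {raw}"),
    /// }
    /// ```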
    pub fn classify_link_target(&self, raw: &str) -> LinkTargetKind {
        let target = raw.trim();
        if target.is_empty() {
            return LinkTargetKind::Unknown(raw.to_string());
        }

        if self.pages.contains_key(target) {
            return LinkTargetKind::InternalPage(target.to_string());
        }

        if let Some(rest) = target.strip_prefix("page:") {
            let id = rest.trim();
            if self.pages.contains_key(id) {
                return LinkTargetKind::InternalPage(id.to_string());
            }
        }
        if let Some(rest) = target.strip_prefix("title:") {
            return match self.resolve_page_id_by_title(rest.trim()) {
                TitleResolution::Unique(id) => LinkTargetKind::InternalPage(id),
                _ => LinkTargetKind::Unknown(target.to_string()),
            };
        }

        if let Some(uuid) = extract_uuid_like(target)
            && self.pages.contains_key(uuid)
        {
            return LinkTargetKind::InternalPage(uuid.to_string());
        }

        if is_external_target(target) {
            return LinkTargetKind::ExternalUrl(target.to_string());
        }

        if let Some(rel) = attachment_relative_path(target) {
            return LinkTargetKind::AttachmentPath(self.root.join(rel));
        }

        match self.resolve_page_id_by_title(target) {
            TitleResolution::Unique(id) => LinkTargetKind::InternalPage(id),
            _ => LinkTargetKind::Unknown(target.to_string()),
        }
    }

    /// Root directory this index was built from.
    pub fn root(&self) -> &Path {
        &self.root
    }
}

/// True if `needle` (already lowercased) occurs in the page title, id, or any tag.
fn page_meta_matches(meta: &PageMeta, needle: &str) -> bool {
    meta.title.to_lowercase().contains(needle)
        || meta.id.to_lowercase().contains(needle)
        || meta.tags.iter().any(|t| t.to_lowercase().contains(needle))
}

fn is_external_target(target: &str) -> bool {
    let lower = target.to_lowercase();
    lower.starts_with("http://")
        || lower.starts_with("https://")
        || lower.starts_with("mailto:")
        || lower.starts_with("file://")
        || lower.contains("://")
}

/// Extracts the `attachments/...` portion of a target (prefix included), so
/// the result can be joined onto the knowledge-base root.
fn attachment_relative_path(target: &str) -> Option<&str> {
    if target.starts_with("attachments/") {
        return Some(target);
    }
    if let Some(pos) = target.find("/attachments/") {
        let start = pos + 1;
        return target.get(start..);
    }
    if let Some(pos) = target.find("attachments/") {
        return target.get(pos..);
    }
    None
}

/// Finds the first 36-character UUID-shaped substring in `input`.
fn extract_uuid_like(input: &str) -> Option<&str> {
    let bytes = input.as_bytes();
    if bytes.len() < 36 {
        return None;
    }

    for i in 0..=bytes.len() - 36 {
        // Skip windows that do not fall on char boundaries (non-ASCII input).
        let Some(cand) = input.get(i..i + 36) else {
            continue;
        };
        let ok = cand.chars().enumerate().all(|(idx, c)| match idx {
            8 | 13 | 18 | 23 => c == '-',
            _ => c.is_ascii_hexdigit(),
        });
        if ok {
            return Some(cand);
        }
    }
    None
}

/// Minimal serde views of the top-level page JSON; only the fields needed to
/// build [`PageMeta`] are modelled.
#[derive(Debug, Deserialize)]
struct RawMeta {
    #[serde(default)]
    uid: Option<RawUid>,
    #[serde(default)]
    #[serde(rename = "pageType")]
    page_type: Option<RawPageType>,
    #[serde(default)]
    title: Option<String>,
    #[serde(default)]
    #[serde(rename = "editTime")]
    edit_time: Option<RawEditTime>,
    #[serde(default)]
    tags: Option<Value>,
}

#[derive(Debug, Deserialize)]
struct RawUid {
    #[serde(default)]
    uuid: Option<String>,
    #[serde(default)]
    #[serde(rename = "uidString")]
    uid_string: Option<String>,
}

#[derive(Debug, Deserialize)]
struct RawPageType {
    #[serde(default)]
    title: Option<String>,
}

#[derive(Debug, Deserialize)]
struct RawEditTime {
    #[serde(default)]
    time: Option<RawTimeValue>,
}

#[derive(Debug, Deserialize)]
struct RawTimeValue {
    #[serde(default)]
    #[serde(rename = "dateAndTimeString")]
    date_and_time_string: Option<String>,
}

/// Reads just enough of a `.lepiter` file to build a [`PageMeta`].
fn parse_page_meta(path: &Path) -> Result<PageMeta> {
    let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
    let reader = BufReader::new(file);
    let raw: RawMeta =
        serde_json::from_reader(reader).with_context(|| "failed to decode page metadata")?;

    let id = raw
        .uid
        .as_ref()
        .and_then(|u| u.uuid.clone().or_else(|| u.uid_string.clone()))
        .unwrap_or_default();
    let title = raw
        .page_type
        .and_then(|pt| pt.title)
        .or(raw.title)
        .unwrap_or_default();
    let updated_at = raw
        .edit_time
        .and_then(|e| e.time)
        .and_then(|t| t.date_and_time_string)
        .and_then(|s| DateTime::parse_from_rfc3339(&s).ok());
    let tags = parse_tags(raw.tags.as_ref());

    Ok(PageMeta {
        id,
        title,
        path: path.to_path_buf(),
        updated_at,
        tags,
    })
}

/// Accepts tags either as an array of strings/objects or as an object with an
/// `items` array of titled objects.
fn parse_tags(value: Option<&Value>) -> Vec<String> {
    let Some(value) = value else {
        return Vec::new();
    };
    match value {
        Value::Array(items) => items
            .iter()
            .filter_map(|item| {
                item.as_str()
                    .map(ToOwned::to_owned)
                    .or_else(|| {
                        item.get("name")
                            .and_then(Value::as_str)
                            .map(ToOwned::to_owned)
                    })
                    .or_else(|| {
                        item.get("title")
                            .and_then(Value::as_str)
                            .map(ToOwned::to_owned)
                    })
            })
            .collect(),
        Value::Object(obj) => obj
            .get("items")
            .and_then(Value::as_array)
            .map(|items| {
                items
                    .iter()
                    .filter_map(|i| {
                        i.get("title")
                            .and_then(Value::as_str)
                            .map(ToOwned::to_owned)
                    })
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default(),
        _ => Vec::new(),
    }
}

/// Parses `item`, then recurses into its children. List snippets are not
/// recursed into here because [`parse_list_node`] already consumes their
/// children as list items.
fn parse_item_recursive(item: &Value, out: &mut Vec<Node>) {
    let typ = extract_type(item);
    out.push(parse_node(item));
    if matches!(typ.as_deref(), Some("listSnippet")) {
        return;
    }
    if let Some(children) = item
        .get("children")
        .and_then(|v| v.get("items"))
        .and_then(Value::as_array)
    {
        for child in children {
            parse_item_recursive(child, out);
        }
    }
}

fn parse_node(item: &Value) -> Node {
    let typ = extract_type(item);

    match typ.as_deref() {
        Some("textSnippet") => parse_text_like_node(item),
        Some("quoteSnippet") | Some("blockQuoteSnippet") | Some("commentSnippet") => Node::Quote {
            text: extract_text(item).unwrap_or_default(),
        },
        Some("listSnippet") => parse_list_node(item),
        Some("pictureSnippet") => parse_picture_node(item),
        Some("youtubeSnippet") => parse_youtube_node(item),
        Some("elementSnippet") => parse_element_node(item),
        Some("pharoRewrite") => parse_rewrite_node(item),
        Some("wordSnippet") => parse_word_node(item),
        Some(
            t @ ("pharoSnippet"
            | "pythonSnippet"
            | "javascriptSnippet"
            | "shellCommandSnippet"
            | "gemstoneSnippet"
            | "exampleSnippet"
            | "changesSnippet"
            | "robocoderMetamodelSnippet"),
        ) => Node::Code {
            language: infer_language(Some(t)),
            code: extract_code(item)
                .or_else(|| extract_text(item))
                .unwrap_or_default(),
        },
        Some(t @ "pharoLinkSnippet") if has_link(item) => Node::Link {
            text: extract_text(item).unwrap_or_else(|| t.to_string()),
            url: extract_link(item).unwrap_or_default(),
        },
        Some("linkSnippet") if has_link(item) => Node::Link {
            text: extract_text(item).unwrap_or_else(|| "link".to_string()),
            url: extract_link(item).unwrap_or_default(),
        },
        Some(t) => Node::Unknown {
            typ: t.to_string(),
            raw: item.clone(),
        },
        None => Node::Unknown {
            typ: "<missing-type>".to_string(),
            raw: item.clone(),
        },
    }
}

fn parse_text_like_node(item: &Value) -> Node {
    let text = extract_text(item).unwrap_or_default();
    if let Some((level, heading)) = parse_heading(&text) {
        Node::Heading {
            level,
            text: heading,
        }
    } else if let Some(stripped) = text.strip_prefix("> ") {
        Node::Quote {
            text: stripped.to_string(),
        }
    } else if text.trim().is_empty() {
        Node::Text { text }
    } else {
        Node::Paragraph { text }
    }
}

fn parse_list_node(item: &Value) -> Node {
    let mut items = Vec::new();
    if let Some(children) = item
        .get("children")
        .and_then(|v| v.get("items"))
        .and_then(Value::as_array)
    {
        for child in children {
            items.push(vec![parse_node(child)]);
        }
    }
    Node::List { items }
}

fn parse_picture_node(item: &Value) -> Node {
    let url = item
        .get("url")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .or_else(|| extract_link(item))
        .unwrap_or_default();
    let text = item
        .get("caption")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .or_else(|| extract_text(item))
        .unwrap_or_else(|| "picture".to_string());

    if url.is_empty() {
        Node::Unknown {
            typ: "pictureSnippet".to_string(),
            raw: item.clone(),
        }
    } else {
        Node::Link { text, url }
    }
}

fn parse_youtube_node(item: &Value) -> Node {
    let url = item
        .get("youtubeUrl")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .or_else(|| extract_link(item))
        .unwrap_or_default();
    let text = extract_text(item).unwrap_or_else(|| "youtube".to_string());

    if url.is_empty() {
        Node::Unknown {
            typ: "youtubeSnippet".to_string(),
            raw: item.clone(),
        }
    } else {
        Node::Link { text, url }
    }
}

fn parse_element_node(item: &Value) -> Node {
    let code = extract_code(item).or_else(|| extract_text(item));
    if let Some(code) = code.filter(|c| !c.trim().is_empty()) {
        Node::Code {
            language: Some("element".to_string()),
            code,
        }
    } else {
        Node::Unknown {
            typ: "elementSnippet".to_string(),
            raw: item.clone(),
        }
    }
}

fn parse_rewrite_node(item: &Value) -> Node {
    let search = item
        .get("search")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .unwrap_or_default();
    let replace = item
        .get("replace")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .unwrap_or_default();
    let scope = item
        .get("scope")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned);
    let is_method_pattern = item.get("isMethodPattern").and_then(Value::as_bool);

    if search.is_empty() && replace.is_empty() {
        Node::Unknown {
            typ: "pharoRewrite".to_string(),
            raw: item.clone(),
        }
    } else {
        Node::Rewrite {
            language: Some("pharo".to_string()),
            search,
            replace,
            scope,
            is_method_pattern,
        }
    }
}

fn parse_word_node(item: &Value) -> Node {
    let mut lines = Vec::new();

    if let Some(word) = item
        .get("wordString")
        .and_then(Value::as_str)
        .map(str::trim)
        .filter(|s| !s.is_empty())
    {
        lines.push(word.to_string());
    }

    if let Some(explanation) = item
        .get("explanationAttachmentNameString")
        .and_then(Value::as_str)
        .map(str::trim)
        .filter(|s| !s.is_empty())
    {
        lines.push(format!("explanation: {explanation}"));
    }

    if lines.is_empty() {
        collect_text_fragments(item, &mut lines, 0, 12);
    }

    lines.retain(|s| !s.trim().is_empty());
    lines.truncate(8);

    if lines.is_empty() {
        return Node::Unknown {
            typ: "wordSnippet".to_string(),
            raw: item.clone(),
        };
    }

    let mut text = lines.join("\n");
    if text.chars().count() > 1200 {
        text = text.chars().take(1199).collect::<String>();
        text.push('…');
    }

    Node::Paragraph { text }
}

/// Walks a JSON value collecting up to `remaining` non-empty string fragments,
/// skipping structural and bookkeeping keys and stopping once nesting exceeds
/// four levels.
fn collect_text_fragments(value: &Value, out: &mut Vec<String>, depth: usize, remaining: usize) {
    if remaining == 0 || out.len() >= remaining || depth > 4 {
        return;
    }

    match value {
        Value::String(s) => {
            let trimmed = s.trim();
            if !trimmed.is_empty() {
                out.push(trimmed.to_string());
            }
        }
        Value::Array(items) => {
            for item in items {
                if out.len() >= remaining {
                    break;
                }
                collect_text_fragments(item, out, depth + 1, remaining);
            }
        }
        Value::Object(map) => {
            for (key, item) in map {
                if matches!(
                    key.as_str(),
                    "__type"
                        | "children"
                        | "uid"
                        | "createEmail"
                        | "createTime"
                        | "editEmail"
                        | "editTime"
                        | "paragraphStyle"
                ) {
                    continue;
                }
                if out.len() >= remaining {
                    break;
                }
                collect_text_fragments(item, out, depth + 1, remaining);
            }
        }
        _ => {}
    }
}

fn parse_heading(input: &str) -> Option<(u8, String)> {
    let trimmed = input.trim();
    let hashes = trimmed.chars().take_while(|c| *c == '#').count();
    if hashes == 0 {
        return None;
    }
    let rest = trimmed[hashes..].trim_start();
    if rest.is_empty() {
        return None;
    }
    Some((hashes.min(6) as u8, rest.to_string()))
}

fn extract_type(item: &Value) -> Option<String> {
    item.get("type")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .or_else(|| {
            item.get("__type")
                .and_then(Value::as_str)
                .map(ToOwned::to_owned)
        })
}

fn extract_text(item: &Value) -> Option<String> {
    item.get("string")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .or_else(|| {
            item.get("text")
                .and_then(Value::as_str)
                .map(ToOwned::to_owned)
        })
        .or_else(|| {
            item.get("content")
                .and_then(Value::as_str)
                .map(ToOwned::to_owned)
        })
}

fn extract_code(item: &Value) -> Option<String> {
    item.get("code")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .or_else(|| {
            item.get("source")
                .and_then(Value::as_str)
                .map(ToOwned::to_owned)
        })
}

fn extract_link(item: &Value) -> Option<String> {
    item.get("url")
        .and_then(Value::as_str)
        .map(ToOwned::to_owned)
        .or_else(|| {
            item.get("href")
                .and_then(Value::as_str)
                .map(ToOwned::to_owned)
        })
}

fn has_link(item: &Value) -> bool {
    item.get("url").and_then(Value::as_str).is_some()
        || item.get("href").and_then(Value::as_str).is_some()
}

fn infer_language(typ: Option<&str>) -> Option<String> {
    let typ = typ?;
    match typ {
        "pharoSnippet" => Some("pharo".to_string()),
        "pythonSnippet" => Some("python".to_string()),
        "javascriptSnippet" => Some("javascript".to_string()),
        "jsonSnippet" => Some("json".to_string()),
        "yamlSnippet" => Some("yaml".to_string()),
        _ => {
            if typ.ends_with("Snippet") {
                Some(typ.trim_end_matches("Snippet").to_lowercase())
            } else {
                None
            }
        }
    }
}

/// Renders a page's parsed content to a Markdown-flavoured plain-text string.
pub fn render_page_to_text(page: &Page) -> String {
    render_nodes_to_text(&page.content)
}

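/// Renders a slice of nodes to a Markdown-flavoured plain-text string.
///
/// A small sketch (marked `ignore`: the crate name is an assumption):
///
/// ```ignore
/// use lepiter_core::{render_nodes_to_text, Node};
///
/// let text = render_nodes_to_text(&[
///     Node::Heading { level: 2, text: "Setup".to_string() },
///     Node::Paragraph { text: "Install the image first.".to_string() },
/// ]);
/// assert!(text.starts_with("## Setup"));
/// assert!(text.contains("Install the image first."));
/// ```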
pub fn render_nodes_to_text(nodes: &[Node]) -> String {
    let mut out = String::new();
    for node in nodes {
        match node {
            Node::Heading { level, text } => {
                out.push_str(&"#".repeat((*level).max(1) as usize));
                out.push(' ');
                out.push_str(text);
                out.push_str("\n\n");
            }
            Node::Paragraph { text } => {
                out.push_str(text);
                out.push_str("\n\n");
            }
            Node::Text { text } => {
                out.push_str(text);
                out.push('\n');
            }
            Node::List { items } => {
                for item in items {
                    out.push_str("- ");
                    out.push_str(render_nodes_to_text(item).trim());
                    out.push('\n');
                }
                out.push('\n');
            }
            Node::Code { language, code } => {
                out.push_str("```");
                if let Some(lang) = language {
                    out.push_str(lang);
                }
                out.push('\n');
                out.push_str(code);
                out.push_str("\n```\n\n");
            }
            Node::Link { text, url } => {
                out.push_str(&format!("[{text}]({url})\n\n"));
            }
            Node::Quote { text } => {
                out.push_str(&format!("> {text}\n\n"));
            }
            Node::Rewrite {
                language,
                search,
                replace,
                scope,
                is_method_pattern,
            } => {
                let lang = language.clone().unwrap_or_else(|| "rewrite".to_string());
                out.push_str(&format!("```diff {lang}\n"));
                if let Some(scope) = scope {
                    out.push_str(&format!("# scope: {scope}\n"));
                }
                if let Some(is_method_pattern) = is_method_pattern {
                    out.push_str(&format!("# method_pattern: {is_method_pattern}\n"));
                }
                for line in normalize_text(search).lines() {
                    out.push('-');
                    out.push_str(line);
                    out.push('\n');
                }
                for line in normalize_text(replace).lines() {
                    out.push('+');
                    out.push_str(line);
                    out.push('\n');
                }
                out.push_str("```\n\n");
            }
            Node::Unknown { typ, .. } => {
                out.push_str(&format!("[[unknown: {typ}]]\n\n"));
            }
        }
    }
    out
}

fn normalize_text(input: &str) -> String {
    input.replace("\r\n", "\n").replace('\r', "\n")
}

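/// Counts every `type` / `__type` value found anywhere in a `.lepiter` JSON
/// file; useful for surveying which snippet kinds a knowledge base uses.
///
/// A small sketch (marked `ignore`: the crate name and the path are
/// assumptions):
///
/// ```ignore
/// use std::path::Path;
///
/// use lepiter_core::collect_node_types_in_file;
///
/// let counts = collect_node_types_in_file(Path::new("notes/page.lepiter")).expect("read page");
/// for (typ, count) in &counts {
///     println!("{typ}: {count}");
/// }
/// ```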
pub fn collect_node_types_in_file(path: &Path) -> Result<HashMap<String, usize>> {
    let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
    let reader = BufReader::new(file);
    let raw: Value = serde_json::from_reader(reader).with_context(|| "failed to decode JSON")?;

    let mut out = HashMap::new();
    collect_node_types_value(&raw, &mut out);
    Ok(out)
}

fn collect_node_types_value(value: &Value, out: &mut HashMap<String, usize>) {
    match value {
        Value::Object(map) => {
            if let Some(typ) = map
                .get("type")
                .and_then(Value::as_str)
                .or_else(|| map.get("__type").and_then(Value::as_str))
            {
                *out.entry(typ.to_string()).or_insert(0) += 1;
            }
            for v in map.values() {
                collect_node_types_value(v, out);
            }
        }
        Value::Array(items) => {
            for item in items {
                collect_node_types_value(item, out);
            }
        }
        _ => {}
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::fs;
    use std::time::{SystemTime, UNIX_EPOCH};

    fn temp_file_path(name: &str) -> PathBuf {
        let ts = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        std::env::temp_dir().join(format!("lepiter-core-{name}-{ts}.lepiter"))
    }

    #[test]
    fn parse_heading_detects_markdown_style() {
        assert_eq!(
            parse_heading("## Heading"),
            Some((2, "Heading".to_string()))
        );
        assert_eq!(parse_heading("No heading"), None);
    }

    #[test]
    fn parse_tags_supports_array_and_object_items() {
        let arr = json!(["a", {"name": "b"}, {"title": "c"}]);
        assert_eq!(parse_tags(Some(&arr)), vec!["a", "b", "c"]);

        let obj = json!({"items": [{"title":"x"}, {"title":"y"}]});
        assert_eq!(parse_tags(Some(&obj)), vec!["x", "y"]);
    }

    #[test]
    fn parse_node_covers_known_and_unknown_types() {
        let heading = json!({"__type":"textSnippet","string":"# Title"});
        assert!(matches!(parse_node(&heading), Node::Heading { .. }));

        let quote = json!({"__type":"blockQuoteSnippet","string":"quoted"});
        assert!(matches!(parse_node(&quote), Node::Quote { .. }));

        let code = json!({"__type":"pythonSnippet","code":"print(1)"});
        assert!(matches!(parse_node(&code), Node::Code { .. }));

        let link = json!({"__type":"pharoLinkSnippet","string":"link","url":"page:abc"});
        assert!(matches!(parse_node(&link), Node::Link { .. }));

        let picture = json!({"__type":"pictureSnippet","url":"attachments/x.png","caption":"img"});
        assert!(matches!(parse_node(&picture), Node::Link { .. }));

        let youtube = json!({"__type":"youtubeSnippet","youtubeUrl":"https://youtu.be/abc"});
        assert!(matches!(parse_node(&youtube), Node::Link { .. }));

        let element = json!({"__type":"elementSnippet","code":"GtInspector newOn: 42"});
        assert!(matches!(parse_node(&element), Node::Code { .. }));

        let rewrite =
            json!({"__type":"pharoRewrite","search":"a","replace":"b","isMethodPattern":true});
        assert!(matches!(parse_node(&rewrite), Node::Rewrite { .. }));

        let word = json!({"__type":"wordSnippet","wordString":"refactoring"});
        assert!(matches!(parse_node(&word), Node::Paragraph { .. }));

        let list = json!({
            "__type":"listSnippet",
            "children":{"items":[{"__type":"textSnippet","string":"item"}]}
        });
        assert!(matches!(parse_node(&list), Node::List { .. }));

        let unknown = json!({"__type":"mysterySnippet","x":1});
        assert!(matches!(parse_node(&unknown), Node::Unknown { .. }));

        let missing = json!({"x":1});
        assert!(matches!(parse_node(&missing), Node::Unknown { .. }));
    }

    #[test]
    fn infer_language_maps_common_snippet_types() {
        assert_eq!(
            infer_language(Some("pharoSnippet")),
            Some("pharo".to_string())
        );
        assert_eq!(
            infer_language(Some("javascriptSnippet")),
            Some("javascript".to_string())
        );
        assert_eq!(
            infer_language(Some("yamlSnippet")),
            Some("yaml".to_string())
        );
        assert_eq!(
            infer_language(Some("customSnippet")),
            Some("custom".to_string())
        );
        assert_eq!(infer_language(None), None);
    }

    #[test]
    fn render_nodes_outputs_unknown_placeholder() {
        let text = render_nodes_to_text(&[
            Node::Paragraph {
                text: "para".to_string(),
            },
            Node::Rewrite {
                language: Some("pharo".to_string()),
                search: "a".to_string(),
                replace: "b".to_string(),
                scope: None,
                is_method_pattern: Some(true),
            },
            Node::Unknown {
                typ: "weird".to_string(),
                raw: json!({"a":1}),
            },
        ]);
        assert!(text.contains("para"));
        assert!(text.contains("```diff pharo"));
        assert!(text.contains("-a"));
        assert!(text.contains("+b"));
        assert!(text.contains("[[unknown: weird]]"));
    }

    #[test]
    fn collect_node_types_counts_nested_values() -> Result<()> {
        let path = temp_file_path("types");
        let content = json!({
            "__type":"page",
            "children":{"__type":"snippets","items":[
                {"__type":"textSnippet","children":{"__type":"snippets","items":[]}},
                {"__type":"pythonSnippet","code":"print(1)"}
            ]}
        });
        fs::write(&path, serde_json::to_vec(&content)?)?;
        let counts = collect_node_types_in_file(&path)?;
        fs::remove_file(&path)?;

        assert_eq!(counts.get("page"), Some(&1));
        assert_eq!(counts.get("textSnippet"), Some(&1));
        assert_eq!(counts.get("pythonSnippet"), Some(&1));
        Ok(())
    }

    #[test]
    fn parse_page_meta_extracts_core_fields() -> Result<()> {
        let path = temp_file_path("meta");
        let content = json!({
            "uid":{"uuid":"id-123"},
            "pageType":{"title":"Title"},
            "editTime":{"time":{"dateAndTimeString":"2024-01-01T00:00:00+00:00"}},
            "tags":["t1","t2"]
        });
        fs::write(&path, serde_json::to_vec(&content)?)?;
        let meta = parse_page_meta(&path)?;
        fs::remove_file(&path)?;

        assert_eq!(meta.id, "id-123");
        assert_eq!(meta.title, "Title");
        assert_eq!(meta.tags, vec!["t1", "t2"]);
        assert!(meta.updated_at.is_some());
        Ok(())
    }

    #[test]
    fn parse_item_recursive_includes_children() {
        let root = json!({
            "__type":"textSnippet",
            "string":"parent",
            "children":{"items":[
                {"__type":"textSnippet","string":"child"}
            ]}
        });
        let mut out = Vec::new();
        parse_item_recursive(&root, &mut out);
        assert_eq!(out.len(), 2);
    }

    #[test]
    fn filter_page_ids_matches_title_id_and_tags() {
        let mut pages = HashMap::new();
        pages.insert(
            "id-1".to_string(),
            PageMeta {
                id: "id-1".to_string(),
                title: "Alpha".to_string(),
                path: PathBuf::from("/tmp/a"),
                updated_at: None,
                tags: vec!["rust".to_string()],
            },
        );
        pages.insert(
            "id-2".to_string(),
            PageMeta {
                id: "id-2".to_string(),
                title: "Beta".to_string(),
                path: PathBuf::from("/tmp/b"),
                updated_at: None,
                tags: vec!["pharo".to_string()],
            },
        );
        let index = KnowledgeBaseIndex {
            root: PathBuf::from("/tmp"),
            pages,
            index_issues: Vec::new(),
        };

        assert_eq!(index.filter_page_ids("alpha"), vec!["id-1".to_string()]);
        assert_eq!(index.filter_page_ids("id-2"), vec!["id-2".to_string()]);
        assert_eq!(index.filter_page_ids("pharo"), vec!["id-2".to_string()]);
        assert_eq!(
            index.filter_page_ids(""),
            vec!["id-1".to_string(), "id-2".to_string()]
        );
    }

    #[test]
    fn resolve_page_id_by_title_handles_unique_ambiguous_and_missing() {
        let mut pages = HashMap::new();
        pages.insert(
            "id-1".to_string(),
            PageMeta {
                id: "id-1".to_string(),
                title: "Alpha".to_string(),
                path: PathBuf::from("/tmp/a"),
                updated_at: None,
                tags: Vec::new(),
            },
        );
        pages.insert(
            "id-2".to_string(),
            PageMeta {
                id: "id-2".to_string(),
                title: "Alphabet".to_string(),
                path: PathBuf::from("/tmp/b"),
                updated_at: None,
                tags: Vec::new(),
            },
        );
        let index = KnowledgeBaseIndex {
            root: PathBuf::from("/tmp"),
            pages,
            index_issues: Vec::new(),
        };

        assert_eq!(
            index.resolve_page_id_by_title("Alpha"),
            TitleResolution::Unique("id-1".to_string())
        );
        assert!(matches!(
            index.resolve_page_id_by_title("alp"),
            TitleResolution::Ambiguous(_)
        ));
        assert_eq!(
            index.resolve_page_id_by_title("zzz"),
            TitleResolution::NotFound
        );
    }

    #[test]
    fn classify_link_target_covers_internal_attachment_external_unknown() {
        let mut pages = HashMap::new();
        pages.insert(
            "8a505fa0-2222-3333-4444-555555555555".to_string(),
            PageMeta {
                id: "8a505fa0-2222-3333-4444-555555555555".to_string(),
                title: "Alpha".to_string(),
                path: PathBuf::from("/tmp/a"),
                updated_at: None,
                tags: Vec::new(),
            },
        );
        let index = KnowledgeBaseIndex {
            root: PathBuf::from("/kb"),
            pages,
            index_issues: Vec::new(),
        };

        assert!(matches!(
            index.classify_link_target("8a505fa0-2222-3333-4444-555555555555"),
            LinkTargetKind::InternalPage(_)
        ));
        assert!(matches!(
            index.classify_link_target("title:alpha"),
            LinkTargetKind::InternalPage(_)
        ));
        assert!(matches!(
            index.classify_link_target("go to 8a505fa0-2222-3333-4444-555555555555 now"),
            LinkTargetKind::InternalPage(_)
        ));
        assert!(matches!(
            index.classify_link_target("attachments/image.png"),
            LinkTargetKind::AttachmentPath(_)
        ));
        assert!(matches!(
            index.classify_link_target("https://example.com"),
            LinkTargetKind::ExternalUrl(_)
        ));
        assert!(matches!(
            index.classify_link_target("not a thing"),
            LinkTargetKind::Unknown(_)
        ));
    }

    #[test]
    fn parse_word_node_extracts_primary_fields() {
        let item = json!({
            "__type":"wordSnippet",
            "wordString":"refactoring",
            "explanationAttachmentNameString":"attachments/x/explanation.json"
        });
        let node = parse_node(&item);
        match node {
            Node::Paragraph { text } => {
                assert!(text.contains("refactoring"));
                assert!(text.contains("attachments/x/explanation.json"));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }
}