// lepiter_core/lib.rs
//! Core data model and parser for Lepiter knowledge bases stored as page JSON files.
//!
//! # Scope
//! - Scans a Lepiter directory and builds a metadata index keyed by page id.
//! - Loads and parses individual pages lazily by id.
//! - Converts page snippet trees into a stable block-oriented node model.
//! - Preserves unknown node types as [`Node::Unknown`] to keep consumers resilient.
//! - Provides a plugin SDK for external snippet renderers (`plugin` module).
//!
//! # Example
//! ```no_run
//! use lepiter_core::KnowledgeBase;
//!
//! # fn main() -> anyhow::Result<()> {
//! let index = KnowledgeBase::open("./lepiter")?;
//! for page in index.sorted_pages() {
//!     println!("{} - {}", page.id, page.title);
//! }
//! # Ok(())
//! # }
//! ```
//!
//! # Plugin SDK
//! ```no_run
//! use lepiter_core::plugin::{PluginRequest, PluginResponse};
//! use lepiter_core::lepiter_plugin_main;
//!
//! fn handle(req: PluginRequest) -> PluginResponse {
//!     if req.typ != "wardleyMap" {
//!         return PluginResponse::error("unsupported type");
//!     }
//!     PluginResponse::ok(vec!["example".to_string()])
//! }
//!
//! lepiter_plugin_main!(handle);
//! ```

38use std::collections::HashMap;
39use std::fs::File;
40use std::io::BufReader;
41use std::path::{Component, Path, PathBuf};
42
43use anyhow::{Context, Result};
44use chrono::{DateTime, FixedOffset};
45use serde::Deserialize;
46use serde_json::Value;
47use thiserror::Error;
48use walkdir::WalkDir;
49
50pub mod plugin;
51
/// Expands to a `main` function that runs the plugin request/response loop
/// with the given handler via [`plugin::plugin_loop`].
///
/// `$handler` is a path to a function taking a `PluginRequest` and
/// returning a `PluginResponse` (see the crate-level plugin SDK example).
/// I/O errors from the loop propagate out of the generated `main`.
#[macro_export]
macro_rules! lepiter_plugin_main {
    ($handler:path) => {
        fn main() -> std::io::Result<()> {
            $crate::plugin::plugin_loop($handler)
        }
    };
}
60
/// Canonical page identifier used throughout the API.
///
/// A plain `String`: typically the page uuid from the file's `uid`
/// metadata, backfilled from the file stem when absent (see
/// [`KnowledgeBase::open`]).
pub type PageId = String;

/// Metadata for a page discovered during index scanning.
///
/// Built by [`KnowledgeBase::open`] without parsing page content; full
/// content is loaded lazily via [`KnowledgeBaseIndex::load_page`].
#[derive(Debug, Clone)]
pub struct PageMeta {
    /// Canonical page id (preferred key over filename).
    pub id: PageId,
    /// Human-readable page title.
    pub title: String,
    /// Pre-computed lowercased title for case-insensitive comparisons.
    pub title_lower: String,
    /// Absolute or relative path to the source page file.
    pub path: PathBuf,
    /// Last edit timestamp, if present in source metadata.
    pub updated_at: Option<DateTime<FixedOffset>>,
    /// Optional page tags extracted from metadata.
    pub tags: Vec<String>,
}

/// Fully parsed page content.
///
/// Produced by [`KnowledgeBaseIndex::load_page`]; the metadata fields
/// mirror the corresponding [`PageMeta`] entry.
#[derive(Debug, Clone)]
pub struct Page {
    /// Canonical page id.
    pub id: PageId,
    /// Page title.
    pub title: String,
    /// Last edit timestamp, if present.
    pub updated_at: Option<DateTime<FixedOffset>>,
    /// Page tags.
    pub tags: Vec<String>,
    /// Parsed block-level content.
    pub content: Vec<Node>,
}
95
/// Block-oriented normalized node model used by consumers (e.g. TUI).
///
/// Produced from raw snippet JSON by [`parse_node_from_raw`] and page
/// loading; snippet types without a dedicated mapping are preserved as
/// [`Node::Unknown`] together with their raw JSON.
#[derive(Debug, Clone)]
pub enum Node {
    /// Markdown-style heading. `level` is clamped to at most 6 by the parser.
    Heading { level: u8, text: String },
    /// Paragraph text.
    Paragraph { text: String },
    /// Plain text line (also used for blank text snippets).
    Text { text: String },
    /// List with item nodes (one `Vec<Node>` per list item).
    List { items: Vec<Vec<Node>> },
    /// Code block with optional language.
    Code {
        language: Option<String>,
        code: String,
    },
    /// Link block (also used for picture and youtube snippets).
    Link { text: String, url: String },
    /// Quote block.
    Quote { text: String },
    /// Rewrite block (search/replace transformation).
    Rewrite {
        language: Option<String>,
        search: String,
        replace: String,
        scope: Option<String>,
        is_method_pattern: Option<bool>,
    },
    /// Unknown/unsupported source node type preserved losslessly.
    Unknown { typ: String, raw: Value },
}
127
/// Parses a single raw snippet JSON value into a [`Node`].
///
/// This is a best-effort conversion that preserves unknown snippet types as
/// [`Node::Unknown`]. It is intended for tooling that operates on raw JSON
/// without loading a full page.
///
/// Unlike page loading, this does not recurse into the value's
/// `children`; only the given snippet itself is converted.
pub fn parse_node_from_raw(item: &Value) -> Node {
    parse_node(item)
}
136
/// Non-fatal parse/indexing issue associated with a source file.
#[derive(Debug, Clone)]
pub struct ParseIssue {
    /// File path where the issue occurred.
    pub path: PathBuf,
    /// Human-readable error description.
    pub message: String,
}

/// Match category for search results.
///
/// Used by [`KnowledgeBaseIndex::search_hits`] to distinguish cheap
/// metadata matches from full-content matches.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchMatchKind {
    /// Match came from page metadata (title/id/tags).
    Meta,
    /// Match came from rendered page content.
    Content,
}

/// Search result entry for one page.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchHit {
    /// Canonical page id.
    pub id: PageId,
    /// How this page matched.
    pub kind: SearchMatchKind,
}

/// Classification of a raw link target.
///
/// Returned by [`KnowledgeBaseIndex::classify_link_target`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkTargetKind {
    /// Resolved to an internal page id.
    InternalPage(PageId),
    /// Resolved to an attachment file path in the knowledge base.
    AttachmentPath(PathBuf),
    /// Resolved to an external URL/scheme target.
    ExternalUrl(String),
    /// Could not classify target.
    Unknown(String),
}

/// Resolved attachment target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedAttachment {
    /// Full path to the attachment.
    pub path: PathBuf,
    /// Whether the attachment exists on disk.
    pub exists: bool,
}

/// Attachment resolution failures.
#[derive(Debug, Error)]
pub enum AttachmentError {
    /// The raw target was empty or whitespace-only.
    #[error("attachment target was empty")]
    Empty,
    /// The target did not contain an `attachments/` segment.
    #[error("attachment target not recognized: {0}")]
    NotAttachment(String),
    /// The path was absolute or contained prefix/root/`..` components.
    #[error("attachment path escapes knowledge base root: {0}")]
    EscapesRoot(String),
    /// The resolved path does not exist on disk.
    #[error("attachment not found: {0}")]
    Missing(PathBuf),
}

/// Crate-internal shorthand for attachment resolution results.
type AttachmentResult<T> = std::result::Result<T, AttachmentError>;

/// Resolves attachment targets relative to the knowledge base root.
#[derive(Debug, Clone)]
pub struct AttachmentResolver {
    // Knowledge base root directory all attachment paths are joined onto.
    root: PathBuf,
}
206
207impl AttachmentResolver {
208    /// Creates a resolver rooted at the knowledge base path.
209    pub fn new(root: impl AsRef<Path>) -> Self {
210        Self {
211            root: root.as_ref().to_path_buf(),
212        }
213    }
214
215    /// Resolves an attachment target to a path and existence flag.
216    pub fn resolve(&self, raw: &str) -> AttachmentResult<ResolvedAttachment> {
217        let target = raw.trim();
218        if target.is_empty() {
219            return Err(AttachmentError::Empty);
220        }
221        let rel = extract_attachment_relative(target)
222            .ok_or_else(|| AttachmentError::NotAttachment(target.to_string()))?;
223        let rel = sanitize_relative_path(rel)?;
224        let path = self.root.join(rel);
225        let exists = path.exists();
226        Ok(ResolvedAttachment { path, exists })
227    }
228
229    /// Resolves an attachment target to a path only (ignores missing).
230    pub fn resolve_path(&self, raw: &str) -> Option<PathBuf> {
231        self.resolve(raw).ok().map(|resolved| resolved.path)
232    }
233
234    /// Resolves an attachment target and ensures the file exists.
235    pub fn resolve_existing(&self, raw: &str) -> AttachmentResult<PathBuf> {
236        let resolved = self.resolve(raw)?;
237        if resolved.exists {
238            Ok(resolved.path)
239        } else {
240            Err(AttachmentError::Missing(resolved.path))
241        }
242    }
243
244    /// Returns the resolver root.
245    pub fn root(&self) -> &Path {
246        &self.root
247    }
248}
249
/// Result of resolving a page by title.
///
/// Produced by [`KnowledgeBaseIndex::resolve_page_id_by_title`], which
/// tries a case-insensitive exact match before substring matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TitleResolution {
    /// A unique page id was resolved.
    Unique(PageId),
    /// No matching title found.
    NotFound,
    /// Multiple candidate page ids matched.
    Ambiguous(Vec<PageId>),
}

/// Indexed knowledge base metadata with lazy page loading.
#[derive(Debug, Clone)]
pub struct KnowledgeBaseIndex {
    // Directory the index was built from; used to resolve attachments
    // and retained for callers via `root()`.
    root: PathBuf,
    /// Metadata map keyed by canonical page id.
    pub pages: HashMap<PageId, PageMeta>,
    /// Page ids in case-insensitive title sort order, computed once at open time.
    pub sorted_ids: Vec<PageId>,
    /// Non-fatal issues encountered while scanning metadata.
    pub index_issues: Vec<ParseIssue>,
}

/// Entry point for opening a Lepiter knowledge base directory.
///
/// Unit struct; see [`KnowledgeBase::open`].
pub struct KnowledgeBase;
275
276impl KnowledgeBase {
277    /// Scans a knowledge base directory and builds a page metadata index.
278    ///
279    /// This operation only reads metadata and does not parse full page content.
280    /// Full parsing is done lazily via [`KnowledgeBaseIndex::load_page`].
281    pub fn open(path: impl AsRef<Path>) -> Result<KnowledgeBaseIndex> {
282        let root = path.as_ref().to_path_buf();
283        let mut pages = HashMap::new();
284        let mut issues = Vec::new();
285
286        for entry in WalkDir::new(&root)
287            .min_depth(1)
288            .max_depth(1)
289            .into_iter()
290            .filter_map(|e| e.ok())
291        {
292            let file_type = entry.file_type();
293            let file_path = entry.path();
294            if !file_type.is_file()
295                || file_path.extension().and_then(|e| e.to_str()) != Some("lepiter")
296            {
297                continue;
298            }
299
300            match parse_page_meta(file_path) {
301                Ok(mut meta) => {
302                    if meta.id.is_empty()
303                        && let Some(stem) = file_path.file_stem().and_then(|s| s.to_str())
304                    {
305                        meta.id = stem.to_string();
306                    }
307                    if meta.title.is_empty() {
308                        meta.title = meta.id.clone();
309                    }
310                    pages.insert(meta.id.clone(), meta);
311                }
312                Err(err) => issues.push(ParseIssue {
313                    path: file_path.to_path_buf(),
314                    message: format!("{err:#}"),
315                }),
316            }
317        }
318
319        let sorted_ids = compute_sorted_ids(&pages);
320
321        Ok(KnowledgeBaseIndex {
322            root,
323            pages,
324            sorted_ids,
325            index_issues: issues,
326        })
327    }
328}
329
impl KnowledgeBaseIndex {
    /// Loads and parses a single page by canonical id.
    ///
    /// Returns an error if the id is missing from the index or if JSON parsing fails.
    pub fn load_page(&self, id: &str) -> Result<Page> {
        let meta = self
            .pages
            .get(id)
            .with_context(|| format!("page id not found: {id}"))?;

        let file = File::open(&meta.path)
            .with_context(|| format!("failed to open page file {}", meta.path.display()))?;
        let reader = BufReader::new(file);
        let raw: Value =
            serde_json::from_reader(reader).with_context(|| "failed to decode page JSON")?;

        // Top-level snippets live under `children.items`; each snippet and
        // its descendants are flattened into `content`.
        let mut content = Vec::new();
        if let Some(items) = raw
            .get("children")
            .and_then(|v| v.get("items"))
            .and_then(Value::as_array)
        {
            for item in items {
                parse_item_recursive(item, &mut content);
            }
        }

        Ok(Page {
            id: meta.id.clone(),
            title: meta.title.clone(),
            updated_at: meta.updated_at,
            tags: meta.tags.clone(),
            content,
        })
    }

    /// Returns metadata entries in cached title-sorted order.
    ///
    /// Ids without a metadata entry are silently skipped.
    pub fn sorted_pages(&self) -> Vec<&PageMeta> {
        self.sorted_ids
            .iter()
            .filter_map(|id| self.pages.get(id))
            .collect()
    }

    /// Returns page ids filtered by metadata query (title/id/tags), sorted by title.
    ///
    /// An empty (or whitespace-only) query returns all page ids.
    pub fn filter_page_ids(&self, query: &str) -> Vec<PageId> {
        let needle = query.trim().to_lowercase();
        let mut metas = self.sorted_pages();
        if !needle.is_empty() {
            metas.retain(|m| page_meta_matches(m, &needle));
        }
        metas.into_iter().map(|m| m.id.clone()).collect()
    }

    /// Searches pages by metadata and optionally content, returning sorted hits.
    ///
    /// Metadata matches take precedence over content matches; page content
    /// is only loaded when `include_content` is true, and pages that fail
    /// to load are silently skipped.
    pub fn search_hits(&self, query: &str, include_content: bool) -> Vec<SearchHit> {
        let needle = query.trim().to_lowercase();
        if needle.is_empty() {
            return Vec::new();
        }

        let mut by_id: HashMap<PageId, SearchMatchKind> = HashMap::new();
        let metas = self.sorted_pages();

        for meta in &metas {
            if page_meta_matches(meta, &needle) {
                by_id.insert(meta.id.clone(), SearchMatchKind::Meta);
            }
        }

        if include_content {
            for meta in &metas {
                // Already matched on metadata; skip the expensive load.
                if by_id.contains_key(&meta.id) {
                    continue;
                }
                let Ok(page) = self.load_page(&meta.id) else {
                    continue;
                };
                if render_page_to_text(&page).to_lowercase().contains(&needle) {
                    by_id.insert(meta.id.clone(), SearchMatchKind::Content);
                }
            }
        }

        // Emit hits in the cached title-sort order, not map order.
        let mut hits = Vec::new();
        for meta in metas {
            if let Some(kind) = by_id.get(&meta.id) {
                hits.push(SearchHit {
                    id: meta.id.clone(),
                    kind: *kind,
                });
            }
        }
        hits
    }

    /// Resolves a page id from title using case-insensitive exact match, then partial match.
    ///
    /// Exact matches on the lowercased title win; only when there is no
    /// exact match does substring matching run.
    pub fn resolve_page_id_by_title(&self, title: &str) -> TitleResolution {
        let needle = title.trim().to_lowercase();
        if needle.is_empty() {
            return TitleResolution::NotFound;
        }

        let sorted = self.sorted_pages();

        let exact = sorted
            .iter()
            .filter(|m| m.title_lower == needle)
            .map(|m| m.id.clone())
            .collect::<Vec<_>>();
        match exact.len() {
            1 => return TitleResolution::Unique(exact[0].clone()),
            n if n > 1 => return TitleResolution::Ambiguous(exact),
            _ => {}
        }

        let partial = sorted
            .iter()
            .filter(|m| m.title_lower.contains(&needle))
            .map(|m| m.id.clone())
            .collect::<Vec<_>>();
        match partial.len() {
            1 => TitleResolution::Unique(partial[0].clone()),
            0 => TitleResolution::NotFound,
            _ => TitleResolution::Ambiguous(partial),
        }
    }

    /// Classifies a raw link target for navigation/open behavior.
    ///
    /// Resolution order: literal page id, `page:` prefix (id, then title),
    /// `title:` prefix, embedded uuid, external URL scheme, attachment
    /// path, and finally a bare-title lookup.
    pub fn classify_link_target(&self, raw: &str) -> LinkTargetKind {
        let target = raw.trim();
        if target.is_empty() {
            return LinkTargetKind::Unknown(raw.to_string());
        }

        if self.pages.contains_key(target) {
            return LinkTargetKind::InternalPage(target.to_string());
        }

        if let Some(rest) = target.strip_prefix("page:") {
            let id = rest.trim();
            if self.pages.contains_key(id) {
                return LinkTargetKind::InternalPage(id.to_string());
            }
            // `page:` may carry a title instead of an id; only a unique
            // title match resolves (falls through otherwise).
            if let TitleResolution::Unique(resolved) = self.resolve_page_id_by_title(id) {
                return LinkTargetKind::InternalPage(resolved);
            }
        }
        if let Some(rest) = target.strip_prefix("title:") {
            return match self.resolve_page_id_by_title(rest.trim()) {
                TitleResolution::Unique(id) => LinkTargetKind::InternalPage(id),
                _ => LinkTargetKind::Unknown(target.to_string()),
            };
        }

        // A uuid embedded anywhere in the target counts when it is a known page.
        if let Some(uuid) = extract_uuid_like(target)
            && self.pages.contains_key(uuid)
        {
            return LinkTargetKind::InternalPage(uuid.to_string());
        }

        if is_external_target(target) {
            return LinkTargetKind::ExternalUrl(target.to_string());
        }

        if let Some(path) = self.attachment_resolver().resolve_path(target) {
            return LinkTargetKind::AttachmentPath(path);
        }

        match self.resolve_page_id_by_title(target) {
            TitleResolution::Unique(id) => LinkTargetKind::InternalPage(id),
            _ => LinkTargetKind::Unknown(target.to_string()),
        }
    }

    /// Returns the root path used to build this index.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Returns an attachment resolver rooted at this knowledge base.
    ///
    /// Constructs a fresh resolver per call (clones the root path).
    pub fn attachment_resolver(&self) -> AttachmentResolver {
        AttachmentResolver::new(&self.root)
    }
}
515
516fn compute_sorted_ids(pages: &HashMap<PageId, PageMeta>) -> Vec<PageId> {
517    let mut entries: Vec<_> = pages.values().collect();
518    entries.sort_by(|a, b| a.title_lower.cmp(&b.title_lower));
519    entries.into_iter().map(|m| m.id.clone()).collect()
520}
521
522fn page_meta_matches(meta: &PageMeta, needle: &str) -> bool {
523    meta.title_lower.contains(needle)
524        || meta.id.to_lowercase().contains(needle)
525        || meta.tags.iter().any(|t| t.to_lowercase().contains(needle))
526}
527
/// Returns true for targets with an explicit URL scheme, matched
/// case-insensitively.
fn is_external_target(target: &str) -> bool {
    let lower = target.to_lowercase();
    // `http://`, `https://`, `file://`, and any other scheme all contain
    // `://`; only `mailto:` needs a dedicated prefix test.
    lower.contains("://") || lower.starts_with("mailto:")
}
536
/// Extracts the `attachments/…` suffix from a raw link target, if any.
///
/// Returns the relative path starting at `attachments/` so it can be
/// joined onto the knowledge base root. Matches, in order:
/// 1. targets that already start with `attachments/` (returned whole),
/// 2. an embedded `/attachments/` path segment,
/// 3. a best-effort bare `attachments/` substring anywhere in the target.
///
/// The original first branch computed `strip_prefix` only to discard the
/// result via `Some(rest).map(|_| target)`; this is the same behavior
/// written directly.
fn extract_attachment_relative(target: &str) -> Option<&str> {
    if target.starts_with("attachments/") {
        return Some(target);
    }
    if let Some(pos) = target.find("/attachments/") {
        // Skip the leading slash so the result starts at `attachments/`.
        return target.get(pos + 1..);
    }
    if let Some(pos) = target.find("attachments/") {
        return target.get(pos..);
    }
    None
}
550
551fn sanitize_relative_path(rel: &str) -> AttachmentResult<PathBuf> {
552    let rel = rel.trim();
553    if rel.is_empty() {
554        return Err(AttachmentError::Empty);
555    }
556    let path = Path::new(rel);
557    if path.is_absolute() {
558        return Err(AttachmentError::EscapesRoot(rel.to_string()));
559    }
560    for comp in path.components() {
561        match comp {
562            Component::Prefix(_) | Component::RootDir | Component::ParentDir => {
563                return Err(AttachmentError::EscapesRoot(rel.to_string()));
564            }
565            _ => {}
566        }
567    }
568    Ok(path.to_path_buf())
569}
570
/// Finds the first UUID-shaped substring (8-4-4-4-12 hex groups) in `input`.
///
/// Scans fixed-size byte windows instead of slicing the string at
/// arbitrary offsets: the previous `&input[i..i + 36]` panicked whenever
/// `i` fell inside a multi-byte UTF-8 character. Matching bytes are all
/// ASCII, so the final slice is guaranteed to lie on char boundaries.
fn extract_uuid_like(input: &str) -> Option<&str> {
    const UUID_LEN: usize = 36;
    let bytes = input.as_bytes();
    if bytes.len() < UUID_LEN {
        return None;
    }

    for i in 0..=bytes.len() - UUID_LEN {
        let window = &bytes[i..i + UUID_LEN];
        let ok = window.iter().enumerate().all(|(idx, &b)| match idx {
            // Dash separators at the canonical group boundaries.
            8 | 13 | 18 | 23 => b == b'-',
            _ => b.is_ascii_hexdigit(),
        });
        if ok {
            // Window is pure ASCII, so this cannot split a code point.
            return input.get(i..i + UUID_LEN);
        }
    }
    None
}
589
/// Top-level subset of a `.lepiter` page file used for metadata scanning.
///
/// Only the fields needed by [`parse_page_meta`] are deserialized; the
/// rest of the document is ignored.
#[derive(Debug, Deserialize)]
struct RawMeta {
    #[serde(default)]
    uid: Option<RawUid>,
    #[serde(default)]
    #[serde(rename = "pageType")]
    page_type: Option<RawPageType>,
    #[serde(default)]
    title: Option<String>,
    #[serde(default)]
    #[serde(rename = "editTime")]
    edit_time: Option<RawEditTime>,
    // Kept as raw JSON because the tag layout varies; see `parse_tags`.
    #[serde(default)]
    tags: Option<Value>,
}

/// Page uid container; either `uuid` or `uidString` may be present.
#[derive(Debug, Deserialize)]
struct RawUid {
    #[serde(default)]
    uuid: Option<String>,
    #[serde(default)]
    #[serde(rename = "uidString")]
    uid_string: Option<String>,
}

/// `pageType` object; its `title` takes precedence over the top-level title.
#[derive(Debug, Deserialize)]
struct RawPageType {
    #[serde(default)]
    title: Option<String>,
}

/// `editTime` wrapper around the timestamp value object.
#[derive(Debug, Deserialize)]
struct RawEditTime {
    #[serde(default)]
    time: Option<RawTimeValue>,
}

/// Timestamp value; `dateAndTimeString` is parsed as RFC 3339.
#[derive(Debug, Deserialize)]
struct RawTimeValue {
    #[serde(default)]
    #[serde(rename = "dateAndTimeString")]
    date_and_time_string: Option<String>,
}
633
634fn parse_page_meta(path: &Path) -> Result<PageMeta> {
635    let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
636    let reader = BufReader::new(file);
637    let raw: RawMeta =
638        serde_json::from_reader(reader).with_context(|| "failed to decode page metadata")?;
639
640    let id = raw
641        .uid
642        .as_ref()
643        .and_then(|u| u.uuid.clone().or_else(|| u.uid_string.clone()))
644        .unwrap_or_default();
645    let title = raw
646        .page_type
647        .and_then(|pt| pt.title)
648        .or(raw.title)
649        .unwrap_or_default();
650    let updated_at = raw
651        .edit_time
652        .and_then(|e| e.time)
653        .and_then(|t| t.date_and_time_string)
654        .and_then(|s| DateTime::parse_from_rfc3339(&s).ok());
655    let tags = parse_tags(raw.tags.as_ref());
656
657    let title_lower = title.to_lowercase();
658    Ok(PageMeta {
659        id,
660        title,
661        title_lower,
662        path: path.to_path_buf(),
663        updated_at,
664        tags,
665    })
666}
667
668fn parse_tags(value: Option<&Value>) -> Vec<String> {
669    let Some(value) = value else {
670        return Vec::new();
671    };
672    match value {
673        Value::Array(items) => items
674            .iter()
675            .filter_map(|item| {
676                item.as_str()
677                    .map(ToOwned::to_owned)
678                    .or_else(|| {
679                        item.get("name")
680                            .and_then(Value::as_str)
681                            .map(ToOwned::to_owned)
682                    })
683                    .or_else(|| {
684                        item.get("title")
685                            .and_then(Value::as_str)
686                            .map(ToOwned::to_owned)
687                    })
688            })
689            .collect(),
690        Value::Object(obj) => obj
691            .get("items")
692            .and_then(Value::as_array)
693            .map(|items| {
694                items
695                    .iter()
696                    .filter_map(|i| {
697                        i.get("title")
698                            .and_then(Value::as_str)
699                            .map(ToOwned::to_owned)
700                    })
701                    .collect::<Vec<_>>()
702            })
703            .unwrap_or_default(),
704        _ => Vec::new(),
705    }
706}
707
708fn parse_item_recursive(item: &Value, out: &mut Vec<Node>) {
709    let typ = extract_type(item);
710    out.push(parse_node(item));
711    if matches!(typ.as_deref(), Some("listSnippet")) {
712        // list snippets already materialize children into Node::List items.
713        return;
714    }
715    if let Some(children) = item
716        .get("children")
717        .and_then(|v| v.get("items"))
718        .and_then(Value::as_array)
719    {
720        for child in children {
721            parse_item_recursive(child, out);
722        }
723    }
724}
725
/// Maps one raw snippet value onto the normalized [`Node`] model.
///
/// Dispatches on the snippet's `type`/`__type` discriminator. Code-like
/// snippet types collapse into [`Node::Code`]; link-like snippets
/// without a usable link, unrecognized types, and values missing a type
/// are preserved as [`Node::Unknown`].
fn parse_node(item: &Value) -> Node {
    let typ = extract_type(item);

    match typ.as_deref() {
        // Text snippets may still be headings/quotes; see parse_text_like_node.
        Some("textSnippet") => parse_text_like_node(item),
        Some("quoteSnippet") | Some("blockQuoteSnippet") | Some("commentSnippet") => Node::Quote {
            text: extract_text(item).unwrap_or_default(),
        },
        Some("listSnippet") => parse_list_node(item),
        Some("pictureSnippet") => parse_picture_node(item),
        Some("youtubeSnippet") => parse_youtube_node(item),
        Some("elementSnippet") => parse_element_node(item),
        Some("pharoRewrite") => parse_rewrite_node(item),
        Some("wordSnippet") => parse_word_node(item),
        // All code-bearing snippet types share one Code mapping; the
        // language is inferred from the snippet type name.
        Some(
            t @ ("pharoSnippet"
            | "pythonSnippet"
            | "javascriptSnippet"
            | "shellCommandSnippet"
            | "gemstoneSnippet"
            | "exampleSnippet"
            | "changesSnippet"
            | "robocoderMetamodelSnippet"),
        ) => Node::Code {
            language: infer_language(Some(t)),
            code: extract_code(item)
                .or_else(|| extract_text(item))
                .unwrap_or_default(),
        },
        // Guarded arms: link snippets without a link fall through to Unknown.
        Some(t @ "pharoLinkSnippet") if has_link(item) => Node::Link {
            text: extract_text(item).unwrap_or_else(|| t.to_string()),
            url: extract_link(item).unwrap_or_default(),
        },
        Some("linkSnippet") if has_link(item) => Node::Link {
            text: extract_text(item).unwrap_or_else(|| "link".to_string()),
            url: extract_link(item).unwrap_or_default(),
        },
        Some(t) => Node::Unknown {
            typ: t.to_string(),
            raw: item.clone(),
        },
        None => Node::Unknown {
            typ: "<missing-type>".to_string(),
            raw: item.clone(),
        },
    }
}
773
774fn parse_text_like_node(item: &Value) -> Node {
775    let text = extract_text(item).unwrap_or_default();
776    if let Some((level, heading)) = parse_heading(&text) {
777        Node::Heading {
778            level,
779            text: heading,
780        }
781    } else if let Some(stripped) = text.strip_prefix("> ") {
782        Node::Quote {
783            text: stripped.to_string(),
784        }
785    } else if text.trim().is_empty() {
786        Node::Text { text }
787    } else {
788        Node::Paragraph { text }
789    }
790}
791
792fn parse_list_node(item: &Value) -> Node {
793    let mut items = Vec::new();
794    if let Some(children) = item
795        .get("children")
796        .and_then(|v| v.get("items"))
797        .and_then(Value::as_array)
798    {
799        for child in children {
800            items.push(vec![parse_node(child)]);
801        }
802    }
803    Node::List { items }
804}
805
806fn parse_picture_node(item: &Value) -> Node {
807    let url = item
808        .get("url")
809        .and_then(Value::as_str)
810        .map(ToOwned::to_owned)
811        .or_else(|| extract_link(item))
812        .unwrap_or_default();
813    let text = item
814        .get("caption")
815        .and_then(Value::as_str)
816        .map(ToOwned::to_owned)
817        .or_else(|| extract_text(item))
818        .unwrap_or_else(|| "picture".to_string());
819
820    if url.is_empty() {
821        Node::Unknown {
822            typ: "pictureSnippet".to_string(),
823            raw: item.clone(),
824        }
825    } else {
826        Node::Link { text, url }
827    }
828}
829
830fn parse_youtube_node(item: &Value) -> Node {
831    let url = item
832        .get("youtubeUrl")
833        .and_then(Value::as_str)
834        .map(ToOwned::to_owned)
835        .or_else(|| extract_link(item))
836        .unwrap_or_default();
837    let text = extract_text(item).unwrap_or_else(|| "youtube".to_string());
838
839    if url.is_empty() {
840        Node::Unknown {
841            typ: "youtubeSnippet".to_string(),
842            raw: item.clone(),
843        }
844    } else {
845        Node::Link { text, url }
846    }
847}
848
849fn parse_element_node(item: &Value) -> Node {
850    let code = extract_code(item).or_else(|| extract_text(item));
851    if let Some(code) = code.filter(|c| !c.trim().is_empty()) {
852        Node::Code {
853            language: Some("element".to_string()),
854            code,
855        }
856    } else {
857        Node::Unknown {
858            typ: "elementSnippet".to_string(),
859            raw: item.clone(),
860        }
861    }
862}
863
864fn parse_rewrite_node(item: &Value) -> Node {
865    let search = item
866        .get("search")
867        .and_then(Value::as_str)
868        .map(ToOwned::to_owned)
869        .unwrap_or_default();
870    let replace = item
871        .get("replace")
872        .and_then(Value::as_str)
873        .map(ToOwned::to_owned)
874        .unwrap_or_default();
875    let scope = item
876        .get("scope")
877        .and_then(Value::as_str)
878        .map(ToOwned::to_owned);
879    let is_method_pattern = item.get("isMethodPattern").and_then(Value::as_bool);
880
881    if search.is_empty() && replace.is_empty() {
882        Node::Unknown {
883            typ: "pharoRewrite".to_string(),
884            raw: item.clone(),
885        }
886    } else {
887        Node::Rewrite {
888            language: Some("pharo".to_string()),
889            search,
890            replace,
891            scope,
892            is_method_pattern,
893        }
894    }
895}
896
897fn parse_word_node(item: &Value) -> Node {
898    let mut lines = Vec::new();
899
900    if let Some(word) = item
901        .get("wordString")
902        .and_then(Value::as_str)
903        .map(str::trim)
904        .filter(|s| !s.is_empty())
905    {
906        lines.push(word.to_string());
907    }
908
909    if let Some(explanation) = item
910        .get("explanationAttachmentNameString")
911        .and_then(Value::as_str)
912        .map(str::trim)
913        .filter(|s| !s.is_empty())
914    {
915        lines.push(format!("explanation: {explanation}"));
916    }
917
918    if lines.is_empty() {
919        collect_text_fragments(item, &mut lines, 0, 12);
920    }
921
922    lines.retain(|s| !s.trim().is_empty());
923    lines.truncate(8);
924
925    if lines.is_empty() {
926        return Node::Unknown {
927            typ: "wordSnippet".to_string(),
928            raw: item.clone(),
929        };
930    }
931
932    let mut text = lines.join("\n");
933    if text.chars().count() > 1200 {
934        text = text.chars().take(1199).collect::<String>();
935        text.push('…');
936    }
937
938    Node::Paragraph { text }
939}
940
/// Recursively gathers non-empty string fragments from `value` into `out`.
///
/// `remaining` acts as a cap on the total size of `out` (it is passed
/// through unchanged on recursion, not decremented — the name is
/// historical); `depth` limits recursion to 4 nested levels.
/// Metadata-like object keys are skipped so only content text is
/// collected; remaining object fields are visited in map iteration order.
fn collect_text_fragments(value: &Value, out: &mut Vec<String>, depth: usize, remaining: usize) {
    if remaining == 0 || out.len() >= remaining || depth > 4 {
        return;
    }

    match value {
        Value::String(s) => {
            let trimmed = s.trim();
            if !trimmed.is_empty() {
                out.push(trimmed.to_string());
            }
        }
        Value::Array(items) => {
            for item in items {
                // Re-check the cap before each child; recursion may fill `out`.
                if out.len() >= remaining {
                    break;
                }
                collect_text_fragments(item, out, depth + 1, remaining);
            }
        }
        Value::Object(map) => {
            for (key, item) in map {
                // Skip structural/audit metadata fields; they never hold
                // user-facing content text.
                if matches!(
                    key.as_str(),
                    "__type"
                        | "children"
                        | "uid"
                        | "createEmail"
                        | "createTime"
                        | "editEmail"
                        | "editTime"
                        | "paragraphStyle"
                ) {
                    continue;
                }
                if out.len() >= remaining {
                    break;
                }
                collect_text_fragments(item, out, depth + 1, remaining);
            }
        }
        // Numbers, booleans, and nulls contribute no text.
        _ => {}
    }
}
985
/// Interprets `input` as a markdown-style heading.
///
/// Returns the heading level (clamped to 6) and the trailing title text,
/// or `None` when the trimmed input has no leading `#` characters or no
/// text after them.
fn parse_heading(input: &str) -> Option<(u8, String)> {
    let line = input.trim();
    // `'#'` is ASCII, so the byte-length difference equals the hash count.
    let after_hashes = line.trim_start_matches('#');
    let level = line.len() - after_hashes.len();
    if level == 0 {
        return None;
    }
    let title = after_hashes.trim_start();
    if title.is_empty() {
        None
    } else {
        Some((level.min(6) as u8, title.to_string()))
    }
}
998
999fn extract_type(item: &Value) -> Option<String> {
1000    item.get("type")
1001        .and_then(Value::as_str)
1002        .map(ToOwned::to_owned)
1003        .or_else(|| {
1004            item.get("__type")
1005                .and_then(Value::as_str)
1006                .map(ToOwned::to_owned)
1007        })
1008}
1009
1010fn extract_text(item: &Value) -> Option<String> {
1011    item.get("string")
1012        .and_then(Value::as_str)
1013        .map(ToOwned::to_owned)
1014        .or_else(|| {
1015            item.get("text")
1016                .and_then(Value::as_str)
1017                .map(ToOwned::to_owned)
1018        })
1019        .or_else(|| {
1020            item.get("content")
1021                .and_then(Value::as_str)
1022                .map(ToOwned::to_owned)
1023        })
1024}
1025
1026fn extract_code(item: &Value) -> Option<String> {
1027    item.get("code")
1028        .and_then(Value::as_str)
1029        .map(ToOwned::to_owned)
1030        .or_else(|| {
1031            item.get("source")
1032                .and_then(Value::as_str)
1033                .map(ToOwned::to_owned)
1034        })
1035}
1036
1037fn extract_link(item: &Value) -> Option<String> {
1038    item.get("url")
1039        .and_then(Value::as_str)
1040        .map(ToOwned::to_owned)
1041        .or_else(|| {
1042            item.get("href")
1043                .and_then(Value::as_str)
1044                .map(ToOwned::to_owned)
1045        })
1046}
1047
1048fn has_link(item: &Value) -> bool {
1049    item.get("url").and_then(Value::as_str).is_some()
1050        || item.get("href").and_then(Value::as_str).is_some()
1051}
1052
/// Maps a Lepiter snippet type name to a code-fence language identifier.
///
/// Known snippet types map to their canonical language names; any other
/// `*Snippet` type falls back to the lowercased prefix before `Snippet`.
/// Returns `None` when the type is absent, does not end in `Snippet`, or
/// would produce an empty language tag (e.g. a bare `"Snippet"`).
fn infer_language(typ: Option<&str>) -> Option<String> {
    let typ = typ?;
    match typ {
        "pharoSnippet" => Some("pharo".to_string()),
        "pythonSnippet" => Some("python".to_string()),
        "javascriptSnippet" => Some("javascript".to_string()),
        "jsonSnippet" => Some("json".to_string()),
        "yamlSnippet" => Some("yaml".to_string()),
        _ => typ
            // `strip_suffix` removes the suffix exactly once; the previous
            // `trim_end_matches` stripped it repeatedly, and a bare
            // "Snippet" yielded an empty language string.
            .strip_suffix("Snippet")
            .filter(|prefix| !prefix.is_empty())
            .map(str::to_lowercase),
    }
}
1070
1071/// Renders a parsed page to plain text.
1072pub fn render_page_to_text(page: &Page) -> String {
1073    render_nodes_to_text(&page.content)
1074}
1075
1076/// Renders normalized nodes to plain text.
1077pub fn render_nodes_to_text(nodes: &[Node]) -> String {
1078    let mut out = String::new();
1079    for node in nodes {
1080        match node {
1081            Node::Heading { level, text } => {
1082                out.push_str(&"#".repeat((*level).max(1) as usize));
1083                out.push(' ');
1084                out.push_str(text);
1085                out.push_str("\n\n");
1086            }
1087            Node::Paragraph { text } => {
1088                out.push_str(text);
1089                out.push_str("\n\n");
1090            }
1091            Node::Text { text } => {
1092                out.push_str(text);
1093                out.push('\n');
1094            }
1095            Node::List { items } => {
1096                for item in items {
1097                    out.push_str("- ");
1098                    out.push_str(render_nodes_to_text(item).trim());
1099                    out.push('\n');
1100                }
1101                out.push('\n');
1102            }
1103            Node::Code { language, code } => {
1104                out.push_str("```");
1105                if let Some(lang) = language {
1106                    out.push_str(lang);
1107                }
1108                out.push('\n');
1109                out.push_str(code);
1110                out.push_str("\n```\n\n");
1111            }
1112            Node::Link { text, url } => {
1113                out.push_str(&format!("[{text}]({url})\n\n"));
1114            }
1115            Node::Quote { text } => {
1116                out.push_str(&format!("> {text}\n\n"));
1117            }
1118            Node::Rewrite {
1119                language,
1120                search,
1121                replace,
1122                scope,
1123                is_method_pattern,
1124            } => {
1125                let lang = language.clone().unwrap_or_else(|| "rewrite".to_string());
1126                out.push_str(&format!("```diff {lang}\n"));
1127                if let Some(scope) = scope {
1128                    out.push_str(&format!("# scope: {scope}\n"));
1129                }
1130                if let Some(is_method_pattern) = is_method_pattern {
1131                    out.push_str(&format!("# method_pattern: {is_method_pattern}\n"));
1132                }
1133                for line in normalize_text(search).lines() {
1134                    out.push('-');
1135                    out.push_str(line);
1136                    out.push('\n');
1137                }
1138                for line in normalize_text(replace).lines() {
1139                    out.push('+');
1140                    out.push_str(line);
1141                    out.push('\n');
1142                }
1143                out.push_str("```\n\n");
1144            }
1145            Node::Unknown { typ, .. } => {
1146                out.push_str(&format!("[[unknown: {typ}]]\n\n"));
1147            }
1148        }
1149    }
1150    out
1151}
1152
/// Converts CRLF and lone CR line endings to plain LF in a single pass.
pub fn normalize_text(input: &str) -> String {
    let mut normalized = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\r' {
            // Swallow the LF of a CRLF pair; either way emit a single LF.
            if chars.peek() == Some(&'\n') {
                chars.next();
            }
            normalized.push('\n');
        } else {
            normalized.push(c);
        }
    }
    normalized
}
1156
1157/// Collects all observed `type`/`__type` values and their counts in one page file.
1158pub fn collect_node_types_in_file(path: &Path) -> Result<HashMap<String, usize>> {
1159    let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
1160    let reader = BufReader::new(file);
1161    let raw: Value = serde_json::from_reader(reader).with_context(|| "failed to decode JSON")?;
1162
1163    let mut out = HashMap::new();
1164    collect_node_types_value(&raw, &mut out);
1165    Ok(out)
1166}
1167
1168fn collect_node_types_value(value: &Value, out: &mut HashMap<String, usize>) {
1169    match value {
1170        Value::Object(map) => {
1171            if let Some(typ) = map
1172                .get("type")
1173                .and_then(Value::as_str)
1174                .or_else(|| map.get("__type").and_then(Value::as_str))
1175            {
1176                *out.entry(typ.to_string()).or_insert(0) += 1;
1177            }
1178            for v in map.values() {
1179                collect_node_types_value(v, out);
1180            }
1181        }
1182        Value::Array(items) => {
1183            for item in items {
1184                collect_node_types_value(item, out);
1185            }
1186        }
1187        _ => {}
1188    }
1189}
1190
// Unit tests exercising parsing, rendering, indexing, search, and link
// resolution. Filesystem-backed tests write to unique temp paths and
// clean up after themselves.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::fs;
    use std::time::{SystemTime, UNIX_EPOCH};

    // Unique `.lepiter` file path in the OS temp directory; the nanosecond
    // timestamp keeps parallel test runs from colliding.
    fn temp_file_path(name: &str) -> PathBuf {
        let ts = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        std::env::temp_dir().join(format!("lepiter-core-{name}-{ts}.lepiter"))
    }

    // Unique directory path in the OS temp directory (same scheme as above).
    fn temp_dir_path(name: &str) -> PathBuf {
        let ts = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        std::env::temp_dir().join(format!("lepiter-core-{name}-{ts}"))
    }

    #[test]
    fn parse_heading_detects_markdown_style() {
        assert_eq!(
            parse_heading("## Heading"),
            Some((2, "Heading".to_string()))
        );
        assert_eq!(parse_heading("No heading"), None);
    }

    #[test]
    fn parse_tags_supports_array_and_object_items() {
        let arr = json!(["a", {"name": "b"}, {"title": "c"}]);
        assert_eq!(parse_tags(Some(&arr)), vec!["a", "b", "c"]);

        let obj = json!({"items": [{"title":"x"}, {"title":"y"}]});
        assert_eq!(parse_tags(Some(&obj)), vec!["x", "y"]);
    }

    // One fixture per snippet type: known types map to their node kind;
    // unknown or missing type tags degrade to `Node::Unknown`.
    #[test]
    fn parse_node_covers_known_and_unknown_types() {
        let heading = json!({"__type":"textSnippet","string":"# Title"});
        assert!(matches!(parse_node(&heading), Node::Heading { .. }));

        let quote = json!({"__type":"blockQuoteSnippet","string":"quoted"});
        assert!(matches!(parse_node(&quote), Node::Quote { .. }));

        let code = json!({"__type":"pythonSnippet","code":"print(1)"});
        assert!(matches!(parse_node(&code), Node::Code { .. }));

        let link = json!({"__type":"pharoLinkSnippet","string":"link","url":"page:abc"});
        assert!(matches!(parse_node(&link), Node::Link { .. }));

        let picture = json!({"__type":"pictureSnippet","url":"attachments/x.png","caption":"img"});
        assert!(matches!(parse_node(&picture), Node::Link { .. }));

        let youtube = json!({"__type":"youtubeSnippet","youtubeUrl":"https://youtu.be/abc"});
        assert!(matches!(parse_node(&youtube), Node::Link { .. }));

        let element = json!({"__type":"elementSnippet","code":"GtInspector newOn: 42"});
        assert!(matches!(parse_node(&element), Node::Code { .. }));

        let rewrite =
            json!({"__type":"pharoRewrite","search":"a","replace":"b","isMethodPattern":true});
        assert!(matches!(parse_node(&rewrite), Node::Rewrite { .. }));

        let word = json!({"__type":"wordSnippet","wordString":"refactoring"});
        assert!(matches!(parse_node(&word), Node::Paragraph { .. }));

        let list = json!({
            "__type":"listSnippet",
            "children":{"items":[{"__type":"textSnippet","string":"item"}]}
        });
        assert!(matches!(parse_node(&list), Node::List { .. }));

        let unknown = json!({"__type":"mysterySnippet","x":1});
        assert!(matches!(parse_node(&unknown), Node::Unknown { .. }));

        let missing = json!({"x":1});
        assert!(matches!(parse_node(&missing), Node::Unknown { .. }));
    }

    #[test]
    fn infer_language_maps_common_snippet_types() {
        assert_eq!(
            infer_language(Some("pharoSnippet")),
            Some("pharo".to_string())
        );
        assert_eq!(
            infer_language(Some("javascriptSnippet")),
            Some("javascript".to_string())
        );
        assert_eq!(
            infer_language(Some("yamlSnippet")),
            Some("yaml".to_string())
        );
        assert_eq!(
            infer_language(Some("customSnippet")),
            Some("custom".to_string())
        );
        assert_eq!(infer_language(None), None);
    }

    // Checks the exact plain-text shapes for paragraph, diff-rewrite, and
    // unknown-node rendering.
    #[test]
    fn render_nodes_outputs_unknown_placeholder() {
        let text = render_nodes_to_text(&[
            Node::Paragraph {
                text: "para".to_string(),
            },
            Node::Rewrite {
                language: Some("pharo".to_string()),
                search: "a".to_string(),
                replace: "b".to_string(),
                scope: None,
                is_method_pattern: Some(true),
            },
            Node::Unknown {
                typ: "weird".to_string(),
                raw: json!({"a":1}),
            },
        ]);
        assert!(text.contains("para"));
        assert!(text.contains("```diff pharo"));
        assert!(text.contains("-a"));
        assert!(text.contains("+b"));
        assert!(text.contains("[[unknown: weird]]"));
    }

    #[test]
    fn collect_node_types_counts_nested_values() -> Result<()> {
        let path = temp_file_path("types");
        let content = json!({
            "__type":"page",
            "children":{"__type":"snippets","items":[
                {"__type":"textSnippet","children":{"__type":"snippets","items":[]}},
                {"__type":"pythonSnippet","code":"print(1)"}
            ]}
        });
        fs::write(&path, serde_json::to_vec(&content)?)?;
        let counts = collect_node_types_in_file(&path)?;
        fs::remove_file(&path)?;

        assert_eq!(counts.get("page"), Some(&1));
        assert_eq!(counts.get("textSnippet"), Some(&1));
        assert_eq!(counts.get("pythonSnippet"), Some(&1));
        Ok(())
    }

    #[test]
    fn parse_page_meta_extracts_core_fields() -> Result<()> {
        let path = temp_file_path("meta");
        let content = json!({
            "uid":{"uuid":"id-123"},
            "pageType":{"title":"Title"},
            "editTime":{"time":{"dateAndTimeString":"2024-01-01T00:00:00+00:00"}},
            "tags":["t1","t2"]
        });
        fs::write(&path, serde_json::to_vec(&content)?)?;
        let meta = parse_page_meta(&path)?;
        fs::remove_file(&path)?;

        assert_eq!(meta.id, "id-123");
        assert_eq!(meta.title, "Title");
        assert_eq!(meta.tags, vec!["t1", "t2"]);
        assert!(meta.updated_at.is_some());
        Ok(())
    }

    #[test]
    fn parse_item_recursive_includes_children() {
        let root = json!({
            "__type":"textSnippet",
            "string":"parent",
            "children":{"items":[
                {"__type":"textSnippet","string":"child"}
            ]}
        });
        let mut out = Vec::new();
        parse_item_recursive(&root, &mut out);
        // Parent and nested child both flatten into the output.
        assert_eq!(out.len(), 2);
    }

    // Builds an in-memory index (no disk) to test title/id/tag filtering.
    #[test]
    fn filter_page_ids_matches_title_id_and_tags() {
        let mut pages = HashMap::new();
        pages.insert(
            "id-1".to_string(),
            PageMeta {
                id: "id-1".to_string(),
                title: "Alpha".to_string(),
                title_lower: "alpha".to_string(),
                path: PathBuf::from("/tmp/a"),
                updated_at: None,
                tags: vec!["rust".to_string()],
            },
        );
        pages.insert(
            "id-2".to_string(),
            PageMeta {
                id: "id-2".to_string(),
                title: "Beta".to_string(),
                title_lower: "beta".to_string(),
                path: PathBuf::from("/tmp/b"),
                updated_at: None,
                tags: vec!["pharo".to_string()],
            },
        );
        let sorted_ids = compute_sorted_ids(&pages);
        let index = KnowledgeBaseIndex {
            root: PathBuf::from("/tmp"),
            pages,
            sorted_ids,
            index_issues: Vec::new(),
        };

        assert_eq!(index.filter_page_ids("alpha"), vec!["id-1".to_string()]);
        assert_eq!(index.filter_page_ids("id-2"), vec!["id-2".to_string()]);
        assert_eq!(index.filter_page_ids("pharo"), vec!["id-2".to_string()]);
        // Empty filter returns every page id.
        assert_eq!(
            index.filter_page_ids(""),
            vec!["id-1".to_string(), "id-2".to_string()]
        );
    }

    // "Alpha" vs "Alphabet": exact title is unique, a shared prefix is
    // ambiguous, and an unmatched title is NotFound.
    #[test]
    fn resolve_page_id_by_title_handles_unique_ambiguous_and_missing() {
        let mut pages = HashMap::new();
        pages.insert(
            "id-1".to_string(),
            PageMeta {
                id: "id-1".to_string(),
                title: "Alpha".to_string(),
                title_lower: "alpha".to_string(),
                path: PathBuf::from("/tmp/a"),
                updated_at: None,
                tags: Vec::new(),
            },
        );
        pages.insert(
            "id-2".to_string(),
            PageMeta {
                id: "id-2".to_string(),
                title: "Alphabet".to_string(),
                title_lower: "alphabet".to_string(),
                path: PathBuf::from("/tmp/b"),
                updated_at: None,
                tags: Vec::new(),
            },
        );
        let sorted_ids = compute_sorted_ids(&pages);
        let index = KnowledgeBaseIndex {
            root: PathBuf::from("/tmp"),
            pages,
            sorted_ids,
            index_issues: Vec::new(),
        };

        assert_eq!(
            index.resolve_page_id_by_title("Alpha"),
            TitleResolution::Unique("id-1".to_string())
        );
        assert!(matches!(
            index.resolve_page_id_by_title("alp"),
            TitleResolution::Ambiguous(_)
        ));
        assert_eq!(
            index.resolve_page_id_by_title("zzz"),
            TitleResolution::NotFound
        );
    }

    #[test]
    fn classify_link_target_covers_internal_attachment_external_unknown() {
        let mut pages = HashMap::new();
        pages.insert(
            "8a505fa0-2222-3333-4444-555555555555".to_string(),
            PageMeta {
                id: "8a505fa0-2222-3333-4444-555555555555".to_string(),
                title: "Alpha".to_string(),
                title_lower: "alpha".to_string(),
                path: PathBuf::from("/tmp/a"),
                updated_at: None,
                tags: Vec::new(),
            },
        );
        let sorted_ids = compute_sorted_ids(&pages);
        let index = KnowledgeBaseIndex {
            root: PathBuf::from("/kb"),
            pages,
            sorted_ids,
            index_issues: Vec::new(),
        };

        assert!(matches!(
            index.classify_link_target("8a505fa0-2222-3333-4444-555555555555"),
            LinkTargetKind::InternalPage(_)
        ));
        assert!(matches!(
            index.classify_link_target("title:alpha"),
            LinkTargetKind::InternalPage(_)
        ));
        // A known page id embedded in surrounding text still resolves.
        assert!(matches!(
            index.classify_link_target("go to 8a505fa0-2222-3333-4444-555555555555 now"),
            LinkTargetKind::InternalPage(_)
        ));
        assert!(matches!(
            index.classify_link_target("attachments/image.png"),
            LinkTargetKind::AttachmentPath(_)
        ));
        assert!(matches!(
            index.classify_link_target("https://example.com"),
            LinkTargetKind::ExternalUrl(_)
        ));
        assert!(matches!(
            index.classify_link_target("not a thing"),
            LinkTargetKind::Unknown(_)
        ));
        // page: prefix falls back to title resolution
        assert!(matches!(
            index.classify_link_target("page:Alpha"),
            LinkTargetKind::InternalPage(_)
        ));
        // page: prefix with unknown title stays Unknown
        assert!(matches!(
            index.classify_link_target("page:Nonexistent"),
            LinkTargetKind::Unknown(_)
        ));
    }

    #[test]
    fn attachment_resolver_reports_missing_files() -> Result<()> {
        let root = temp_dir_path("attachments");
        let attachments = root.join("attachments");
        fs::create_dir_all(&attachments)?;
        fs::write(attachments.join("ok.txt"), b"ok")?;

        let resolver = AttachmentResolver::new(&root);
        let resolved = resolver.resolve("attachments/ok.txt")?;
        assert!(resolved.exists);

        let missing = resolver.resolve_existing("attachments/missing.txt");
        assert!(matches!(missing, Err(AttachmentError::Missing(_))));

        fs::remove_dir_all(&root)?;
        Ok(())
    }

    // Writes one `.lepiter` page file per (id, title, tags, body) tuple into
    // a fresh temp dir and opens it as a knowledge base. The caller is
    // responsible for removing the returned directory.
    fn make_kb_on_disk(pages: &[(&str, &str, &[&str], &str)]) -> (PathBuf, KnowledgeBaseIndex) {
        let dir = temp_dir_path("kb");
        fs::create_dir_all(&dir).unwrap();
        for (id, title, tags, body_text) in pages {
            let tags_json: Vec<Value> = tags.iter().map(|t| json!(t)).collect();
            let content = json!({
                "uid": {"uuid": id},
                "pageType": {"title": title},
                "tags": tags_json,
                "children": {"items": [
                    {"__type": "textSnippet", "string": body_text}
                ]}
            });
            let file_path = dir.join(format!("{id}.lepiter"));
            fs::write(&file_path, serde_json::to_vec(&content).unwrap()).unwrap();
        }
        let index = KnowledgeBase::open(&dir).unwrap();
        (dir, index)
    }

    #[test]
    fn search_hits_empty_query_returns_nothing() {
        let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "hello world")]);
        assert!(index.search_hits("", false).is_empty());
        assert!(index.search_hits("  ", true).is_empty());
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn search_hits_matches_title_case_insensitively() {
        let (dir, index) = make_kb_on_disk(&[
            ("p1", "Alpha Guide", &[], "nothing special"),
            ("p2", "Beta Notes", &[], "nothing special"),
        ]);
        let hits = index.search_hits("alpha", false);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].id, "p1");
        assert_eq!(hits[0].kind, SearchMatchKind::Meta);
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn search_hits_matches_tags() {
        let (dir, index) = make_kb_on_disk(&[
            ("p1", "Page One", &["rust", "cli"], "body"),
            ("p2", "Page Two", &["pharo"], "body"),
        ]);
        let hits = index.search_hits("rust", false);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].id, "p1");
        assert_eq!(hits[0].kind, SearchMatchKind::Meta);
        fs::remove_dir_all(&dir).unwrap();
    }

    // The second arg of search_hits toggles body (content) search.
    #[test]
    fn search_hits_content_flag_searches_page_body() {
        let (dir, index) = make_kb_on_disk(&[
            ("p1", "Alpha", &[], "the quick brown fox"),
            ("p2", "Beta", &[], "lazy dog sleeps"),
        ]);

        let no_content = index.search_hits("fox", false);
        assert!(no_content.is_empty());

        let with_content = index.search_hits("fox", true);
        assert_eq!(with_content.len(), 1);
        assert_eq!(with_content[0].id, "p1");
        assert_eq!(with_content[0].kind, SearchMatchKind::Content);
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn search_hits_meta_match_takes_priority_over_content() {
        let (dir, index) = make_kb_on_disk(&[("p1", "Fox Guide", &[], "the fox jumps")]);
        let hits = index.search_hits("fox", true);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].kind, SearchMatchKind::Meta);
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn search_hits_returns_results_sorted_by_title() {
        let (dir, index) = make_kb_on_disk(&[
            ("p1", "Zebra", &["common"], "body"),
            ("p2", "Alpha", &["common"], "body"),
            ("p3", "Middle", &["common"], "body"),
        ]);
        let hits = index.search_hits("common", false);
        let ids: Vec<&str> = hits.iter().map(|h| h.id.as_str()).collect();
        assert_eq!(ids, vec!["p2", "p3", "p1"]);
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn classify_link_target_page_prefix() {
        let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
        assert!(matches!(
            index.classify_link_target("page:p1"),
            LinkTargetKind::InternalPage(id) if id == "p1"
        ));
        assert!(matches!(
            index.classify_link_target("page:nonexistent"),
            LinkTargetKind::Unknown(_)
        ));
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn classify_link_target_empty_is_unknown() {
        let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
        assert!(matches!(
            index.classify_link_target(""),
            LinkTargetKind::Unknown(_)
        ));
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn classify_link_target_title_fallback() {
        let (dir, index) = make_kb_on_disk(&[("p1", "My Special Page", &[], "body")]);
        assert!(matches!(
            index.classify_link_target("My Special Page"),
            LinkTargetKind::InternalPage(id) if id == "p1"
        ));
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn resolve_page_id_by_title_empty_and_whitespace() {
        let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
        assert_eq!(
            index.resolve_page_id_by_title(""),
            TitleResolution::NotFound
        );
        assert_eq!(
            index.resolve_page_id_by_title("   "),
            TitleResolution::NotFound
        );
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn resolve_page_id_by_title_case_insensitive_exact() {
        let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
        assert_eq!(
            index.resolve_page_id_by_title("ALPHA"),
            TitleResolution::Unique("p1".to_string())
        );
        fs::remove_dir_all(&dir).unwrap();
    }

    #[test]
    fn filter_page_ids_no_match_returns_empty() {
        let (dir, index) = make_kb_on_disk(&[("p1", "Alpha", &[], "body")]);
        assert!(index.filter_page_ids("zzzzz").is_empty());
        fs::remove_dir_all(&dir).unwrap();
    }

    // Word snippets fold both the word and its explanation attachment name
    // into the rendered paragraph.
    #[test]
    fn parse_word_node_extracts_primary_fields() {
        let item = json!({
            "__type":"wordSnippet",
            "wordString":"refactoring",
            "explanationAttachmentNameString":"attachments/x/explanation.json"
        });
        let node = parse_node(&item);
        match node {
            Node::Paragraph { text } => {
                assert!(text.contains("refactoring"));
                assert!(text.contains("attachments/x/explanation.json"));
            }
            other => panic!("expected paragraph, got {other:?}"),
        }
    }

    #[test]
    fn parse_page_meta_missing_uid_uses_empty_id() -> Result<()> {
        let path = temp_file_path("no-uid");
        let content = json!({"pageType": {"title": "Some Title"}});
        fs::write(&path, serde_json::to_vec(&content)?)?;
        let meta = parse_page_meta(&path)?;
        fs::remove_file(&path)?;

        assert!(meta.id.is_empty());
        assert_eq!(meta.title, "Some Title");
        Ok(())
    }

    #[test]
    fn parse_page_meta_missing_page_type_uses_empty_title() -> Result<()> {
        let path = temp_file_path("no-pt");
        let content = json!({"uid": {"uuid": "abc-123"}});
        fs::write(&path, serde_json::to_vec(&content)?)?;
        let meta = parse_page_meta(&path)?;
        fs::remove_file(&path)?;

        assert_eq!(meta.id, "abc-123");
        assert!(meta.title.is_empty());
        Ok(())
    }

    // A malformed timestamp must not fail the whole parse — it degrades to
    // `updated_at: None`.
    #[test]
    fn parse_page_meta_invalid_date_string_yields_none() -> Result<()> {
        let path = temp_file_path("bad-date");
        let content = json!({
            "uid": {"uuid": "id-1"},
            "editTime": {"time": {"dateAndTimeString": "not-a-date"}}
        });
        fs::write(&path, serde_json::to_vec(&content)?)?;
        let meta = parse_page_meta(&path)?;
        fs::remove_file(&path)?;

        assert!(meta.updated_at.is_none());
        Ok(())
    }

    #[test]
    fn open_empty_directory_returns_empty_index() -> Result<()> {
        let dir = temp_dir_path("empty-kb");
        fs::create_dir_all(&dir)?;
        let index = KnowledgeBase::open(&dir)?;
        fs::remove_dir_all(&dir)?;

        assert!(index.pages.is_empty());
        assert!(index.index_issues.is_empty());
        Ok(())
    }

    #[test]
    fn open_skips_non_lepiter_files() -> Result<()> {
        let dir = temp_dir_path("non-lepiter");
        fs::create_dir_all(&dir)?;
        fs::write(dir.join("readme.txt"), b"hello")?;
        fs::write(dir.join("data.json"), b"{}")?;
        let index = KnowledgeBase::open(&dir)?;
        fs::remove_dir_all(&dir)?;

        assert!(index.pages.is_empty());
        assert!(index.index_issues.is_empty());
        Ok(())
    }

    // Bad files are recorded as index issues rather than aborting the scan.
    #[test]
    fn open_reports_invalid_json_as_issue() -> Result<()> {
        let dir = temp_dir_path("bad-json");
        fs::create_dir_all(&dir)?;
        fs::write(dir.join("broken.lepiter"), b"not json at all")?;
        let index = KnowledgeBase::open(&dir)?;
        fs::remove_dir_all(&dir)?;

        assert!(index.pages.is_empty());
        assert_eq!(index.index_issues.len(), 1);
        assert!(index.index_issues[0].message.contains("failed to decode"));
        Ok(())
    }

    #[test]
    fn open_reports_wrong_json_structure_as_issue() -> Result<()> {
        let dir = temp_dir_path("wrong-shape");
        fs::create_dir_all(&dir)?;
        fs::write(dir.join("array.lepiter"), b"[1, 2, 3]")?;
        let index = KnowledgeBase::open(&dir)?;
        fs::remove_dir_all(&dir)?;

        assert!(index.pages.is_empty());
        assert_eq!(index.index_issues.len(), 1);
        Ok(())
    }

    // An empty JSON object still indexes: id and title default to the
    // file stem.
    #[test]
    fn open_fills_in_defaults_for_minimal_page() -> Result<()> {
        let dir = temp_dir_path("minimal");
        fs::create_dir_all(&dir)?;
        fs::write(dir.join("mypage.lepiter"), b"{}")?;
        let index = KnowledgeBase::open(&dir)?;
        fs::remove_dir_all(&dir)?;

        assert_eq!(index.pages.len(), 1);
        let meta = index.pages.values().next().unwrap();
        assert_eq!(meta.id, "mypage");
        assert_eq!(meta.title, "mypage");
        Ok(())
    }

    #[test]
    fn load_page_nonexistent_id_errors() -> Result<()> {
        let dir = temp_dir_path("no-such-id");
        fs::create_dir_all(&dir)?;
        let index = KnowledgeBase::open(&dir)?;
        fs::remove_dir_all(&dir)?;

        let err = index.load_page("does-not-exist");
        assert!(err.is_err());
        assert!(format!("{:#}", err.unwrap_err()).contains("page id not found"));
        Ok(())
    }

    #[test]
    fn load_page_missing_children_yields_empty_content() -> Result<()> {
        let dir = temp_dir_path("no-children");
        fs::create_dir_all(&dir)?;
        let content = json!({"uid": {"uuid": "pg-1"}, "pageType": {"title": "T"}});
        fs::write(dir.join("pg-1.lepiter"), serde_json::to_vec(&content)?)?;
        let index = KnowledgeBase::open(&dir)?;
        let page = index.load_page("pg-1")?;
        fs::remove_dir_all(&dir)?;

        assert!(page.content.is_empty());
        Ok(())
    }
}