Skip to main content

markdown_btree_core/
engine.rs

1use serde::{Deserialize, Serialize};
2use std::collections::BTreeMap;
3
4/// A paginated collection of section paths returned by discovery tools.
5#[derive(Serialize, Deserialize, Clone, Debug)]
6pub struct PaginatedResult {
7    /// List of hierarchical paths (e.g., ["chapter-1", "chapter-1/intro"]).
8    pub items: Vec<String>,
9    /// Current 0-indexed page number.
10    pub page: usize,
11    /// Number of items requested per page.
12    pub size: usize,
13    /// Total number of items available for the given parent path.
14    pub total: usize,
15}
16
17/// Represents a specific section of the Markdown document identified by a heading.
18#[derive(Serialize, Deserialize, Clone, Debug)]
19pub struct MarkdownNode {
20    /// The original text of the heading.
21    pub title: String,
22    /// The URL-safe version of the title used for path building.
23    pub slug: String,
24    /// The depth level of the heading (1 for #, 2 for ##, etc.).
25    pub level: usize,
26    /// The raw text content belonging directly to this section (before the next heading).
27    pub content: String,
28    /// Hierarchical paths to direct sub-sections.
29    pub children: Vec<String>,
30}
31
32/// The core indexing engine that transforms Markdown text into a Virtual File System.
33/// It uses a BTreeMap to maintain an ordered, searchable index of all sections.
34pub struct MarkdownEngine {
35    /// Map of full hierarchical paths to their corresponding document nodes.
36    pub(crate) nodes: BTreeMap<String, MarkdownNode>,
37    /// Map of Markdown reference IDs (e.g., [link_id]: url) to their target URLs.
38    pub(crate) references: BTreeMap<String, String>,
39    /// List of paths for top-level headings (Level 1 or the first encountered).
40    pub(crate) root_nodes: Vec<String>,
41}
42
43/// Generates a URL-safe slug from a heading title.
44pub fn slugify(title: &str) -> String {
45    let mut out = String::new();
46    let mut previous_dash = false;
47
48    for ch in title.chars() {
49        if ch.is_ascii_alphanumeric() {
50            out.push(ch.to_ascii_lowercase());
51            previous_dash = false;
52        } else if !previous_dash {
53            out.push('-');
54            previous_dash = true;
55        }
56    }
57
58    let normalized = out.trim_matches('-').to_string();
59    if normalized.is_empty() {
60        "section".to_string()
61    } else {
62        normalized
63    }
64}
65
66/// Attempts to parse a Markdown reference definition (e.g., [id]: url).
67fn parse_reference(line: &str) -> Option<(String, String)> {
68    let trimmed = line.trim();
69    if !trimmed.starts_with('[') {
70        return None;
71    }
72
73    let close = trimmed.find("]:")?;
74    if close <= 1 {
75        return None;
76    }
77
78    let ref_id = trimmed[1..close].trim();
79    let url = trimmed[(close + 2)..].trim();
80
81    if ref_id.is_empty() || url.is_empty() {
82        return None;
83    }
84
85    Some((ref_id.to_string(), url.to_string()))
86}
87
88/// Parses a standard Markdown heading (e.g., ### My Title).
89fn parse_markdown_heading(line: &str) -> Option<(usize, String)> {
90    let trimmed = line.trim();
91    if !trimmed.starts_with('#') {
92        return None;
93    }
94
95    let mut level = 0usize;
96    for ch in trimmed.chars() {
97        if ch == '#' {
98            level += 1;
99        } else {
100            break;
101        }
102    }
103
104    if !(1..=6).contains(&level) {
105        return None;
106    }
107
108    if trimmed
109        .chars()
110        .nth(level)
111        .is_none_or(|c| !c.is_whitespace())
112    {
113        return None;
114    }
115
116    let title = trimmed[level..].trim();
117    if title.is_empty() {
118        return None;
119    }
120
121    Some((level, title.to_string()))
122}
123
124/// Parses an HTML-style heading (e.g., <h2>My Title</h2>).
125fn parse_html_heading(line: &str) -> Option<(usize, String)> {
126    let trimmed = line.trim();
127    let lower = trimmed.to_ascii_lowercase();
128
129    if !lower.starts_with("<h") {
130        return None;
131    }
132
133    let bytes = lower.as_bytes();
134    if bytes.len() < 5 {
135        return None;
136    }
137
138    let level_char = bytes[2] as char;
139    if !('1'..='6').contains(&level_char) {
140        return None;
141    }
142
143    let level = level_char.to_digit(10)? as usize;
144    let open_end = trimmed.find('>')?;
145    let close_tag = format!("</h{}>", level);
146    let close_index = lower.rfind(&close_tag)?;
147
148    if close_index <= open_end {
149        return None;
150    }
151
152    let title = trimmed[(open_end + 1)..close_index].trim();
153    if title.is_empty() {
154        return None;
155    }
156
157    Some((level, title.to_string()))
158}
159
160/// Unified heading parser that supports both Markdown and HTML heading formats.
161pub(crate) fn parse_heading(line: &str) -> Option<(usize, String)> {
162    parse_markdown_heading(line).or_else(|| parse_html_heading(line))
163}
164
165impl MarkdownEngine {
166    /// Parses the provided Markdown content and builds a hierarchical Virtual File System index.
167    pub fn new(content: &str) -> MarkdownEngine {
168        let mut engine = MarkdownEngine {
169            nodes: BTreeMap::new(),
170            references: BTreeMap::new(),
171            root_nodes: Vec::new(),
172        };
173
174        let mut stack: Vec<(usize, String)> = Vec::new();
175        let mut current_path: Option<String> = None;
176
177        for line in content.lines() {
178            if let Some((ref_id, url)) = parse_reference(line) {
179                engine.references.insert(ref_id, url);
180                continue;
181            }
182
183            if let Some((level, title)) = parse_heading(line) {
184                while let Some((last_level, _)) = stack.last() {
185                    if *last_level >= level {
186                        stack.pop();
187                    } else {
188                        break;
189                    }
190                }
191
192                let parent_path = stack.last().map(|(_, path)| path.clone());
193                let base_slug = slugify(&title);
194                let unique_slug = engine.make_unique_slug(parent_path.as_deref(), &base_slug);
195
196                let full_path = match &parent_path {
197                    Some(parent) => format!("{}/{}", parent, unique_slug),
198                    None => unique_slug.clone(),
199                };
200
201                let node = MarkdownNode {
202                    title,
203                    slug: unique_slug,
204                    level,
205                    content: String::new(),
206                    children: Vec::new(),
207                };
208
209                if let Some(parent) = parent_path {
210                    if let Some(parent_node) = engine.nodes.get_mut(&parent) {
211                        parent_node.children.push(full_path.clone());
212                    }
213                } else {
214                    engine.root_nodes.push(full_path.clone());
215                }
216
217                engine.nodes.insert(full_path.clone(), node);
218                stack.push((level, full_path.clone()));
219                current_path = Some(full_path);
220                continue;
221            }
222
223            engine.push_content_line(current_path.as_ref(), line);
224        }
225
226        engine
227    }
228
229    /// Generates a unique slug for a section by checking for collisions with existing paths.
230    fn make_unique_slug(&self, parent_path: Option<&str>, base_slug: &str) -> String {
231        let mut candidate = base_slug.to_string();
232        let mut suffix = 1usize;
233
234        loop {
235            let full_path = match parent_path {
236                Some(parent) => format!("{}/{}", parent, candidate),
237                None => candidate.clone(),
238            };
239
240            if !self.nodes.contains_key(&full_path) {
241                return candidate;
242            }
243
244            candidate = format!("{}-{}", base_slug, suffix);
245            suffix += 1;
246        }
247    }
248
249    /// Internal helper to append content to the currently active section during parsing.
250    fn push_content_line(&mut self, current_path: Option<&String>, line: &str) {
251        if let Some(path) = current_path {
252            if let Some(node) = self.nodes.get_mut(path) {
253                if !node.content.is_empty() {
254                    node.content.push('\n');
255                }
256                node.content.push_str(line);
257            }
258        }
259    }
260
261    /// Lists direct sub-sections for a given hierarchical path with pagination support.
262    /// Use an empty path ("") to retrieve root-level headings.
263    pub fn ls(&self, path: &str, page: usize, size: usize) -> PaginatedResult {
264        let entries = if path.is_empty() {
265            self.root_nodes.clone()
266        } else {
267            self.nodes
268                .get(path)
269                .map(|node| node.children.clone())
270                .unwrap_or_default()
271        };
272
273        let safe_size = if size == 0 { 20 } else { size };
274        let start = page.saturating_mul(safe_size);
275        let end = start.saturating_add(safe_size).min(entries.len());
276
277        let items = if start >= entries.len() {
278            Vec::new()
279        } else {
280            entries[start..end].to_vec()
281        };
282
283        PaginatedResult {
284            items,
285            page,
286            size: safe_size,
287            total: entries.len(),
288        }
289    }
290
291    /// Retrieves the human-readable title of the section at the specified path.
292    pub fn get_title(&self, path: &str) -> Option<String> {
293        self.nodes.get(path).map(|n| n.title.clone())
294    }
295
296    /// Reads the direct text content of a section (non-recursive).
297    pub fn read(&self, path: &str) -> Option<String> {
298        self.nodes.get(path).map(|node| node.content.clone())
299    }
300
301    /// Reads the full content of a section, including all its sub-sections recursively.
302    pub fn read_full(&self, path: &str) -> Option<String> {
303        if !self.nodes.contains_key(path) {
304            return None;
305        }
306        let mut buf = String::new();
307        self.collect_full(path, &mut buf);
308        Some(buf)
309    }
310
311    /// Recursive helper to collect content and headings from a node and its entire subtree.
312    fn collect_full(&self, path: &str, buf: &mut String) {
313        let Some(node) = self.nodes.get(path) else {
314            return;
315        };
316        let content = node.content.clone();
317        let children = node.children.clone();
318
319        if !content.is_empty() {
320            if !buf.is_empty() {
321                buf.push('\n');
322            }
323            buf.push_str(&content);
324        }
325
326        for child_path in children {
327            let (child_level, child_title) = match self.nodes.get(&child_path) {
328                Some(n) => (n.level, n.title.clone()),
329                None => continue,
330            };
331            if !buf.is_empty() {
332                buf.push('\n');
333            }
334            buf.push_str(&"#".repeat(child_level));
335            buf.push(' ');
336            buf.push_str(&child_title);
337            self.collect_full(&child_path, buf);
338        }
339    }
340
341    /// Resolves a Markdown reference ID to its corresponding URL.
342    pub fn get_reference(&self, ref_id: &str) -> Option<String> {
343        self.references.get(ref_id).cloned()
344    }
345
346    /// Performs a case-insensitive search across all section titles.
347    /// Returns a list of all matching hierarchical paths.
348    pub fn search(&self, query: &str) -> Vec<String> {
349        let q = query.to_ascii_lowercase();
350        self.nodes
351            .iter()
352            .filter_map(|(path, node)| {
353                if node.title.to_ascii_lowercase().contains(&q) {
354                    Some(path.clone())
355                } else {
356                    None
357                }
358            })
359            .collect()
360    }
361}