Skip to main content

edgeparse_core/output/
toc_builder.rs

//! TOC (Table of Contents) builder — builds a structured table of contents
//! from headings detected in the document, suitable for output rendering.
3
4use crate::models::content::ContentElement;
5
/// A single TOC entry with hierarchical nesting.
///
/// Entries form a tree: each entry owns the sub-headings nested beneath it.
/// `Eq` is derived alongside `PartialEq` because every field has total
/// equality, which lets entries be used wherever full equivalence is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TocEntry {
    /// Heading text.
    pub title: String,
    /// Heading level (1 = top-level, 2 = section, etc.).
    pub level: u32,
    /// Page number where the heading appears (1-based).
    pub page_number: u32,
    /// Child entries (sub-headings).
    pub children: Vec<TocEntry>,
}
18
19/// A complete table of contents.
20#[derive(Debug, Clone, Default)]
21pub struct TableOfContents {
22    /// Top-level entries.
23    pub entries: Vec<TocEntry>,
24}
25
26impl TableOfContents {
27    /// Build a TOC from document pages by extracting headings.
28    pub fn from_pages(pages: &[Vec<ContentElement>]) -> Self {
29        let mut flat: Vec<TocEntry> = Vec::new();
30
31        for (page_idx, page) in pages.iter().enumerate() {
32            let page_num = (page_idx + 1) as u32;
33            for elem in page {
34                if let Some(entry) = extract_heading(elem, page_num) {
35                    flat.push(entry);
36                }
37            }
38        }
39
40        Self {
41            entries: nest_entries(flat),
42        }
43    }
44
45    /// Total number of entries (including nested).
46    pub fn total_entries(&self) -> usize {
47        count_entries(&self.entries)
48    }
49
50    /// Render the TOC as a markdown string.
51    pub fn to_markdown(&self) -> String {
52        let mut out = String::new();
53        render_markdown(&self.entries, &mut out);
54        out
55    }
56
57    /// Render the TOC as an HTML unordered list.
58    pub fn to_html(&self) -> String {
59        if self.entries.is_empty() {
60            return String::new();
61        }
62        let mut out = String::new();
63        render_html(&self.entries, &mut out, 0);
64        out
65    }
66
67    /// Whether the TOC is empty.
68    pub fn is_empty(&self) -> bool {
69        self.entries.is_empty()
70    }
71}
72
73/// Extract heading info from a content element.
74fn extract_heading(elem: &ContentElement, page_number: u32) -> Option<TocEntry> {
75    match elem {
76        ContentElement::Heading(h) => {
77            let level = h.heading_level.unwrap_or(1);
78            let title = h.base.base.value().trim().to_string();
79            if title.is_empty() {
80                return None;
81            }
82            Some(TocEntry {
83                title,
84                level,
85                page_number,
86                children: Vec::new(),
87            })
88        }
89        _ => None,
90    }
91}
92
93/// Nest flat entries into a tree based on levels.
94/// Each entry's children are subsequent entries with a higher level number (lower priority).
95fn nest_entries(flat: Vec<TocEntry>) -> Vec<TocEntry> {
96    if flat.is_empty() {
97        return Vec::new();
98    }
99
100    let mut result: Vec<TocEntry> = Vec::new();
101    let mut stack: Vec<TocEntry> = Vec::new();
102
103    for entry in flat {
104        // Pop entries from stack that are at the same or deeper level
105        while let Some(top) = stack.last() {
106            if top.level >= entry.level {
107                let popped = stack.pop().unwrap();
108                if let Some(parent) = stack.last_mut() {
109                    parent.children.push(popped);
110                } else {
111                    result.push(popped);
112                }
113            } else {
114                break;
115            }
116        }
117        stack.push(entry);
118    }
119
120    // Flush remaining stack
121    while let Some(popped) = stack.pop() {
122        if let Some(parent) = stack.last_mut() {
123            parent.children.push(popped);
124        } else {
125            result.push(popped);
126        }
127    }
128
129    result
130}
131
132fn count_entries(entries: &[TocEntry]) -> usize {
133    entries.iter().map(|e| 1 + count_entries(&e.children)).sum()
134}
135
136fn render_markdown(entries: &[TocEntry], out: &mut String) {
137    for entry in entries {
138        let indent = "  ".repeat((entry.level - 1) as usize);
139        out.push_str(&format!(
140            "{}- {} (p. {})\n",
141            indent, entry.title, entry.page_number
142        ));
143        render_markdown(&entry.children, out);
144    }
145}
146
147fn render_html(entries: &[TocEntry], out: &mut String, depth: usize) {
148    let indent = "  ".repeat(depth);
149    out.push_str(&format!("{}<ul>\n", indent));
150    for entry in entries {
151        out.push_str(&format!(
152            "{}  <li>{} (p. {})",
153            indent, entry.title, entry.page_number
154        ));
155        if !entry.children.is_empty() {
156            out.push('\n');
157            render_html(&entry.children, out, depth + 2);
158            out.push_str(&format!("{}  </li>\n", indent));
159        } else {
160            out.push_str("</li>\n");
161        }
162    }
163    out.push_str(&format!("{}</ul>\n", indent));
164}
165
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a leaf entry (no children).
    fn make_entry(title: &str, level: u32, page: u32) -> TocEntry {
        TocEntry {
            title: title.to_string(),
            level,
            page_number: page,
            children: Vec::new(),
        }
    }

    /// Build an entry that already owns the given children.
    fn make_parent(title: &str, level: u32, page: u32, children: Vec<TocEntry>) -> TocEntry {
        TocEntry {
            children,
            ..make_entry(title, level, page)
        }
    }

    /// Wrap a single top-level entry in a table of contents.
    fn toc_of(entry: TocEntry) -> TableOfContents {
        TableOfContents {
            entries: vec![entry],
        }
    }

    #[test]
    fn test_nest_flat_entries() {
        let nested = nest_entries(vec![
            make_entry("Chapter 1", 1, 1),
            make_entry("Section 1.1", 2, 2),
            make_entry("Section 1.2", 2, 3),
            make_entry("Chapter 2", 1, 5),
        ]);

        assert_eq!(nested.len(), 2);

        let first = &nested[0];
        assert_eq!(first.title, "Chapter 1");
        assert_eq!(first.children.len(), 2);
        assert_eq!(first.children[0].title, "Section 1.1");

        let second = &nested[1];
        assert_eq!(second.title, "Chapter 2");
        assert!(second.children.is_empty());
    }

    #[test]
    fn test_total_entries() {
        let toc = toc_of(make_parent(
            "Ch1",
            1,
            1,
            vec![make_entry("S1.1", 2, 2), make_entry("S1.2", 2, 3)],
        ));
        assert_eq!(toc.total_entries(), 3);
    }

    #[test]
    fn test_to_markdown() {
        let toc = toc_of(make_parent(
            "Intro",
            1,
            1,
            vec![make_entry("Overview", 2, 2)],
        ));
        let md = toc.to_markdown();
        for expected in ["- Intro (p. 1)", "  - Overview (p. 2)"].iter() {
            assert!(md.contains(expected));
        }
    }

    #[test]
    fn test_to_html() {
        let html = toc_of(make_entry("Title", 1, 1)).to_html();
        for expected in ["<ul>", "<li>Title (p. 1)</li>", "</ul>"].iter() {
            assert!(html.contains(expected));
        }
    }

    #[test]
    fn test_empty_toc() {
        let toc = TableOfContents::default();
        assert!(toc.is_empty());
        assert_eq!(toc.total_entries(), 0);
        assert_eq!(toc.to_markdown(), "");
        assert_eq!(toc.to_html(), "");
    }
}