Skip to main content

vectorless/document/
toc.rs

// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0
3
//! Table of Contents (ToC) view generation.
//!
//! Provides utilities for generating different views of the document tree,
//! including hierarchical ToC, flat ToC, and filtered views.
8
9use serde::{Deserialize, Serialize};
10
11use super::node::NodeId;
12use super::node::TreeNode;
13use super::tree::DocumentTree;
14
15/// A node in the Table of Contents.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct TocNode {
18    /// Node title.
19    pub title: String,
20    /// Node ID (if available).
21    pub node_id: Option<String>,
22    /// Depth in the tree.
23    pub depth: usize,
24    /// Page range (for PDFs).
25    pub page_range: Option<(usize, usize)>,
26    /// Brief summary (optional).
27    pub summary: Option<String>,
28    /// Children nodes.
29    pub children: Vec<TocNode>,
30}
31
32impl TocNode {
33    /// Create a new ToC node.
34    pub fn new(title: impl Into<String>, depth: usize) -> Self {
35        Self {
36            title: title.into(),
37            node_id: None,
38            depth,
39            page_range: None,
40            summary: None,
41            children: Vec::new(),
42        }
43    }
44
45    /// Set the node ID.
46    pub fn with_node_id(mut self, id: impl Into<String>) -> Self {
47        self.node_id = Some(id.into());
48        self
49    }
50
51    /// Set the page range.
52    pub fn with_page_range(mut self, start: usize, end: usize) -> Self {
53        self.page_range = Some((start, end));
54        self
55    }
56
57    /// Set the summary.
58    pub fn with_summary(mut self, summary: impl Into<String>) -> Self {
59        self.summary = Some(summary.into());
60        self
61    }
62
63    /// Add a child node.
64    pub fn add_child(&mut self, child: TocNode) {
65        self.children.push(child);
66    }
67
68    /// Count total nodes in this subtree.
69    pub fn count_nodes(&self) -> usize {
70        1 + self.children.iter().map(|c| c.count_nodes()).sum::<usize>()
71    }
72
73    /// Count leaf nodes in this subtree.
74    pub fn count_leaves(&self) -> usize {
75        if self.children.is_empty() {
76            1
77        } else {
78            self.children.iter().map(|c| c.count_leaves()).sum()
79        }
80    }
81
82    /// Get maximum depth in this subtree.
83    pub fn max_depth(&self) -> usize {
84        if self.children.is_empty() {
85            self.depth
86        } else {
87            self.children
88                .iter()
89                .map(|c| c.max_depth())
90                .max()
91                .unwrap_or(self.depth)
92        }
93    }
94}
95
/// Configuration for ToC generation.
///
/// Derives `PartialEq`/`Eq` so two configurations can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TocConfig {
    /// Maximum depth to include (`None` = unlimited).
    pub max_depth: Option<usize>,
    /// Whether to include summaries.
    pub include_summaries: bool,
    /// Whether to include page ranges.
    pub include_pages: bool,
    /// Minimum content length for a childless node to be emitted in full.
    ///
    /// Childless nodes whose content is shorter than this are still listed,
    /// but with their title and depth only.
    pub min_content_length: usize,
}
108
109impl Default for TocConfig {
110    fn default() -> Self {
111        Self {
112            max_depth: None,
113            include_summaries: true,
114            include_pages: true,
115            min_content_length: 0,
116        }
117    }
118}
119
120impl TocConfig {
121    /// Create new ToC config.
122    pub fn new() -> Self {
123        Self::default()
124    }
125
126    /// Set maximum depth.
127    pub fn with_max_depth(mut self, depth: usize) -> Self {
128        self.max_depth = Some(depth);
129        self
130    }
131
132    /// Set whether to include summaries.
133    pub fn with_summaries(mut self, include: bool) -> Self {
134        self.include_summaries = include;
135        self
136    }
137}
138
139/// ToC view generator.
140#[derive(Clone)]
141pub struct TocView {
142    config: TocConfig,
143}
144
145impl TocView {
146    /// Create a new ToC view generator.
147    pub fn new() -> Self {
148        Self {
149            config: TocConfig::default(),
150        }
151    }
152
153    /// Create with custom configuration.
154    pub fn with_config(config: TocConfig) -> Self {
155        Self { config }
156    }
157
158    /// Generate ToC from a tree.
159    pub fn generate(&self, tree: &DocumentTree) -> TocNode {
160        self.build_toc_node(tree, tree.root(), 0)
161    }
162
163    /// Generate ToC starting from a specific node.
164    pub fn generate_from(&self, tree: &DocumentTree, start: NodeId) -> TocNode {
165        let depth = tree.get(start).map_or(0, |n| n.depth);
166        self.build_toc_node(tree, start, depth)
167    }
168
169    /// Build a ToC node from a tree node.
170    fn build_toc_node(&self, tree: &DocumentTree, node_id: NodeId, depth: usize) -> TocNode {
171        let node = match tree.get(node_id) {
172            Some(n) => n,
173            None => return TocNode::new("Unknown", depth),
174        };
175
176        // Check depth limit
177        if let Some(max) = self.config.max_depth {
178            if depth > max {
179                return TocNode::new("...", depth - 1);
180            }
181        }
182
183        // Check minimum content length
184        if node.content.len() < self.config.min_content_length && tree.children(node_id).is_empty()
185        {
186            return TocNode::new(node.title.clone(), depth);
187        }
188
189        let mut toc_node =
190            TocNode::new(&node.title, depth).with_node_id(node.node_id.clone().unwrap_or_default());
191
192        // Add page range
193        if self.config.include_pages {
194            if let (Some(start), Some(end)) = (node.start_page, node.end_page) {
195                toc_node = toc_node.with_page_range(start, end);
196            }
197        }
198
199        // Add summary
200        if self.config.include_summaries && !node.summary.is_empty() {
201            toc_node = toc_node.with_summary(&node.summary);
202        }
203
204        // Recursively add children
205        for child_id in tree.children(node_id) {
206            let child_toc = self.build_toc_node(tree, child_id, depth + 1);
207            toc_node.add_child(child_toc);
208        }
209
210        toc_node
211    }
212
213    /// Generate a flat list of ToC entries.
214    pub fn generate_flat(&self, tree: &DocumentTree) -> Vec<TocEntry> {
215        let mut entries = Vec::new();
216        self.collect_flat_entries(tree, tree.root(), &mut entries);
217        entries
218    }
219
220    fn collect_flat_entries(
221        &self,
222        tree: &DocumentTree,
223        node_id: NodeId,
224        entries: &mut Vec<TocEntry>,
225    ) {
226        if let Some(node) = tree.get(node_id) {
227            entries.push(TocEntry {
228                title: node.title.clone(),
229                node_id: node.node_id.clone(),
230                depth: node.depth,
231                page_range: node.start_page.zip(node.end_page),
232            });
233
234            for child_id in tree.children(node_id) {
235                self.collect_flat_entries(tree, child_id, entries);
236            }
237        }
238    }
239
240    /// Generate a filtered ToC based on a predicate.
241    pub fn generate_filtered<F>(&self, tree: &DocumentTree, filter: F) -> Vec<TocNode>
242    where
243        F: Fn(&TreeNode) -> bool,
244    {
245        let mut result = Vec::new();
246        self.collect_filtered(tree, tree.root(), &filter, &mut result);
247        result
248    }
249
250    fn collect_filtered<F>(
251        &self,
252        tree: &DocumentTree,
253        node_id: NodeId,
254        filter: &F,
255        result: &mut Vec<TocNode>,
256    ) where
257        F: Fn(&TreeNode) -> bool,
258    {
259        if let Some(node) = tree.get(node_id) {
260            if filter(node) {
261                let toc_node = self.build_toc_node(tree, node_id, node.depth);
262                result.push(toc_node);
263            }
264
265            for child_id in tree.children(node_id) {
266                self.collect_filtered(tree, child_id, filter, result);
267            }
268        }
269    }
270
271    /// Format ToC as markdown.
272    pub fn format_markdown(&self, toc: &TocNode) -> String {
273        let mut output = String::new();
274        self.write_markdown(toc, &mut output, 0);
275        output
276    }
277
278    fn write_markdown(&self, toc: &TocNode, output: &mut String, level: usize) {
279        let indent = "  ".repeat(level);
280        let bullet = if level == 0 { "-" } else { "-" };
281
282        output.push_str(&format!("{}{} {}\n", indent, bullet, toc.title));
283
284        if let Some(ref summary) = toc.summary {
285            output.push_str(&format!("{}  > {}\n", indent, summary));
286        }
287
288        for child in &toc.children {
289            self.write_markdown(child, output, level + 1);
290        }
291    }
292
293    /// Format ToC as JSON.
294    pub fn format_json(&self, toc: &TocNode) -> Result<String, serde_json::Error> {
295        serde_json::to_string_pretty(toc)
296    }
297}
298
299impl Default for TocView {
300    fn default() -> Self {
301        Self::new()
302    }
303}
304
305/// A flat ToC entry.
306#[derive(Debug, Clone, Serialize, Deserialize)]
307pub struct TocEntry {
308    /// Node title.
309    pub title: String,
310    /// Node ID.
311    pub node_id: Option<String>,
312    /// Depth in tree.
313    pub depth: usize,
314    /// Page range.
315    pub page_range: Option<(usize, usize)>,
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// A root with one decorated child has two nodes, one leaf, and depth 1.
    #[test]
    fn test_toc_node_creation() {
        let leaf = TocNode::new("Child", 1)
            .with_node_id("node-1")
            .with_summary("A child node");

        let mut tree = TocNode::new("Root", 0);
        tree.add_child(leaf);

        assert_eq!(tree.count_nodes(), 2);
        assert_eq!(tree.count_leaves(), 1);
        assert_eq!(tree.max_depth(), 1);
    }

    /// Builder calls override the corresponding default settings.
    #[test]
    fn test_toc_config() {
        let cfg = TocConfig::new().with_summaries(false).with_max_depth(3);

        assert!(!cfg.include_summaries);
        assert_eq!(cfg.max_depth, Some(3));
    }
}