Skip to main content

vectorless/domain/
tree.rs

1// Copyright (c) 2026 vectorless developers
2// SPDX-License-Identifier: Apache-2.0
3
4//! Document tree using arena-based allocation.
5//!
6//! This structure provides better memory locality and simpler
7//! lifetime management compared to `Rc<RefCell<PageNode>`.
8
9use indextree::Arena;
10use serde::{Serialize, Deserialize};
11
12use super::node::{NodeId, TreeNode};
13
14/// JSON structure for exporting document tree (matches PageIndex format).
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct StructureNode {
17    /// Node title.
18    pub title: String,
19    /// Unique node identifier.
20    pub node_id: String,
21    /// Starting line number (1-based).
22    pub start_index: usize,
23    /// Ending line number (1-based).
24    pub end_index: usize,
25    /// Generated summary (optional).
26    #[serde(skip_serializing_if = "Option::is_none")]
27    pub summary: Option<String>,
28    /// Child nodes.
29    #[serde(skip_serializing_if = "Vec::is_empty")]
30    pub nodes: Vec<StructureNode>,
31}
32
33/// Document structure for JSON export.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct DocumentStructure {
36    /// Document name.
37    pub doc_name: String,
38    /// Tree structure.
39    pub structure: Vec<StructureNode>,
40}
41
42/// A hierarchical document tree structure.
43///
44/// Uses an arena-based tree representation for efficient traversal
45/// and node manipulation.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct DocumentTree {
48    /// The underlying arena storing all nodes.
49    arena: Arena<TreeNode>,
50
51    /// The root node ID.
52    root_id: NodeId,
53}
54
55impl DocumentTree {
56    /// Create a new document tree with a root node.
57    pub fn new(title: &str, content: &str) -> Self {
58        let mut arena = Arena::new();
59        let root_data = TreeNode {
60            title: title.to_string(),
61            content: content.to_string(),
62            summary: String::new(),
63            depth: 0,
64            start_index: 1,
65            end_index: 1,
66            start_page: None,
67            end_page: None,
68            node_id: None,
69            physical_index: None,
70            token_count: None,
71        };
72        let root_id = arena.new_node(root_data);
73
74        Self { arena, root_id: NodeId(root_id) }
75    }
76
77    /// Create a document tree from an existing arena and root ID.
78    ///
79    /// This is useful for deserialization and testing.
80    pub fn from_raw(arena: Arena<TreeNode>, root_id: NodeId) -> Self {
81        Self { arena, root_id }
82    }
83
84    /// Get the root node ID.
85    pub fn root(&self) -> NodeId {
86        self.root_id
87    }
88
89    /// Get a reference to the underlying arena.
90    pub fn arena(&self) -> &Arena<TreeNode> {
91        &self.arena
92    }
93
94    /// Get a node by its ID.
95    ///
96    /// Returns None if the node doesn't exist.
97    pub fn get(&self, id: NodeId) -> Option<&TreeNode> {
98        self.arena.get(id.0).map(|n| n.get())
99    }
100
101    /// Get a mutable reference to a node by its ID.
102    ///
103    /// Returns None if the node doesn't exist.
104    pub fn get_mut(&mut self, id: NodeId) -> Option<&mut TreeNode> {
105        self.arena.get_mut(id.0).map(|n| n.get_mut())
106    }
107
108    /// Add a child node to the specified parent.
109    ///
110    /// Returns the ID of the newly created child node.
111    pub fn add_child(&mut self, parent: NodeId, title: &str, content: &str) -> NodeId {
112        let parent_depth = self.arena.get(parent.0).map(|n| n.get().depth).unwrap_or(0);
113        let child_data = TreeNode {
114            title: title.to_string(),
115            content: content.to_string(),
116            summary: String::new(),
117            depth: parent_depth + 1,
118            start_index: 1,
119            end_index: 1,
120            start_page: None,
121            end_page: None,
122            node_id: None,
123            physical_index: None,
124            token_count: None,
125        };
126        let child_id = self.arena.new_node(child_data);
127        parent.0.append(child_id, &mut self.arena);
128        NodeId(child_id)
129    }
130
131    /// Add a child node with page boundaries.
132    ///
133    /// Returns the ID of the newly created child node.
134    pub fn add_child_with_pages(
135        &mut self,
136        parent: NodeId,
137        title: &str,
138        content: &str,
139        start_page: usize,
140        end_page: usize,
141    ) -> NodeId {
142        let child_id = self.add_child(parent, title, content);
143        if let Some(node) = self.get_mut(child_id) {
144            node.start_page = Some(start_page);
145            node.end_page = Some(end_page);
146        }
147        child_id
148    }
149
150    /// Check if a node is a leaf (has no children).
151    pub fn is_leaf(&self, id: NodeId) -> bool {
152        id.0.children(&self.arena).next().is_none()
153    }
154
155    /// Get the children of a node.
156    pub fn children(&self, id: NodeId) -> Vec<NodeId> {
157        id.0.children(&self.arena).map(NodeId).collect()
158    }
159
160    /// Get the parent of a node.
161    ///
162    /// Returns None if the node is the root or doesn't have a parent.
163    pub fn parent(&self, id: NodeId) -> Option<NodeId> {
164        id.0.parent(&self.arena).map(NodeId)
165    }
166
167    /// Get all leaf nodes in the tree.
168    pub fn leaves(&self) -> Vec<NodeId> {
169        self.traverse()
170            .into_iter()
171            .filter(|id| self.is_leaf(*id))
172            .collect()
173    }
174
175    /// Get all nodes in the tree (depth-first order).
176    pub fn traverse(&self) -> Vec<NodeId> {
177        let mut result = Vec::new();
178        let mut stack = vec![self.root_id];
179
180        while let Some(id) = stack.pop() {
181            result.push(id);
182            // Add children in reverse order for correct DFS order
183            let mut children: Vec<_> = self.children(id).into_iter().collect();
184            children.reverse();
185            stack.extend(children);
186        }
187
188        result
189    }
190
191    /// Get the number of nodes in the tree.
192    pub fn node_count(&self) -> usize {
193        self.arena.count()
194    }
195
196    /// Update a node's summary.
197    pub fn set_summary(&mut self, id: NodeId, summary: &str) {
198        if let Some(node) = self.get_mut(id) {
199            node.summary = summary.to_string();
200        }
201    }
202
203    /// Update a node's content.
204    pub fn set_content(&mut self, id: NodeId, content: &str) {
205        if let Some(node) = self.get_mut(id) {
206            node.content = content.to_string();
207        }
208    }
209
210    /// Set page boundaries for a node.
211    pub fn set_page_boundaries(&mut self, id: NodeId, start: usize, end: usize) {
212        if let Some(node) = self.get_mut(id) {
213            node.start_page = Some(start);
214            node.end_page = Some(end);
215        }
216    }
217
218    /// Set line indices for a node.
219    pub fn set_line_indices(&mut self, id: NodeId, start: usize, end: usize) {
220        if let Some(node) = self.get_mut(id) {
221            node.start_index = start;
222            node.end_index = end;
223        }
224    }
225
226    /// Get page range for a node.
227    pub fn page_range(&self, id: NodeId) -> Option<(usize, usize)> {
228        let node = self.get(id)?;
229        match (node.start_page, node.end_page) {
230            (Some(start), Some(end)) => Some((start, end)),
231            _ => None,
232        }
233    }
234
235    /// Check if a node contains a specific page.
236    pub fn contains_page(&self, id: NodeId, page: usize) -> bool {
237        if let Some((start, end)) = self.page_range(id) {
238            page >= start && page <= end
239        } else {
240            false
241        }
242    }
243
244    /// Set the node ID (identifier string).
245    pub fn set_node_id(&mut self, id: NodeId, node_id: &str) {
246        if let Some(node) = self.get_mut(id) {
247            node.node_id = Some(node_id.to_string());
248        }
249    }
250
251    /// Set the physical index marker.
252    pub fn set_physical_index(&mut self, id: NodeId, index: &str) {
253        if let Some(node) = self.get_mut(id) {
254            node.physical_index = Some(index.to_string());
255        }
256    }
257
258    /// Update token count for a node.
259    pub fn set_token_count(&mut self, id: NodeId, count: usize) {
260        if let Some(node) = self.get_mut(id) {
261            node.token_count = Some(count);
262        }
263    }
264
265    /// Export the tree structure to JSON format (PageIndex compatible).
266    pub fn to_structure_json(&self, doc_name: &str) -> DocumentStructure {
267        let structure = self.build_structure_nodes(self.root_id);
268        DocumentStructure {
269            doc_name: doc_name.to_string(),
270            structure,
271        }
272    }
273
274    /// Recursively build structure nodes starting from the given node.
275    fn build_structure_nodes(&self, node_id: NodeId) -> Vec<StructureNode> {
276        let children = self.children(node_id);
277        children.into_iter().enumerate().map(|(idx, child_id)| {
278            self.node_to_structure(child_id, idx)
279        }).collect()
280    }
281
282    /// Convert a single node to StructureNode format.
283    fn node_to_structure(&self, node_id: NodeId, _idx: usize) -> StructureNode {
284        let node = self.get(node_id).cloned().unwrap_or_default();
285        let children = self.children(node_id);
286
287        StructureNode {
288            title: node.title,
289            node_id: node.node_id.clone().unwrap_or_else(|| format!("{:04}", _idx)),
290            start_index: node.start_index,
291            end_index: node.end_index,
292            summary: if node.summary.is_empty() { None } else { Some(node.summary) },
293            nodes: children.into_iter().enumerate().map(|(i, c)| self.node_to_structure(c, i)).collect(),
294        }
295    }
296}
297
298impl Default for DocumentTree {
299    fn default() -> Self {
300        Self::new("Root", "")
301    }
302}