1use indextree::Arena;
10use serde::{Serialize, Deserialize};
11
12use super::node::{NodeId, TreeNode};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct StructureNode {
17 pub title: String,
19 pub node_id: String,
21 pub start_index: usize,
23 pub end_index: usize,
25 #[serde(skip_serializing_if = "Option::is_none")]
27 pub summary: Option<String>,
28 #[serde(skip_serializing_if = "Vec::is_empty")]
30 pub nodes: Vec<StructureNode>,
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct DocumentStructure {
36 pub doc_name: String,
38 pub structure: Vec<StructureNode>,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct DocumentTree {
48 arena: Arena<TreeNode>,
50
51 root_id: NodeId,
53}
54
55impl DocumentTree {
56 pub fn new(title: &str, content: &str) -> Self {
58 let mut arena = Arena::new();
59 let root_data = TreeNode {
60 title: title.to_string(),
61 content: content.to_string(),
62 summary: String::new(),
63 depth: 0,
64 start_index: 1,
65 end_index: 1,
66 start_page: None,
67 end_page: None,
68 node_id: None,
69 physical_index: None,
70 token_count: None,
71 };
72 let root_id = arena.new_node(root_data);
73
74 Self { arena, root_id: NodeId(root_id) }
75 }
76
77 pub fn from_raw(arena: Arena<TreeNode>, root_id: NodeId) -> Self {
81 Self { arena, root_id }
82 }
83
84 pub fn root(&self) -> NodeId {
86 self.root_id
87 }
88
89 pub fn arena(&self) -> &Arena<TreeNode> {
91 &self.arena
92 }
93
94 pub fn get(&self, id: NodeId) -> Option<&TreeNode> {
98 self.arena.get(id.0).map(|n| n.get())
99 }
100
101 pub fn get_mut(&mut self, id: NodeId) -> Option<&mut TreeNode> {
105 self.arena.get_mut(id.0).map(|n| n.get_mut())
106 }
107
108 pub fn add_child(&mut self, parent: NodeId, title: &str, content: &str) -> NodeId {
112 let parent_depth = self.arena.get(parent.0).map(|n| n.get().depth).unwrap_or(0);
113 let child_data = TreeNode {
114 title: title.to_string(),
115 content: content.to_string(),
116 summary: String::new(),
117 depth: parent_depth + 1,
118 start_index: 1,
119 end_index: 1,
120 start_page: None,
121 end_page: None,
122 node_id: None,
123 physical_index: None,
124 token_count: None,
125 };
126 let child_id = self.arena.new_node(child_data);
127 parent.0.append(child_id, &mut self.arena);
128 NodeId(child_id)
129 }
130
131 pub fn add_child_with_pages(
135 &mut self,
136 parent: NodeId,
137 title: &str,
138 content: &str,
139 start_page: usize,
140 end_page: usize,
141 ) -> NodeId {
142 let child_id = self.add_child(parent, title, content);
143 if let Some(node) = self.get_mut(child_id) {
144 node.start_page = Some(start_page);
145 node.end_page = Some(end_page);
146 }
147 child_id
148 }
149
150 pub fn is_leaf(&self, id: NodeId) -> bool {
152 id.0.children(&self.arena).next().is_none()
153 }
154
155 pub fn children(&self, id: NodeId) -> Vec<NodeId> {
157 id.0.children(&self.arena).map(NodeId).collect()
158 }
159
160 pub fn parent(&self, id: NodeId) -> Option<NodeId> {
164 id.0.parent(&self.arena).map(NodeId)
165 }
166
167 pub fn leaves(&self) -> Vec<NodeId> {
169 self.traverse()
170 .into_iter()
171 .filter(|id| self.is_leaf(*id))
172 .collect()
173 }
174
175 pub fn traverse(&self) -> Vec<NodeId> {
177 let mut result = Vec::new();
178 let mut stack = vec![self.root_id];
179
180 while let Some(id) = stack.pop() {
181 result.push(id);
182 let mut children: Vec<_> = self.children(id).into_iter().collect();
184 children.reverse();
185 stack.extend(children);
186 }
187
188 result
189 }
190
191 pub fn node_count(&self) -> usize {
193 self.arena.count()
194 }
195
196 pub fn set_summary(&mut self, id: NodeId, summary: &str) {
198 if let Some(node) = self.get_mut(id) {
199 node.summary = summary.to_string();
200 }
201 }
202
203 pub fn set_content(&mut self, id: NodeId, content: &str) {
205 if let Some(node) = self.get_mut(id) {
206 node.content = content.to_string();
207 }
208 }
209
210 pub fn set_page_boundaries(&mut self, id: NodeId, start: usize, end: usize) {
212 if let Some(node) = self.get_mut(id) {
213 node.start_page = Some(start);
214 node.end_page = Some(end);
215 }
216 }
217
218 pub fn set_line_indices(&mut self, id: NodeId, start: usize, end: usize) {
220 if let Some(node) = self.get_mut(id) {
221 node.start_index = start;
222 node.end_index = end;
223 }
224 }
225
226 pub fn page_range(&self, id: NodeId) -> Option<(usize, usize)> {
228 let node = self.get(id)?;
229 match (node.start_page, node.end_page) {
230 (Some(start), Some(end)) => Some((start, end)),
231 _ => None,
232 }
233 }
234
235 pub fn contains_page(&self, id: NodeId, page: usize) -> bool {
237 if let Some((start, end)) = self.page_range(id) {
238 page >= start && page <= end
239 } else {
240 false
241 }
242 }
243
244 pub fn set_node_id(&mut self, id: NodeId, node_id: &str) {
246 if let Some(node) = self.get_mut(id) {
247 node.node_id = Some(node_id.to_string());
248 }
249 }
250
251 pub fn set_physical_index(&mut self, id: NodeId, index: &str) {
253 if let Some(node) = self.get_mut(id) {
254 node.physical_index = Some(index.to_string());
255 }
256 }
257
258 pub fn set_token_count(&mut self, id: NodeId, count: usize) {
260 if let Some(node) = self.get_mut(id) {
261 node.token_count = Some(count);
262 }
263 }
264
265 pub fn to_structure_json(&self, doc_name: &str) -> DocumentStructure {
267 let structure = self.build_structure_nodes(self.root_id);
268 DocumentStructure {
269 doc_name: doc_name.to_string(),
270 structure,
271 }
272 }
273
274 fn build_structure_nodes(&self, node_id: NodeId) -> Vec<StructureNode> {
276 let children = self.children(node_id);
277 children.into_iter().enumerate().map(|(idx, child_id)| {
278 self.node_to_structure(child_id, idx)
279 }).collect()
280 }
281
282 fn node_to_structure(&self, node_id: NodeId, _idx: usize) -> StructureNode {
284 let node = self.get(node_id).cloned().unwrap_or_default();
285 let children = self.children(node_id);
286
287 StructureNode {
288 title: node.title,
289 node_id: node.node_id.clone().unwrap_or_else(|| format!("{:04}", _idx)),
290 start_index: node.start_index,
291 end_index: node.end_index,
292 summary: if node.summary.is_empty() { None } else { Some(node.summary) },
293 nodes: children.into_iter().enumerate().map(|(i, c)| self.node_to_structure(c, i)).collect(),
294 }
295 }
296}
297
298impl Default for DocumentTree {
299 fn default() -> Self {
300 Self::new("Root", "")
301 }
302}