1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
/// A single heading of a document, with its position and optional numbering.
///
/// NOTE(review): the unit of the offsets (bytes vs. characters) and the base
/// of `line_number` are not established in this file — confirm with the code
/// that produces these values.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Heading {
    /// Heading level. Presumably `1` is the outermost level — TODO confirm.
    pub level: u8,

    /// Heading text. `display_string` prefixes it with `section_number` when
    /// one is set.
    pub text: String,

    /// Offset at which the heading starts. `Section::contains_offset` uses
    /// this as the inclusive lower bound of a section.
    pub start_offset: usize,

    /// Offset at which the heading ends.
    /// NOTE(review): inclusive vs. exclusive is not shown here — confirm.
    pub end_offset: usize,

    /// Line on which the heading appears; `Heading::new` initializes it to `0`
    /// until `with_line_number` is called.
    pub line_number: usize,

    /// Section number attached to the heading, if any (e.g. `"1"`).
    pub section_number: Option<String>,
}
30
31impl Heading {
32 pub fn new(level: u8, text: String, start_offset: usize, end_offset: usize) -> Self {
34 Self {
35 level,
36 text,
37 start_offset,
38 end_offset,
39 line_number: 0,
40 section_number: None,
41 }
42 }
43
44 pub fn with_line_number(mut self, line_number: usize) -> Self {
46 self.line_number = line_number;
47 self
48 }
49
50 pub fn with_section_number(mut self, section_number: String) -> Self {
52 self.section_number = Some(section_number);
53 self
54 }
55
56 pub fn display_string(&self) -> String {
58 if let Some(ref num) = self.section_number {
59 format!("{} {}", num, self.text)
60 } else {
61 self.text.clone()
62 }
63 }
64}
65
/// A heading together with the span of content that belongs to it and its
/// links to other sections.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Section {
    /// The heading that opens this section.
    pub heading: Heading,

    /// Offset at which the section's content starts.
    pub content_start: usize,

    /// Offset at which the section's content ends. `contains_offset` treats
    /// it as exclusive.
    pub content_end: usize,

    /// Index of the parent section in `DocumentStructure::sections`, or
    /// `None` when the section has no parent.
    pub parent_section: Option<usize>,

    /// Indices of child sections.
    /// NOTE(review): presumably indices into `DocumentStructure::sections`,
    /// like `parent_section` — confirm against the code that fills this in.
    pub child_sections: Vec<usize>,
}
84
85impl Section {
86 pub fn new(heading: Heading, content_start: usize, content_end: usize) -> Self {
88 Self {
89 heading,
90 content_start,
91 content_end,
92 parent_section: None,
93 child_sections: Vec::new(),
94 }
95 }
96
97 pub fn content_length(&self) -> usize {
99 self.content_end.saturating_sub(self.content_start)
100 }
101
102 pub fn contains_offset(&self, offset: usize) -> bool {
104 offset >= self.heading.start_offset && offset < self.content_end
105 }
106
107 pub fn is_root(&self) -> bool {
109 self.parent_section.is_none()
110 }
111
112 pub fn has_children(&self) -> bool {
114 !self.child_sections.is_empty()
115 }
116}
117
/// Bookkeeping about how sections nest: which sections are roots and the
/// depth recorded for each section.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingHierarchy {
    /// Section indices registered as roots; `is_root` checks membership here.
    pub root_sections: Vec<usize>,

    /// Depth recorded per section index; `get_depth` looks entries up here.
    /// NOTE(review): whether roots are depth 0 or 1 is not shown in this
    /// file — confirm against the code that populates the map.
    pub depth_map: HashMap<usize, usize>,
}
127
128impl HeadingHierarchy {
129 pub fn new() -> Self {
131 Self {
132 root_sections: Vec::new(),
133 depth_map: HashMap::new(),
134 }
135 }
136
137 pub fn get_depth(&self, section_idx: usize) -> Option<usize> {
139 self.depth_map.get(§ion_idx).copied()
140 }
141
142 pub fn is_root(&self, section_idx: usize) -> bool {
144 self.root_sections.contains(§ion_idx)
145 }
146}
147
148impl Default for HeadingHierarchy {
149 fn default() -> Self {
150 Self::new()
151 }
152}
153
/// The heading structure of a document: its headings, the sections built
/// from them, and the nesting information for those sections.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentStructure {
    /// Headings of the document; `has_structure` reports whether any exist.
    pub headings: Vec<Heading>,

    /// Sections of the document. Positions in this vector are the section
    /// indices used by `Section::parent_section` and `get_heading_path`.
    pub sections: Vec<Section>,

    /// Root/depth bookkeeping for the sections.
    pub hierarchy: HeadingHierarchy,
}
166
167impl DocumentStructure {
168 pub fn new() -> Self {
170 Self {
171 headings: Vec::new(),
172 sections: Vec::new(),
173 hierarchy: HeadingHierarchy::new(),
174 }
175 }
176
177 pub fn find_section_containing_offset(&self, offset: usize) -> Option<usize> {
179 self.sections
180 .iter()
181 .position(|section| section.contains_offset(offset))
182 }
183
184 pub fn get_heading_path(&self, section_idx: usize) -> Vec<String> {
189 let mut path = Vec::new();
190 let mut current_idx = Some(section_idx);
191
192 while let Some(idx) = current_idx {
194 if idx < self.sections.len() {
195 let section = &self.sections[idx];
196 path.push(section.heading.display_string());
197 current_idx = section.parent_section;
198 } else {
199 break;
200 }
201 }
202
203 path.reverse();
205 path
206 }
207
208 pub fn get_sections_at_level(&self, level: u8) -> Vec<&Section> {
210 self.sections
211 .iter()
212 .filter(|s| s.heading.level == level)
213 .collect()
214 }
215
216 pub fn section_count(&self) -> usize {
218 self.sections.len()
219 }
220
221 pub fn max_depth(&self) -> usize {
223 self.hierarchy
224 .depth_map
225 .values()
226 .max()
227 .copied()
228 .unwrap_or(0)
229 }
230
231 pub fn has_structure(&self) -> bool {
233 !self.headings.is_empty()
234 }
235
236 pub fn get_statistics(&self) -> StructureStatistics {
238 let mut level_counts: HashMap<u8, usize> = HashMap::new();
239 for heading in &self.headings {
240 *level_counts.entry(heading.level).or_insert(0) += 1;
241 }
242
243 StructureStatistics {
244 total_headings: self.headings.len(),
245 total_sections: self.sections.len(),
246 max_depth: self.max_depth(),
247 level_counts,
248 root_sections: self.hierarchy.root_sections.len(),
249 }
250 }
251}
252
253impl Default for DocumentStructure {
254 fn default() -> Self {
255 Self::new()
256 }
257}
258
/// Summary figures about a `DocumentStructure`, as produced by
/// `DocumentStructure::get_statistics`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructureStatistics {
    /// Number of headings in the document.
    pub total_headings: usize,

    /// Number of sections in the document.
    pub total_sections: usize,

    /// Largest depth recorded in the hierarchy's depth map (`0` if empty).
    pub max_depth: usize,

    /// Number of headings per heading level.
    pub level_counts: HashMap<u8, usize>,

    /// Number of entries in the hierarchy's root-section list.
    pub root_sections: usize,
}
277
278impl StructureStatistics {
279 pub fn print_summary(&self) {
281 println!("Document Structure Statistics:");
282 println!(" Total headings: {}", self.total_headings);
283 println!(" Total sections: {}", self.total_sections);
284 println!(" Max depth: {}", self.max_depth);
285 println!(" Root sections: {}", self.root_sections);
286 println!(" Headings by level:");
287 let mut levels: Vec<_> = self.level_counts.iter().collect();
288 levels.sort_by_key(|(level, _)| *level);
289 for (level, count) in levels {
290 println!(" Level {}: {} headings", level, count);
291 }
292 }
293}
294
/// The notation a section number is written in.
///
/// NOTE(review): this file does not show how a format is detected; the
/// per-variant descriptions below are inferred from the variant names and the
/// single usage in the tests — confirm against the parser.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SectionNumberFormat {
    /// Presumably a plain number such as `1` — TODO confirm.
    Numeric,
    /// Dot-separated numbers; the tests use this for `"1.2.3"`.
    Decimal,
    /// Presumably Roman numerals — TODO confirm.
    Roman,
    /// Presumably letters — TODO confirm.
    Alphabetic,
    /// Presumably a combination of the other notations — TODO confirm.
    Mixed,
}
309
/// A section number broken down into its numeric components.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SectionNumber {
    /// The section number as text (e.g. `"1.2.3"`).
    pub raw: String,

    /// Notation of the section number.
    pub format: SectionNumberFormat,

    /// Numeric components of the section number (e.g. `[1, 2, 3]` for
    /// `"1.2.3"`); their count determines `depth`.
    pub components: Vec<usize>,
}
322
323impl SectionNumber {
324 pub fn depth(&self) -> u8 {
326 self.components.len().min(255) as u8
327 }
328
329 pub fn is_deeper_than(&self, other: &SectionNumber) -> bool {
331 self.depth() > other.depth()
332 }
333}
334
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a heading with the default line number and no section
    /// number.
    fn heading(level: u8, text: &str, start: usize, end: usize) -> Heading {
        Heading::new(level, text.to_string(), start, end)
    }

    /// Builds a section whose parent link already points at `parent`.
    fn child_of(parent: usize, heading: Heading, start: usize, end: usize) -> Section {
        let mut section = Section::new(heading, start, end);
        section.parent_section = Some(parent);
        section
    }

    /// Builder methods set the line number and section number, and the
    /// display string includes the section number.
    #[test]
    fn test_heading_creation() {
        let chapter = heading(1, "Chapter 1", 0, 9)
            .with_line_number(5)
            .with_section_number(String::from("1"));

        assert_eq!(chapter.level, 1);
        assert_eq!(chapter.text, "Chapter 1");
        assert_eq!(chapter.line_number, 5);
        assert_eq!(chapter.display_string(), "1 Chapter 1");
    }

    /// A section covers its heading start (inclusive) up to its content end
    /// (exclusive).
    #[test]
    fn test_section_contains_offset() {
        let section = Section::new(heading(1, "Test", 0, 10), 10, 100);

        for inside in [0, 50] {
            assert!(section.contains_offset(inside));
        }
        for outside in [100, 150] {
            assert!(!section.contains_offset(outside));
        }
    }

    /// Section counting, structure detection and offset lookup on a small
    /// two-section document.
    #[test]
    fn test_document_structure() {
        let chapter = heading(1, "Chapter 1", 0, 9);
        let subsection = heading(2, "Section 1.1", 50, 61);

        let mut structure = DocumentStructure::new();
        structure.headings.push(chapter.clone());
        structure.headings.push(subsection.clone());
        structure.sections.push(Section::new(chapter, 10, 50));
        structure.sections.push(child_of(0, subsection, 62, 100));
        structure.hierarchy.root_sections.push(0);

        assert_eq!(structure.section_count(), 2);
        assert!(structure.has_structure());
        assert_eq!(structure.find_section_containing_offset(25), Some(0));
        assert_eq!(structure.find_section_containing_offset(75), Some(1));
    }

    /// The heading path lists ancestors from the outermost section down to
    /// the requested one.
    #[test]
    fn test_heading_path() {
        let chapter = heading(1, "Chapter 1", 0, 9);
        let section = heading(2, "Section 1.1", 50, 61);
        let subsection = heading(3, "Subsection 1.1.1", 100, 116);

        let mut structure = DocumentStructure::new();
        for h in [&chapter, &section, &subsection] {
            structure.headings.push(h.clone());
        }
        structure.sections.push(Section::new(chapter, 10, 50));
        structure.sections.push(child_of(0, section, 62, 100));
        structure.sections.push(child_of(1, subsection, 117, 200));

        let path = structure.get_heading_path(2);
        assert_eq!(path.len(), 3);
        assert_eq!(path[0], "Chapter 1");
        assert_eq!(path[1], "Section 1.1");
        assert_eq!(path[2], "Subsection 1.1.1");
    }

    /// Statistics count headings in total and per level.
    #[test]
    fn test_structure_statistics() {
        let mut structure = DocumentStructure::new();
        structure.headings.push(heading(1, "H1", 0, 2));
        structure.headings.push(heading(2, "H2", 10, 12));
        structure.headings.push(heading(2, "H2b", 20, 23));

        let stats = structure.get_statistics();
        assert_eq!(stats.total_headings, 3);
        assert_eq!(stats.level_counts.get(&1), Some(&1));
        assert_eq!(stats.level_counts.get(&2), Some(&2));
    }

    /// Depth equals the number of components.
    #[test]
    fn test_section_number_depth() {
        let section_num = SectionNumber {
            components: vec![1, 2, 3],
            format: SectionNumberFormat::Decimal,
            raw: String::from("1.2.3"),
        };

        assert_eq!(section_num.depth(), 3);
    }
}