html_outliner/
outline_structure.rs1use kuchiki::{parse_html, traits::TendrilSink, NodeData, NodeRef};
2
3use crate::{heading::*, sectioning_type::SectioningType};
4
/// Tag names treated as sectioning roots; elements with these names are skipped while an
/// outline is being built.
///
/// The entries must stay in ascending order because lookups use `binary_search`.
const SECTIONING_ROOTS: [&str; 7] = [
    "blockquote",
    "body",
    "details",
    "dialog",
    "fieldset",
    "figure",
    "td",
];
7
8#[derive(Debug, Clone)]
9pub struct OutlineStructure {
10 pub sectioning_type: SectioningType,
11 pub heading: Option<Heading>,
12 pub sub_outline_structures: Vec<OutlineStructure>,
13}
14
15impl OutlineStructure {
16 #[inline]
17 pub(crate) fn new(sectioning_type: SectioningType) -> OutlineStructure {
18 OutlineStructure {
19 sectioning_type,
20 heading: None,
21 sub_outline_structures: Vec::new(),
22 }
23 }
24
25 #[inline]
26 pub fn parse_html<S: AsRef<str>>(html: S, max_depth: usize) -> OutlineStructure {
27 let node = parse_html().one(html.as_ref());
28
29 if let Some(outline_structure) =
30 create_outline_structure_finding_body(node.clone(), 0, max_depth)
31 {
32 outline_structure
33 } else {
34 create_outline_structure(SectioningType::Root, node, 0, max_depth)
35 .unwrap_or_else(|| OutlineStructure::new(SectioningType::Root))
36 }
37 }
38}
39
40pub(crate) fn create_outline_structure(
41 sectioning_type: SectioningType,
42 node: NodeRef,
43 depth: usize,
44 max_depth: usize,
45) -> Option<OutlineStructure> {
46 if depth > max_depth {
47 return None;
48 }
49
50 let mut outline_structure = OutlineStructure::new(sectioning_type);
51
52 let mut find_heading = true;
53
54 for child in node.children() {
55 if let NodeData::Element(element_data) = child.data() {
56 let local_name: &str = &element_data.name.local;
57
58 if SECTIONING_ROOTS.binary_search(&local_name).is_ok() {
59 continue;
60 }
61
62 if let Some(sub_sectioning_type) =
63 SectioningType::from_sectioning_content_tag(local_name)
64 {
65 if let Some(sub_outline_structure) =
66 create_outline_structure(sub_sectioning_type, child, depth + 1, max_depth)
67 {
68 outline_structure.sub_outline_structures.push(sub_outline_structure);
69 }
70 } else if let Some(heading) = create_heading(child.clone(), depth + 1, max_depth) {
71 if find_heading {
72 outline_structure.heading = Some(heading);
73 } else {
74 let mut sub_outline_structure = OutlineStructure::new(SectioningType::Heading);
75
76 sub_outline_structure.heading = Some(heading);
77
78 outline_structure.sub_outline_structures.push(sub_outline_structure);
79 }
80 } else {
81 if let Some(sub_outline_structure) =
82 create_outline_structure(SectioningType::Root, child, depth + 1, max_depth)
83 {
84 if let Some(heading) = sub_outline_structure.heading {
85 if find_heading {
86 outline_structure.heading = Some(heading);
87 } else {
88 let mut sub_outline_structure =
89 OutlineStructure::new(SectioningType::Heading);
90
91 sub_outline_structure.heading = Some(heading);
92
93 outline_structure.sub_outline_structures.push(sub_outline_structure);
94 }
95 }
96
97 for os in sub_outline_structure.sub_outline_structures {
98 outline_structure.sub_outline_structures.push(os);
99 }
100 }
101
102 continue;
103 }
104
105 find_heading = false;
106 }
107 }
108
109 Some(outline_structure)
110}
111
112pub(crate) fn create_outline_structure_finding_body(
113 node: NodeRef,
114 depth: usize,
115 max_depth: usize,
116) -> Option<OutlineStructure> {
117 if depth > max_depth {
118 return None;
119 }
120
121 if let NodeData::Element(element_data) = node.data() {
122 let local_name: &str = &element_data.name.local;
123
124 if local_name == "body" {
125 return Some(
126 create_outline_structure(SectioningType::Body, node.clone(), depth, max_depth)
127 .unwrap_or_else(|| OutlineStructure::new(SectioningType::Root)),
128 );
129 }
130 }
131
132 for child in node.children() {
133 if let Some(outline_structure) =
134 create_outline_structure_finding_body(child, depth + 1, max_depth)
135 {
136 return Some(outline_structure);
137 }
138 }
139
140 None
141}