1use kuchiki::{NodeData, NodeRef};
2use once_cell::sync::Lazy;
3use regex::Regex;
4
/// Matches a line feed together with any run of line feeds, tabs and spaces that directly
/// follows it. The pattern is compiled once, on first use.
static RE_NEW_LINE: Lazy<Regex> = Lazy::new(|| Regex::new("\n[\n\t ]*").unwrap());
6
/// A heading, or a group of headings, found in a document.
#[derive(Debug, Clone)]
pub enum Heading {
    /// A single header carrying its numeric level and its text.
    Header { level: u8, text: String },
    /// An ordered group of headings; members may themselves be groups.
    Group(Vec<Heading>),
}

impl Heading {
    /// Returns the level of the last header in this heading.
    ///
    /// For a [`Heading::Header`] this is its own level; for a [`Heading::Group`] it is the end
    /// level of the group's last member.
    ///
    /// # Panics
    ///
    /// Panics if a group (at any nesting depth along the path) is empty.
    #[inline]
    pub fn get_end_level(&self) -> u8 {
        match self {
            Heading::Header {
                level, ..
            } => *level,
            Heading::Group(headings) => {
                headings.last().expect("a heading group must not be empty").get_end_level()
            },
        }
    }

    /// Returns the level of the first header in this heading.
    ///
    /// For a [`Heading::Header`] this is its own level; for a [`Heading::Group`] it is the start
    /// level of the group's first member.
    ///
    /// # Panics
    ///
    /// Panics if a group (at any nesting depth along the path) is empty.
    #[inline]
    pub fn get_start_level(&self) -> u8 {
        match self {
            Heading::Header {
                level, ..
            } => *level,
            // Recurse with `get_start_level`, not `get_end_level`: when the first member is
            // itself a group, its *first* header defines where this heading starts.
            Heading::Group(headings) => {
                headings.first().expect("a heading group must not be empty").get_start_level()
            },
        }
    }
}
34
35pub(crate) fn create_heading(node: NodeRef, depth: usize, max_depth: usize) -> Option<Heading> {
36 if depth > max_depth {
37 return None;
38 }
39
40 let mut heading = if let NodeData::Element(element_data) = node.data() {
41 let local_name: &str = &element_data.name.local;
42
43 let local_name_length = local_name.len();
44
45 match local_name_length {
46 2 => {
47 if let Some(stripped_local_name) = local_name.strip_prefix('h') {
48 match stripped_local_name.parse::<u8>() {
49 Ok(level) if (1..=6).contains(&level) => Heading::Header {
50 level,
51 text: String::new(),
52 },
53 _ => return None,
54 }
55 } else {
56 return None;
57 }
58 },
59 6 => {
60 if local_name.eq("hgroup") {
61 Heading::Group(Vec::with_capacity(2))
62 } else {
63 return None;
64 }
65 },
66 _ => return None,
67 }
68 } else {
69 return None;
70 };
71
72 match &mut heading {
73 Heading::Header {
74 text, ..
75 } => {
76 for child in node.children() {
77 create_text(text, child, depth + 1, max_depth);
78 }
79 },
80 Heading::Group(headings) => {
81 for child in node.children() {
82 if let Some(heading) = create_heading(child, depth + 1, max_depth) {
83 headings.push(heading);
84 }
85 }
86
87 if headings.is_empty() {
88 return None;
89 }
90 },
91 }
92
93 Some(heading)
94}
95
96impl From<Heading> for String {
97 #[inline]
98 fn from(heading: Heading) -> String {
99 match heading {
100 Heading::Header {
101 text, ..
102 } => text,
103 Heading::Group(headings) => {
104 let mut iter = headings.into_iter();
105
106 let mut text: String = iter.next().unwrap().into();
107
108 for heading in iter {
109 text.push_str(" — ");
110
111 let t: String = heading.into();
112 text.push_str(&t);
113 }
114
115 text
116 },
117 }
118 }
119}
120
121#[inline]
122pub(crate) fn create_text(text: &mut String, node: NodeRef, depth: usize, max_depth: usize) {
123 if depth > max_depth {
124 return;
125 }
126
127 if let NodeData::Text(t) = node.data() {
128 let t = t.borrow();
129
130 text.push_str(RE_NEW_LINE.replace(t.as_str(), " ").trim());
131 } else {
132 for child in node.children() {
133 create_text(text, child, depth + 1, max_depth);
134 }
135 }
136}