html_outliner/
outline_structure.rs

1use kuchiki::{parse_html, traits::TendrilSink, NodeData, NodeRef};
2
3use crate::{heading::*, sectioning_type::SectioningType};
4
/// Local names of the HTML elements treated as sectioning roots.
///
/// The entries must stay in ascending order: membership is tested with
/// `binary_search`, which only gives correct answers on a sorted slice.
const SECTIONING_ROOTS: [&str; 7] = [
    "blockquote",
    "body",
    "details",
    "dialog",
    "fieldset",
    "figure",
    "td",
];
7
8#[derive(Debug, Clone)]
9pub struct OutlineStructure {
10    pub sectioning_type:        SectioningType,
11    pub heading:                Option<Heading>,
12    pub sub_outline_structures: Vec<OutlineStructure>,
13}
14
15impl OutlineStructure {
16    #[inline]
17    pub(crate) fn new(sectioning_type: SectioningType) -> OutlineStructure {
18        OutlineStructure {
19            sectioning_type,
20            heading: None,
21            sub_outline_structures: Vec::new(),
22        }
23    }
24
25    #[inline]
26    pub fn parse_html<S: AsRef<str>>(html: S, max_depth: usize) -> OutlineStructure {
27        let node = parse_html().one(html.as_ref());
28
29        if let Some(outline_structure) =
30            create_outline_structure_finding_body(node.clone(), 0, max_depth)
31        {
32            outline_structure
33        } else {
34            create_outline_structure(SectioningType::Root, node, 0, max_depth)
35                .unwrap_or_else(|| OutlineStructure::new(SectioningType::Root))
36        }
37    }
38}
39
40pub(crate) fn create_outline_structure(
41    sectioning_type: SectioningType,
42    node: NodeRef,
43    depth: usize,
44    max_depth: usize,
45) -> Option<OutlineStructure> {
46    if depth > max_depth {
47        return None;
48    }
49
50    let mut outline_structure = OutlineStructure::new(sectioning_type);
51
52    let mut find_heading = true;
53
54    for child in node.children() {
55        if let NodeData::Element(element_data) = child.data() {
56            let local_name: &str = &element_data.name.local;
57
58            if SECTIONING_ROOTS.binary_search(&local_name).is_ok() {
59                continue;
60            }
61
62            if let Some(sub_sectioning_type) =
63                SectioningType::from_sectioning_content_tag(local_name)
64            {
65                if let Some(sub_outline_structure) =
66                    create_outline_structure(sub_sectioning_type, child, depth + 1, max_depth)
67                {
68                    outline_structure.sub_outline_structures.push(sub_outline_structure);
69                }
70            } else if let Some(heading) = create_heading(child.clone(), depth + 1, max_depth) {
71                if find_heading {
72                    outline_structure.heading = Some(heading);
73                } else {
74                    let mut sub_outline_structure = OutlineStructure::new(SectioningType::Heading);
75
76                    sub_outline_structure.heading = Some(heading);
77
78                    outline_structure.sub_outline_structures.push(sub_outline_structure);
79                }
80            } else {
81                if let Some(sub_outline_structure) =
82                    create_outline_structure(SectioningType::Root, child, depth + 1, max_depth)
83                {
84                    if let Some(heading) = sub_outline_structure.heading {
85                        if find_heading {
86                            outline_structure.heading = Some(heading);
87                        } else {
88                            let mut sub_outline_structure =
89                                OutlineStructure::new(SectioningType::Heading);
90
91                            sub_outline_structure.heading = Some(heading);
92
93                            outline_structure.sub_outline_structures.push(sub_outline_structure);
94                        }
95                    }
96
97                    for os in sub_outline_structure.sub_outline_structures {
98                        outline_structure.sub_outline_structures.push(os);
99                    }
100                }
101
102                continue;
103            }
104
105            find_heading = false;
106        }
107    }
108
109    Some(outline_structure)
110}
111
112pub(crate) fn create_outline_structure_finding_body(
113    node: NodeRef,
114    depth: usize,
115    max_depth: usize,
116) -> Option<OutlineStructure> {
117    if depth > max_depth {
118        return None;
119    }
120
121    if let NodeData::Element(element_data) = node.data() {
122        let local_name: &str = &element_data.name.local;
123
124        if local_name == "body" {
125            return Some(
126                create_outline_structure(SectioningType::Body, node.clone(), depth, max_depth)
127                    .unwrap_or_else(|| OutlineStructure::new(SectioningType::Root)),
128            );
129        }
130    }
131
132    for child in node.children() {
133        if let Some(outline_structure) =
134            create_outline_structure_finding_body(child, depth + 1, max_depth)
135        {
136            return Some(outline_structure);
137        }
138    }
139
140    None
141}