1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
extern crate kuchiki;

use crate::heading::*;
use crate::sectioning_type::SectioningType;

use kuchiki::traits::TendrilSink;
use kuchiki::{parse_html, NodeData, NodeRef};

/// Tag names whose elements `create_outline_structure` skips entirely when walking children.
///
/// The entries must stay in ascending order: membership is tested with `binary_search`.
const SECTIONING_ROOTS: [&str; 7] = [
    "blockquote",
    "body",
    "details",
    "dialog",
    "fieldset",
    "figure",
    "td",
];

/// One node of the outline tree built from a parsed HTML document.
///
/// Each node records what kind of section it stands for, the heading attached to it (if any),
/// and the nested nodes collected beneath it.
#[derive(Debug, Clone)]
pub struct OutlineStructure {
    /// The kind of section this node represents.
    pub sectioning_type: SectioningType,
    /// The heading attached to this node; `None` when no heading was recorded for it.
    pub heading: Option<Heading>,
    /// Nested outline nodes, in the order they were appended while building.
    pub sub_outline_structures: Vec<OutlineStructure>,
}

impl OutlineStructure {
    /// Creates an empty node of the given type, with no heading and no nested structures.
    #[inline]
    pub(crate) fn new(sectioning_type: SectioningType) -> OutlineStructure {
        Self {
            heading: None,
            sub_outline_structures: Vec::new(),
            sectioning_type,
        }
    }

    /// Parses `html` and builds the outline structure of the resulting document.
    ///
    /// The parsed tree is first searched for a `body` element; if one is found within
    /// `max_depth`, the outline built from it is returned. Otherwise the outline is built
    /// from the document node itself as a `SectioningType::Root`, falling back to an empty
    /// root node if that produces nothing.
    ///
    /// `max_depth` is the deepest recursion level (starting from 0 at the document node)
    /// that the search and the outline construction are allowed to reach.
    #[inline]
    pub fn parse_html<S: AsRef<str>>(html: S, max_depth: usize) -> OutlineStructure {
        let document = parse_html().one(html.as_ref());

        // Prefer the `body`-rooted outline; only walk the whole document when no body is found.
        create_outline_structure_finding_body(document.clone(), 0, max_depth)
            .or_else(|| create_outline_structure(SectioningType::Root, document, 0, max_depth))
            .unwrap_or_else(|| OutlineStructure::new(SectioningType::Root))
    }
}

pub(crate) fn create_outline_structure(
    sectioning_type: SectioningType,
    node: NodeRef,
    depth: usize,
    max_depth: usize,
) -> Option<OutlineStructure> {
    if depth > max_depth {
        return None;
    }

    let mut outline_structure = OutlineStructure::new(sectioning_type);

    let mut find_heading = true;

    for child in node.children() {
        if let NodeData::Element(element_data) = child.data() {
            let local_name: &str = &element_data.name.local;

            if SECTIONING_ROOTS.binary_search(&local_name).is_ok() {
                continue;
            }

            if let Some(sub_sectioning_type) =
                SectioningType::from_sectioning_content_tag(local_name)
            {
                if let Some(sub_outline_structure) =
                    create_outline_structure(sub_sectioning_type, child, depth + 1, max_depth)
                {
                    outline_structure.sub_outline_structures.push(sub_outline_structure);
                }
            } else if let Some(heading) = create_heading(child.clone(), depth + 1, max_depth) {
                if find_heading {
                    outline_structure.heading = Some(heading);
                } else {
                    let mut sub_outline_structure = OutlineStructure::new(SectioningType::Heading);

                    sub_outline_structure.heading = Some(heading);

                    outline_structure.sub_outline_structures.push(sub_outline_structure);
                }
            } else {
                if let Some(sub_outline_structure) =
                    create_outline_structure(SectioningType::Root, child, depth + 1, max_depth)
                {
                    if let Some(heading) = sub_outline_structure.heading {
                        if find_heading {
                            outline_structure.heading = Some(heading);
                        } else {
                            let mut sub_outline_structure =
                                OutlineStructure::new(SectioningType::Heading);

                            sub_outline_structure.heading = Some(heading);

                            outline_structure.sub_outline_structures.push(sub_outline_structure);
                        }
                    }

                    for os in sub_outline_structure.sub_outline_structures {
                        outline_structure.sub_outline_structures.push(os);
                    }
                }

                continue;
            }

            find_heading = false;
        }
    }

    Some(outline_structure)
}

/// Searches `node` and its descendants, depth first, for a `body` element and builds the
/// outline structure rooted at the first one found.
///
/// `depth` is the level of `node` in the search; it grows by one for each level descended,
/// and the depth at which `body` is found is passed on to `create_outline_structure`.
///
/// Returns `None` when `depth` exceeds `max_depth` or when no `body` element is reachable
/// within that limit.
pub(crate) fn create_outline_structure_finding_body(
    node: NodeRef,
    depth: usize,
    max_depth: usize,
) -> Option<OutlineStructure> {
    if depth > max_depth {
        return None;
    }

    let is_body = match node.data() {
        NodeData::Element(element_data) => {
            let tag: &str = &element_data.name.local;

            tag == "body"
        }
        _ => false,
    };

    if is_body {
        let body_outline =
            create_outline_structure(SectioningType::Body, node, depth, max_depth)
                .unwrap_or_else(|| OutlineStructure::new(SectioningType::Root));

        return Some(body_outline);
    }

    // Not a body element: the first child subtree that contains one wins.
    node.children()
        .find_map(|child| create_outline_structure_finding_body(child, depth + 1, max_depth))
}