1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
extern crate kuchiki;
extern crate regex;

use kuchiki::{NodeData, NodeRef};

use regex::Regex;

// Matches a line break together with every newline, tab and space that directly
// follows it. `create_text` substitutes a single space for what this matches.
lazy_static! {
    static ref RE_NEW_LINE: Regex = Regex::new("\n[\n\t ]*").unwrap();
}

/// A heading extracted from a document: either a single header or a group of headings.
#[derive(Debug, Clone)]
pub enum Heading {
    /// A single header element.
    Header {
        /// Rank of the header; headings built by `create_heading` carry 1 through 6
        /// (taken from the tag names `h1` to `h6`).
        level: u8,
        /// Text collected from the header's descendant text nodes.
        text: String,
    },
    /// The headings collected from the children of an `hgroup` element, in order.
    Group(Vec<Heading>),
}

impl Heading {
    /// Returns the level of the last header contained in this heading.
    ///
    /// For a `Header` this is its own level; for a `Group` it is the end level of
    /// the group's last member.
    ///
    /// # Panics
    ///
    /// Panics if a `Group` (at any nesting depth along the path) is empty.
    #[inline]
    pub fn get_end_level(&self) -> u8 {
        match self {
            Heading::Header {
                level,
                ..
            } => *level,
            Heading::Group(headings) => headings
                .last()
                .expect("a heading group must contain at least one heading")
                .get_end_level(),
        }
    }

    /// Returns the level of the first header contained in this heading.
    ///
    /// For a `Header` this is its own level; for a `Group` it is the start level of
    /// the group's first member.
    ///
    /// # Panics
    ///
    /// Panics if a `Group` (at any nesting depth along the path) is empty.
    #[inline]
    pub fn get_start_level(&self) -> u8 {
        match self {
            Heading::Header {
                level,
                ..
            } => *level,
            // Recurse with `get_start_level`: asking the first member for its *end*
            // level gives the wrong answer when that member is itself a group.
            Heading::Group(headings) => headings
                .first()
                .expect("a heading group must contain at least one heading")
                .get_start_level(),
        }
    }
}

/// Builds a `Heading` from `node` when it is an `h1`–`h6` or `hgroup` element.
///
/// * `h1`–`h6` become a `Heading::Header` whose level is the digit in the tag name
///   and whose text is gathered from the node's children via `create_text`.
/// * `hgroup` becomes a `Heading::Group` holding every child that is itself a
///   heading; a group that ends up empty is discarded.
///
/// Children are visited at `depth + 1`. Returns `None` when `depth` exceeds
/// `max_depth`, when `node` is not an element, or when its tag name is anything
/// other than the ones listed above.
pub(crate) fn create_heading(node: NodeRef, depth: usize, max_depth: usize) -> Option<Heading> {
    if depth > max_depth {
        return None;
    }

    // Only element nodes can be headings.
    let element = match node.data() {
        NodeData::Element(element) => element,
        _ => return None,
    };
    let tag: &str = &element.name.local;

    if tag == "hgroup" {
        let members: Vec<Heading> = node
            .children()
            .filter_map(|child| create_heading(child, depth + 1, max_depth))
            .collect();

        return if members.is_empty() {
            None
        } else {
            Some(Heading::Group(members))
        };
    }

    // Exactly two bytes: an `h` followed by a digit from 1 to 6.
    let level = match tag.as_bytes() {
        [b'h', digit @ b'1'..=b'6'] => *digit - b'0',
        _ => return None,
    };

    let mut text = String::new();
    for child in node.children() {
        create_text(&mut text, child, depth + 1, max_depth);
    }

    Some(Heading::Header {
        level,
        text,
    })
}

/// Converts a heading into its text.
///
/// A `Header` yields its own text. A `Group` yields the texts of its members
/// (converted recursively) joined with `" — "`; an empty group yields an empty
/// string. Implemented as `From` so that both `String::from(heading)` and
/// `heading.into()` are available.
impl From<Heading> for String {
    #[inline]
    fn from(heading: Heading) -> String {
        match heading {
            Heading::Header {
                text,
                ..
            } => text,
            Heading::Group(headings) => {
                let parts: Vec<String> = headings.into_iter().map(String::from).collect();

                parts.join(" — ")
            }
        }
    }
}

/// Appends the text found under `node` to `text`.
///
/// A text node contributes its content with every match of `RE_NEW_LINE` (a line
/// break plus the whitespace following it) replaced by one space, and with leading
/// and trailing whitespace trimmed. Any other node contributes the text of its
/// children, visited at `depth + 1`. Nothing is appended once `depth` exceeds
/// `max_depth`.
///
/// NOTE(review): each text node is trimmed on its own before being appended, so
/// whitespace that separates adjacent nodes is not preserved — confirm this is the
/// intended output.
#[inline]
pub(crate) fn create_text(text: &mut String, node: NodeRef, depth: usize, max_depth: usize) {
    if depth > max_depth {
        return;
    }

    if let NodeData::Text(t) = node.data() {
        let t = t.borrow();

        // `replace_all` rather than `replace`: the latter substitutes only the
        // first match, leaving later line breaks and indentation in the text.
        text.push_str(RE_NEW_LINE.replace_all(t.as_str(), " ").trim());
    } else {
        for child in node.children() {
            create_text(text, child, depth + 1, max_depth);
        }
    }
}