html-outliner 0.1.6

Outline HTML documents for better SEO.
Documentation
use kuchiki::{NodeData, NodeRef};
use once_cell::sync::Lazy;
use regex::Regex;

static RE_NEW_LINE: Lazy<Regex> = Lazy::new(|| Regex::new("\n[\n\t ]*").unwrap());

#[derive(Debug, Clone)]
pub enum Heading {
    Header { level: u8, text: String },
    Group(Vec<Heading>),
}

impl Heading {
    #[inline]
    pub fn get_end_level(&self) -> u8 {
        match self {
            Heading::Header {
                level, ..
            } => *level,
            Heading::Group(headings) => headings[headings.len() - 1].get_end_level(),
        }
    }

    #[inline]
    pub fn get_start_level(&self) -> u8 {
        match self {
            Heading::Header {
                level, ..
            } => *level,
            Heading::Group(headings) => headings[0].get_end_level(),
        }
    }
}

pub(crate) fn create_heading(node: NodeRef, depth: usize, max_depth: usize) -> Option<Heading> {
    if depth > max_depth {
        return None;
    }

    let mut heading = if let NodeData::Element(element_data) = node.data() {
        let local_name: &str = &element_data.name.local;

        let local_name_length = local_name.len();

        match local_name_length {
            2 => {
                if let Some(stripped_local_name) = local_name.strip_prefix('h') {
                    match stripped_local_name.parse::<u8>() {
                        Ok(level) if (1..=6).contains(&level) => Heading::Header {
                            level,
                            text: String::new(),
                        },
                        _ => return None,
                    }
                } else {
                    return None;
                }
            },
            6 => {
                if local_name.eq("hgroup") {
                    Heading::Group(Vec::with_capacity(2))
                } else {
                    return None;
                }
            },
            _ => return None,
        }
    } else {
        return None;
    };

    match &mut heading {
        Heading::Header {
            text, ..
        } => {
            for child in node.children() {
                create_text(text, child, depth + 1, max_depth);
            }
        },
        Heading::Group(headings) => {
            for child in node.children() {
                if let Some(heading) = create_heading(child, depth + 1, max_depth) {
                    headings.push(heading);
                }
            }

            if headings.is_empty() {
                return None;
            }
        },
    }

    Some(heading)
}

impl From<Heading> for String {
    #[inline]
    fn from(heading: Heading) -> String {
        match heading {
            Heading::Header {
                text, ..
            } => text,
            Heading::Group(headings) => {
                let mut iter = headings.into_iter();

                let mut text: String = iter.next().unwrap().into();

                for heading in iter {
                    text.push_str("");

                    let t: String = heading.into();
                    text.push_str(&t);
                }

                text
            },
        }
    }
}

#[inline]
pub(crate) fn create_text(text: &mut String, node: NodeRef, depth: usize, max_depth: usize) {
    if depth > max_depth {
        return;
    }

    if let NodeData::Text(t) = node.data() {
        let t = t.borrow();

        text.push_str(RE_NEW_LINE.replace(t.as_str(), " ").trim());
    } else {
        for child in node.children() {
            create_text(text, child, depth + 1, max_depth);
        }
    }
}