1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
//! This module contains enums and structs used in the library.

use std::collections::HashMap;

/// The kinds of node this library distinguishes.
///
/// Every variant from `Html` through `Blockquote` is what [`NodeType::from_str`]
/// returns for the HTML tag with the same (case-insensitive) name.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub enum NodeType {
    Html,
    Head,
    Style,
    Link,
    Script,
    Meta,
    Title,
    Body,
    H1,
    H2,
    H3,
    H4,
    H5,
    H6,
    P,
    Div,
    Strong,
    Em,
    A,
    Ul,
    Ol,
    Li,
    Pre,
    Code,
    Hr,
    Br,
    Blockquote,
    /// The value of `NodeType::default()`; [`NodeType::from_str`] never yields it.
    #[default]
    Text,
    /// Not tied to any tag name; [`NodeType::from_str`] never yields it.
    Comment,
    /// A tag without a dedicated variant. [`NodeType::from_str`] stores the
    /// lowercased tag name here.
    Unknown(String),
}

impl NodeType {
    /// Reports whether this is one of the three "special" tags:
    /// `Blockquote`, `Ul` or `Ol`.
    pub fn is_special_tag(&self) -> bool {
        matches!(self, Self::Blockquote | Self::Ul | Self::Ol)
    }

    /// Maps a tag name to its variant, ignoring letter case.
    ///
    /// The input is lowercased with `str::to_lowercase` before the lookup.
    /// Names without a dedicated variant (including the empty string) come
    /// back as `Unknown` carrying that lowercased name.
    pub fn from_str(input: &str) -> Self {
        let lowered = input.to_lowercase();
        match lowered.as_str() {
            "html" => Self::Html,
            "head" => Self::Head,
            "style" => Self::Style,
            "link" => Self::Link,
            "script" => Self::Script,
            "meta" => Self::Meta,
            "title" => Self::Title,
            "body" => Self::Body,
            "h1" => Self::H1,
            "h2" => Self::H2,
            "h3" => Self::H3,
            "h4" => Self::H4,
            "h5" => Self::H5,
            "h6" => Self::H6,
            "p" => Self::P,
            "div" => Self::Div,
            "strong" => Self::Strong,
            "em" => Self::Em,
            "a" => Self::A,
            "ul" => Self::Ul,
            "ol" => Self::Ol,
            "li" => Self::Li,
            "pre" => Self::Pre,
            "code" => Self::Code,
            "hr" => Self::Hr,
            "br" => Self::Br,
            "blockquote" => Self::Blockquote,
            // Hand over the already-lowercased buffer instead of copying it.
            _ => Self::Unknown(lowered),
        }
    }
}

/// A single node of the HTML tree, together with its child nodes.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct Node {
    /// The kind of node, if one has been assigned.
    pub tag_name: Option<NodeType>,
    /// Optional string payload of the node.
    // NOTE(review): presumably the text of text/comment nodes — confirm against the parser.
    pub value: Option<String>,
    /// Optional attribute map (name → value).
    pub attributes: Option<HashMap<String, String>>,
    /// Tags this node is considered to be "within". `is_in_special_tag`
    /// searches this list and `leading_spaces` counts its `Ul`/`Ol` entries.
    // NOTE(review): presumably the enclosing special ancestors, outermost first — confirm.
    pub within_special_tag: Option<Vec<NodeType>>,
    /// Child nodes.
    pub children: Vec<Node>,
}

impl Node {
    /// Checks whether the node is within any of the special tags passed in.
    ///
    /// Returns `false` when `within_special_tag` is `None` or shares no entry
    /// with `tags`.
    pub fn is_in_special_tag(&self, tags: &[NodeType]) -> bool {
        self.within_special_tag
            .as_deref()
            .map_or(false, |enclosing| {
                enclosing.iter().any(|tag| tags.contains(tag))
            })
    }

    /// Returns the indentation used when formatting unordered and ordered lists.
    ///
    /// The result holds two spaces for every `Ul`/`Ol` entry in
    /// `within_special_tag` beyond the first. A node with no such entry
    /// (the list is `None`, empty, or contains only other tags such as
    /// `Blockquote`) gets an empty string.
    pub fn leading_spaces(&self) -> String {
        let list_depth = self
            .within_special_tag
            .iter()
            .flatten()
            .filter(|tag| matches!(tag, NodeType::Ul | NodeType::Ol))
            .count();
        // `saturating_sub` keeps a depth of zero at zero; a plain `- 1` would
        // underflow `usize` for nodes that are not inside any list.
        " ".repeat(list_depth.saturating_sub(1) * 2)
    }

    /// Creates a new `Node` from `tag_name`, `value`, `attributes`,
    /// `within_special_tag` and `children`, stored as given.
    pub fn new(
        tag_name: Option<NodeType>,
        value: Option<String>,
        attributes: Option<HashMap<String, String>>,
        within_special_tag: Option<Vec<NodeType>>,
        children: Vec<Node>,
    ) -> Self {
        Self {
            tag_name,
            value,
            attributes,
            within_special_tag,
            children,
        }
    }
}