// turndown/node.rs

1use crate::utilities::{is_block, is_meaningful_when_blank, is_void, FlankingWhitespace};
2use std::collections::HashMap;
3
/// The kind of DOM node a [`Node`] stands for.
///
/// The variants are plain tags with no payload, so the type is `Copy`
/// and can be compared directly with `==`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum NodeType {
    /// An element node.
    Element,
    /// A text node.
    Text,
    /// A comment node.
    Comment,
    /// A document node.
    Document,
    /// A processing-instruction node.
    ProcessingInstruction,
}
13
14/// Represents an HTML/DOM node with minimal stored state
15/// Computed properties are derived from node_name on-demand
16#[derive(Clone, Debug)]
17pub struct Node {
18    pub node_type: NodeType,
19    pub node_name: String,
20    pub node_value: String,
21    pub children: Vec<Node>,
22    pub attributes: HashMap<String, String>,
23    pub is_code: bool, // Only meaningful state derived from context
24}
25
26impl Node {
27    /// Creates a new element node
28    pub fn new_element(name: &str) -> Self {
29        Node {
30            node_type: NodeType::Element,
31            node_name: name.to_uppercase(),
32            node_value: String::new(),
33            children: Vec::new(),
34            attributes: HashMap::new(),
35            is_code: false,
36        }
37    }
38
39    /// Creates a new text node
40    pub fn new_text(value: &str) -> Self {
41        Node {
42            node_type: NodeType::Text,
43            node_name: "#text".to_string(),
44            node_value: value.to_string(),
45            children: Vec::new(),
46            attributes: HashMap::new(),
47            is_code: false,
48        }
49    }
50
51    /// Creates a new document node
52    pub fn new_document() -> Self {
53        Node {
54            node_type: NodeType::Document,
55            node_name: "#document".to_string(),
56            node_value: String::new(),
57            children: Vec::new(),
58            attributes: HashMap::new(),
59            is_code: false,
60        }
61    }
62
63    /// Creates a new comment node
64    pub fn new_comment(value: &str) -> Self {
65        Node {
66            node_type: NodeType::Comment,
67            node_name: "#comment".to_string(),
68            node_value: value.to_string(),
69            children: Vec::new(),
70            attributes: HashMap::new(),
71            is_code: false,
72        }
73    }
74
75    // Computed property methods (lazy evaluation)
76
77    /// Checks if this element is a block-level element
78    pub fn is_block(&self) -> bool {
79        is_block(&self.node_name)
80    }
81
82    /// Checks if this element is a void (self-closing) element
83    pub fn is_void(&self) -> bool {
84        is_void(&self.node_name)
85    }
86
87    /// Checks if this element is meaningful when blank
88    pub fn is_meaningful_when_blank(&self) -> bool {
89        is_meaningful_when_blank(&self.node_name)
90    }
91
92    /// Calculates if node is blank (empty or only whitespace/void elements)
93    pub fn is_blank(&self) -> bool {
94        if self.is_meaningful_when_blank() {
95            return false;
96        }
97
98        // Void elements with meaningful attributes are never blank
99        if self.is_void() {
100            if self.get_attribute("src").is_some()
101                || self.get_attribute("data").is_some()
102                || matches!(self.node_name.as_str(), "BR" | "HR")
103            {
104                return false;
105            }
106        }
107
108        let has_text_content = self
109            .children
110            .iter()
111            .any(|child| child.node_type == NodeType::Text && !child.node_value.trim().is_empty());
112
113        // Check if there are any void elements with meaningful attributes
114        let has_meaningful_void_children = self.children.iter().any(|child| {
115            child.node_type == NodeType::Element
116                && child.is_void()
117                && (child.get_attribute("src").is_some()
118                    || child.get_attribute("data").is_some()
119                    || matches!(child.node_name.as_str(), "BR" | "HR"))
120        });
121
122        // Element is blank if it has no text content AND no meaningful void children
123        let has_only_empty_void_children = self.children.iter().all(|child| {
124            if child.node_type == NodeType::Element && child.is_void() {
125                child.get_attribute("src").is_none()
126                    && child.get_attribute("data").is_none()
127                    && !matches!(child.node_name.as_str(), "BR" | "HR")
128            } else {
129                false
130            }
131        });
132
133        !has_text_content
134            && !has_meaningful_void_children
135            && (has_only_empty_void_children || self.children.is_empty())
136    }
137
138    /// Gets the flanking whitespace (leading/trailing whitespace)
139    pub fn flanking_whitespace(&self) -> FlankingWhitespace {
140        if self.node_type != NodeType::Element {
141            return FlankingWhitespace::new(String::new(), String::new());
142        }
143
144        let mut leading = String::new();
145        let mut trailing = String::new();
146
147        // Check for leading whitespace in first text child
148        if let Some(first_child) = self.children.first() {
149            if first_child.node_type == NodeType::Text {
150                let text = &first_child.node_value;
151                let trimmed = text.trim_start();
152                if trimmed.len() < text.len() {
153                    leading = text[..(text.len() - trimmed.len())].to_string();
154                }
155            }
156        }
157
158        // Check for trailing whitespace in last text child
159        if let Some(last_child) = self.children.last() {
160            if last_child.node_type == NodeType::Text {
161                let text = &last_child.node_value;
162                let trimmed = text.trim_end();
163                if trimmed.len() < text.len() {
164                    trailing = text[trimmed.len()..].to_string();
165                }
166            }
167        }
168
169        FlankingWhitespace::new(leading, trailing)
170    }
171
172    /// Gets an attribute value
173    pub fn get_attribute(&self, name: &str) -> Option<String> {
174        self.attributes.get(name).cloned()
175    }
176
177    /// Sets an attribute value
178    pub fn set_attribute(&mut self, name: &str, value: &str) {
179        self.attributes.insert(name.to_string(), value.to_string());
180    }
181
182    /// Adds a child node
183    pub fn add_child(&mut self, child: Node) {
184        self.children.push(child);
185    }
186
187    /// Checks if this node has any children
188    pub fn has_children(&self) -> bool {
189        !self.children.is_empty()
190    }
191
192    /// Gets the text content recursively
193    pub fn text_content(&self) -> String {
194        match self.node_type {
195            NodeType::Text => self.node_value.clone(),
196            NodeType::Element | NodeType::Document => self
197                .children
198                .iter()
199                .map(|c| c.text_content())
200                .collect::<Vec<_>>()
201                .join(""),
202            NodeType::Comment | NodeType::ProcessingInstruction => String::new(),
203        }
204    }
205
206    /// Converts node to outer HTML representation
207    pub fn to_outer_html(&self) -> String {
208        match self.node_type {
209            NodeType::Element => {
210                let mut html = format!("<{}", self.node_name.to_lowercase());
211                for (key, value) in &self.attributes {
212                    html.push_str(&format!(r#" {}="{}""#, key, value));
213                }
214                html.push('>');
215
216                for child in &self.children {
217                    html.push_str(&child.to_outer_html());
218                }
219
220                if !self.is_void() {
221                    html.push_str(&format!("</{}>", self.node_name.to_lowercase()));
222                }
223                html
224            }
225            NodeType::Text => self.node_value.clone(),
226            NodeType::Comment => format!("<!--{}-->", self.node_value),
227            NodeType::Document => self.children.iter().map(|c| c.to_outer_html()).collect(),
228            NodeType::ProcessingInstruction => String::new(),
229        }
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// The element constructor upper-cases the tag name, and `DIV` is block-level.
    #[test]
    fn test_new_element() {
        let div = Node::new_element("div");
        assert_eq!(NodeType::Element, div.node_type);
        assert_eq!("DIV", div.node_name);
        assert!(div.is_block());
    }

    /// A text node keeps its value and is not block-level.
    #[test]
    fn test_new_text() {
        let text = Node::new_text("Hello");
        assert_eq!(NodeType::Text, text.node_type);
        assert_eq!("Hello", text.node_value);
        assert!(!text.is_block());
    }

    /// `br` is classified as a void element.
    #[test]
    fn test_void_element() {
        assert!(Node::new_element("br").is_void());
    }

    /// Adding a child grows the `children` list by one.
    #[test]
    fn test_add_child() {
        let mut div = Node::new_element("div");
        div.add_child(Node::new_text("child"));
        assert_eq!(1, div.children.len());
    }

    /// A value written with `set_attribute` is read back by `get_attribute`.
    #[test]
    fn test_get_set_attribute() {
        let url = "http://example.com";
        let mut anchor = Node::new_element("a");
        anchor.set_attribute("href", url);
        assert_eq!(Some(url.to_string()), anchor.get_attribute("href"));
    }

    /// The text content of an element is the concatenation of its text children.
    #[test]
    fn test_text_content() {
        let mut paragraph = Node::new_element("p");
        for part in &["Hello ", "World"] {
            paragraph.add_child(Node::new_text(part));
        }
        assert_eq!("Hello World", paragraph.text_content());
    }

    /// A childless element is blank; one with real text is not.
    #[test]
    fn test_is_blank() {
        let empty = Node::new_element("div");
        assert!(empty.is_blank());

        let mut filled = Node::new_element("p");
        filled.add_child(Node::new_text("content"));
        assert!(!filled.is_blank());
    }
}