scrape_core/dom/
node.rs

1//! DOM node types and identifiers.
2
3use std::collections::HashMap;
4
/// Opaque identifier for a node stored in the DOM tree.
///
/// A `NodeId` is a small, copyable handle that refers to a node in the
/// document. The wrapped value is `pub(crate)` so the crate can use it for
/// internal indexing, while code outside the crate cannot construct one.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(pub(crate) usize);

impl NodeId {
    /// Wraps a raw `usize` in a `NodeId`.
    #[must_use]
    pub(crate) const fn new(id: usize) -> Self {
        Self(id)
    }

    /// Unwraps the handle, yielding the raw value it was created with
    /// (intended for internal use).
    #[must_use]
    pub(crate) const fn index(self) -> usize {
        let Self(raw) = self;
        raw
    }
}
26
/// The payload carried by a DOM node: element, text, or comment.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeKind {
    /// An element such as `<div>` or `<span>`.
    Element {
        /// The element's tag name (lowercase).
        name: String,
        /// The element's attributes, keyed by attribute name.
        attributes: HashMap<String, String>,
    },
    /// A run of text.
    Text {
        /// The text itself.
        content: String,
    },
    /// A comment.
    Comment {
        /// The comment's content.
        content: String,
    },
}

impl NodeKind {
    /// Returns the tag name for an element node, or `None` for text and
    /// comment nodes.
    #[must_use]
    pub fn tag_name(&self) -> Option<&str> {
        match self {
            Self::Element { name, .. } => Some(name.as_str()),
            Self::Text { .. } | Self::Comment { .. } => None,
        }
    }

    /// Backwards-compatible alias for [`tag_name`](Self::tag_name).
    #[must_use]
    #[deprecated(since = "0.2.0", note = "use `tag_name()` instead")]
    pub fn as_element_name(&self) -> Option<&str> {
        self.tag_name()
    }

    /// Returns the attribute map for an element node, or `None` for text and
    /// comment nodes.
    #[must_use]
    pub fn attributes(&self) -> Option<&HashMap<String, String>> {
        match self {
            Self::Element { attributes, .. } => Some(attributes),
            Self::Text { .. } | Self::Comment { .. } => None,
        }
    }

    /// Returns the text for a text node, or `None` for element and comment
    /// nodes.
    #[must_use]
    pub fn as_text(&self) -> Option<&str> {
        match self {
            Self::Text { content } => Some(content.as_str()),
            Self::Element { .. } | Self::Comment { .. } => None,
        }
    }

    /// Returns the content of a comment node, or `None` for element and text
    /// nodes.
    #[must_use]
    pub fn as_comment(&self) -> Option<&str> {
        match self {
            Self::Comment { content } => Some(content.as_str()),
            Self::Element { .. } | Self::Text { .. } => None,
        }
    }

    /// Reports whether this is the [`Element`](Self::Element) variant.
    #[must_use]
    pub const fn is_element(&self) -> bool {
        matches!(self, Self::Element { .. })
    }

    /// Reports whether this is the [`Text`](Self::Text) variant.
    #[must_use]
    pub const fn is_text(&self) -> bool {
        matches!(self, Self::Text { .. })
    }

    /// Reports whether this is the [`Comment`](Self::Comment) variant.
    #[must_use]
    pub const fn is_comment(&self) -> bool {
        matches!(self, Self::Comment { .. })
    }
}
111
112/// A node in the DOM tree.
113///
114/// Nodes are linked via `first_child`/`last_child` for parent-child relationships
115/// and `prev_sibling`/`next_sibling` for sibling relationships. This linked structure
116/// eliminates per-node `Vec` allocations and enables O(1) append operations.
117#[derive(Debug, Clone)]
118pub struct Node {
119    /// The kind of node (element, text, or comment).
120    pub kind: NodeKind,
121    /// Parent node, if any.
122    pub parent: Option<NodeId>,
123    /// First child node.
124    pub first_child: Option<NodeId>,
125    /// Last child node.
126    pub last_child: Option<NodeId>,
127    /// Previous sibling.
128    pub prev_sibling: Option<NodeId>,
129    /// Next sibling.
130    pub next_sibling: Option<NodeId>,
131}
132
133impl Node {
134    /// Creates a new element node.
135    #[must_use]
136    pub fn element(name: impl Into<String>, attributes: HashMap<String, String>) -> Self {
137        Self {
138            kind: NodeKind::Element { name: name.into(), attributes },
139            parent: None,
140            first_child: None,
141            last_child: None,
142            prev_sibling: None,
143            next_sibling: None,
144        }
145    }
146
147    /// Creates a new text node.
148    #[must_use]
149    pub fn text(content: impl Into<String>) -> Self {
150        Self {
151            kind: NodeKind::Text { content: content.into() },
152            parent: None,
153            first_child: None,
154            last_child: None,
155            prev_sibling: None,
156            next_sibling: None,
157        }
158    }
159
160    /// Creates a new comment node.
161    #[must_use]
162    pub fn comment(content: impl Into<String>) -> Self {
163        Self {
164            kind: NodeKind::Comment { content: content.into() },
165            parent: None,
166            first_child: None,
167            last_child: None,
168            prev_sibling: None,
169            next_sibling: None,
170        }
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// IDs built from the same raw value compare equal; different values do not.
    #[test]
    fn node_id_equality() {
        let (first, same, other) = (NodeId::new(42), NodeId::new(42), NodeId::new(43));
        assert_eq!(first, same);
        assert_ne!(first, other);
    }

    /// An element kind reports itself as an element only and exposes its tag name.
    #[test]
    fn node_kind_element() {
        let kind = NodeKind::Element { name: String::from("div"), attributes: HashMap::new() };
        assert_eq!((kind.is_element(), kind.is_text(), kind.is_comment()), (true, false, false));
        assert_eq!(kind.tag_name(), Some("div"));
    }

    /// A text kind reports itself as text only and exposes its content.
    #[test]
    fn node_kind_text() {
        let kind = NodeKind::Text { content: String::from("Hello") };
        assert_eq!((kind.is_element(), kind.is_text(), kind.is_comment()), (false, true, false));
        assert_eq!(kind.as_text(), Some("Hello"));
    }

    /// A comment kind reports itself as a comment only and exposes its content.
    #[test]
    fn node_kind_comment() {
        let kind = NodeKind::Comment { content: String::from("A comment") };
        assert_eq!((kind.is_element(), kind.is_text(), kind.is_comment()), (false, false, true));
        assert_eq!(kind.as_comment(), Some("A comment"));
    }

    /// A freshly built element node is an element with no tree links set.
    #[test]
    fn node_element_constructor() {
        let node = Node::element("div", HashMap::new());
        assert!(node.kind.is_element());

        let links = [
            node.parent,
            node.first_child,
            node.last_child,
            node.prev_sibling,
            node.next_sibling,
        ];
        assert!(links.iter().all(Option::is_none));
    }

    /// `Node::text` stores the given content in a text kind.
    #[test]
    fn node_text_constructor() {
        let Node { kind, .. } = Node::text("Hello");
        assert!(kind.is_text());
        assert_eq!(kind.as_text(), Some("Hello"));
    }

    /// `Node::comment` stores the given content in a comment kind.
    #[test]
    fn node_comment_constructor() {
        let Node { kind, .. } = Node::comment("A comment");
        assert!(kind.is_comment());
        assert_eq!(kind.as_comment(), Some("A comment"));
    }
}