// scrape_core/dom/node.rs

//! DOM node types and identifiers.
2
3use std::collections::HashMap;
4
5use super::tag_id::TagId;
6
/// A node ID in the DOM tree.
///
/// This is an opaque handle to a node in the document. It is a thin,
/// `Copy` wrapper around a `usize`, so it is cheap to pass by value and can
/// be compared and hashed.
///
/// The inner value is `pub(crate)` to allow internal indexing while
/// preventing external construction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(pub(crate) usize);

impl NodeId {
    /// Creates a new node ID wrapping the raw value `id`.
    ///
    /// Crate-private: code outside this crate cannot mint IDs.
    #[must_use]
    pub(crate) const fn new(id: usize) -> Self {
        Self(id)
    }

    /// Returns the raw ID value passed to [`NodeId::new`] (for internal use).
    #[must_use]
    pub(crate) const fn index(self) -> usize {
        self.0
    }
}
28
29/// Types of nodes in the DOM tree.
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub enum NodeKind {
32    /// Element node (e.g., `<div>`, `<span>`).
33    Element {
34        /// Interned tag identifier for fast comparison.
35        tag_id: TagId,
36        /// Tag name (lowercase).
37        name: String,
38        /// Element attributes.
39        attributes: HashMap<String, String>,
40    },
41    /// Text node.
42    Text {
43        /// Text content.
44        content: String,
45    },
46    /// Comment node.
47    Comment {
48        /// Comment content.
49        content: String,
50    },
51}
52
53impl NodeKind {
54    /// Returns the tag ID if this is an element node.
55    #[inline]
56    #[must_use]
57    pub fn tag_id(&self) -> Option<TagId> {
58        match self {
59            Self::Element { tag_id, .. } => Some(*tag_id),
60            _ => None,
61        }
62    }
63
64    /// Returns the tag name if this is an element node.
65    #[must_use]
66    pub fn tag_name(&self) -> Option<&str> {
67        match self {
68            Self::Element { name, .. } => Some(name),
69            _ => None,
70        }
71    }
72
73    /// Alias for [`tag_name`](Self::tag_name) for backwards compatibility.
74    #[must_use]
75    #[deprecated(since = "0.2.0", note = "use `tag_name()` instead")]
76    pub fn as_element_name(&self) -> Option<&str> {
77        self.tag_name()
78    }
79
80    /// Returns the attributes if this is an element node.
81    #[must_use]
82    pub fn attributes(&self) -> Option<&HashMap<String, String>> {
83        match self {
84            Self::Element { attributes, .. } => Some(attributes),
85            _ => None,
86        }
87    }
88
89    /// Returns true if this element has the given tag ID (fast path).
90    #[inline]
91    #[must_use]
92    pub fn is_tag(&self, tag_id: TagId) -> bool {
93        match self {
94            Self::Element { tag_id: id, .. } => *id == tag_id,
95            _ => false,
96        }
97    }
98
99    /// Returns the text content if this is a text node.
100    #[must_use]
101    pub fn as_text(&self) -> Option<&str> {
102        match self {
103            Self::Text { content } => Some(content),
104            _ => None,
105        }
106    }
107
108    /// Returns the comment content if this is a comment node.
109    #[must_use]
110    pub fn as_comment(&self) -> Option<&str> {
111        match self {
112            Self::Comment { content } => Some(content),
113            _ => None,
114        }
115    }
116
117    /// Returns `true` if this is an element node.
118    #[must_use]
119    pub const fn is_element(&self) -> bool {
120        matches!(self, Self::Element { .. })
121    }
122
123    /// Returns `true` if this is a text node.
124    #[must_use]
125    pub const fn is_text(&self) -> bool {
126        matches!(self, Self::Text { .. })
127    }
128
129    /// Returns `true` if this is a comment node.
130    #[must_use]
131    pub const fn is_comment(&self) -> bool {
132        matches!(self, Self::Comment { .. })
133    }
134}
135
136/// A node in the DOM tree.
137///
138/// Nodes are linked via `first_child`/`last_child` for parent-child relationships
139/// and `prev_sibling`/`next_sibling` for sibling relationships. This linked structure
140/// eliminates per-node `Vec` allocations and enables O(1) append operations.
141#[derive(Debug, Clone)]
142pub struct Node {
143    /// The kind of node (element, text, or comment).
144    pub kind: NodeKind,
145    /// Parent node, if any.
146    pub parent: Option<NodeId>,
147    /// First child node.
148    pub first_child: Option<NodeId>,
149    /// Last child node.
150    pub last_child: Option<NodeId>,
151    /// Previous sibling.
152    pub prev_sibling: Option<NodeId>,
153    /// Next sibling.
154    pub next_sibling: Option<NodeId>,
155}
156
157impl Node {
158    /// Creates a new element node.
159    #[must_use]
160    pub fn element(name: impl Into<String>, attributes: HashMap<String, String>) -> Self {
161        let name = name.into();
162        let tag_id = TagId::from_name(&name);
163        Self {
164            kind: NodeKind::Element { tag_id, name, attributes },
165            parent: None,
166            first_child: None,
167            last_child: None,
168            prev_sibling: None,
169            next_sibling: None,
170        }
171    }
172
173    /// Creates a new text node.
174    #[must_use]
175    pub fn text(content: impl Into<String>) -> Self {
176        Self {
177            kind: NodeKind::Text { content: content.into() },
178            parent: None,
179            first_child: None,
180            last_child: None,
181            prev_sibling: None,
182            next_sibling: None,
183        }
184    }
185
186    /// Creates a new comment node.
187    #[must_use]
188    pub fn comment(content: impl Into<String>) -> Self {
189        Self {
190            kind: NodeKind::Comment { content: content.into() },
191            parent: None,
192            first_child: None,
193            last_child: None,
194            prev_sibling: None,
195            next_sibling: None,
196        }
197    }
198}
199
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn node_id_equality() {
        let id1 = NodeId::new(42);
        let id2 = NodeId::new(42);
        let id3 = NodeId::new(43);
        assert_eq!(id1, id2);
        assert_ne!(id1, id3);
    }

    #[test]
    fn node_id_index_round_trip() {
        assert_eq!(NodeId::new(42).index(), 42);
        assert_eq!(NodeId::new(0).index(), 0);
    }

    #[test]
    #[allow(deprecated)] // exercises the deprecated alias on purpose
    fn node_kind_element() {
        let kind = NodeKind::Element {
            tag_id: TagId::Div,
            name: "div".into(),
            attributes: HashMap::new(),
        };
        assert!(kind.is_element());
        assert!(!kind.is_text());
        assert!(!kind.is_comment());
        assert_eq!(kind.tag_name(), Some("div"));
        assert_eq!(kind.as_element_name(), Some("div"));
        assert_eq!(kind.tag_id(), Some(TagId::Div));
        assert!(kind.is_tag(TagId::Div));
        assert_eq!(kind.attributes().map(HashMap::len), Some(0));
        assert_eq!(kind.as_text(), None);
        assert_eq!(kind.as_comment(), None);
    }

    #[test]
    fn node_kind_text() {
        let kind = NodeKind::Text { content: "Hello".into() };
        assert!(!kind.is_element());
        assert!(kind.is_text());
        assert!(!kind.is_comment());
        assert_eq!(kind.as_text(), Some("Hello"));
        assert_eq!(kind.as_comment(), None);
        assert_eq!(kind.tag_name(), None);
        assert_eq!(kind.tag_id(), None);
        assert!(kind.attributes().is_none());
        assert!(!kind.is_tag(TagId::Div));
    }

    #[test]
    fn node_kind_comment() {
        let kind = NodeKind::Comment { content: "A comment".into() };
        assert!(!kind.is_element());
        assert!(!kind.is_text());
        assert!(kind.is_comment());
        assert_eq!(kind.as_comment(), Some("A comment"));
        assert_eq!(kind.as_text(), None);
        assert_eq!(kind.tag_name(), None);
        assert_eq!(kind.tag_id(), None);
        assert!(kind.attributes().is_none());
        assert!(!kind.is_tag(TagId::Div));
    }

    #[test]
    fn node_element_constructor() {
        let node = Node::element("div", HashMap::new());
        assert!(node.kind.is_element());
        assert_eq!(node.kind.tag_name(), Some("div"));
        assert_eq!(node.kind.attributes().map(HashMap::len), Some(0));
        assert!(node.parent.is_none());
        assert!(node.first_child.is_none());
        assert!(node.last_child.is_none());
        assert!(node.prev_sibling.is_none());
        assert!(node.next_sibling.is_none());
    }

    #[test]
    fn node_text_constructor() {
        let node = Node::text("Hello");
        assert!(node.kind.is_text());
        assert_eq!(node.kind.as_text(), Some("Hello"));
        assert!(node.parent.is_none());
        assert!(node.first_child.is_none());
        assert!(node.last_child.is_none());
        assert!(node.prev_sibling.is_none());
        assert!(node.next_sibling.is_none());
    }

    #[test]
    fn node_comment_constructor() {
        let node = Node::comment("A comment");
        assert!(node.kind.is_comment());
        assert_eq!(node.kind.as_comment(), Some("A comment"));
        assert!(node.parent.is_none());
        assert!(node.first_child.is_none());
        assert!(node.last_child.is_none());
        assert!(node.prev_sibling.is_none());
        assert!(node.next_sibling.is_none());
    }
}
269}