Skip to main content

scrape_core/dom/
document.rs

1//! Document container and tree operations.
2
3use std::{collections::HashMap, marker::PhantomData};
4
5use super::{
6    arena::Arena,
7    index::DocumentIndex,
8    node::{Node, NodeId},
9    state::{Building, DocumentState, MutableState, Queryable, QueryableState, Sealed},
10};
11
12/// An HTML document containing a tree of nodes.
13///
14/// The document owns all nodes via an arena allocator, ensuring
15/// cache-friendly contiguous storage. Navigation is performed
16/// using [`NodeId`] handles.
17///
18/// # Architecture
19///
20/// Nodes are stored in a single contiguous `Arena<Node>`. Parent-child
21/// relationships use `first_child`/`last_child` links (O(1) append),
22/// and siblings are doubly linked via `prev_sibling`/`next_sibling`.
23///
24/// # Navigation
25///
26/// The document provides both direct navigation methods and lazy iterators:
27///
28/// - [`parent`](Document::parent), [`first_child`](Document::first_child),
29///   [`last_child`](Document::last_child) - direct links
30/// - [`children`](Document::children) - iterate over direct children
31/// - [`ancestors`](Document::ancestors) - iterate from parent to root
32/// - [`descendants`](Document::descendants) - depth-first subtree traversal
33///
34/// # Typestate
35///
36/// Internally, the document uses typestate pattern to enforce lifecycle
37/// guarantees at compile time. The type parameter `S` tracks whether
38/// the document is being built, is queryable, or is sealed.
39#[derive(Debug)]
40pub struct DocumentImpl<S: DocumentState = Queryable> {
41    arena: Arena<Node>,
42    root: Option<NodeId>,
43    index: Option<DocumentIndex>,
44    _state: PhantomData<S>,
45}
46
47/// Public alias for backward compatibility.
48///
49/// The public `Document` type always refers to a queryable document.
50/// Internally, we use `DocumentImpl<S>` for typestate enforcement.
51pub type Document = DocumentImpl<Queryable>;
52
53// ==================== Default Implementations ====================
54
55impl Default for DocumentImpl<Building> {
56    fn default() -> Self {
57        Self::new()
58    }
59}
60
61impl Default for Document {
62    fn default() -> Self {
63        DocumentImpl::<Building>::new().build()
64    }
65}
66
67// ==================== Building State ====================
68
69impl DocumentImpl<Building> {
70    /// Creates a new empty document in building state.
71    ///
72    /// The default capacity is 256 nodes, which is sufficient for typical HTML pages
73    /// and reduces reallocations during parsing.
74    #[must_use]
75    pub fn new() -> Self {
76        Self::with_capacity(256)
77    }
78
79    /// Creates a new empty document with the specified capacity.
80    ///
81    /// Use this when you know the approximate number of nodes to avoid reallocations.
82    #[must_use]
83    pub fn with_capacity(capacity: usize) -> Self {
84        Self { arena: Arena::with_capacity(capacity), root: None, index: None, _state: PhantomData }
85    }
86
87    /// Sets the root node ID.
88    pub fn set_root(&mut self, id: NodeId) {
89        self.root = Some(id);
90    }
91
92    /// Creates a new element node and returns its ID.
93    pub fn create_element(
94        &mut self,
95        name: impl Into<String>,
96        attributes: HashMap<String, String>,
97    ) -> NodeId {
98        NodeId::new(self.arena.alloc(Node::element(name, attributes)))
99    }
100
101    /// Creates a new text node and returns its ID.
102    pub fn create_text(&mut self, content: impl Into<String>) -> NodeId {
103        NodeId::new(self.arena.alloc(Node::text(content)))
104    }
105
106    /// Creates a new comment node and returns its ID.
107    pub fn create_comment(&mut self, content: impl Into<String>) -> NodeId {
108        NodeId::new(self.arena.alloc(Node::comment(content)))
109    }
110
111    /// Appends a child node to a parent.
112    ///
113    /// Updates parent, `first_child`, `last_child`, and sibling links.
114    ///
115    /// # Panics
116    ///
117    /// Panics in debug builds if `parent_id` or `child_id` are invalid.
118    pub fn append_child(&mut self, parent_id: NodeId, child_id: NodeId) {
119        debug_assert!(parent_id.index() < self.arena.len(), "Invalid parent_id");
120        debug_assert!(child_id.index() < self.arena.len(), "Invalid child_id");
121
122        // Get current last child of parent
123        let prev_last = self.arena.get(parent_id.index()).and_then(|p| p.last_child);
124
125        // Update child's parent and prev_sibling
126        if let Some(child) = self.arena.get_mut(child_id.index()) {
127            child.parent = Some(parent_id);
128            child.prev_sibling = prev_last;
129            child.next_sibling = None;
130        }
131
132        // Update previous last child's next_sibling
133        if let Some(prev_id) = prev_last
134            && let Some(prev) = self.arena.get_mut(prev_id.index())
135        {
136            prev.next_sibling = Some(child_id);
137        }
138
139        // Update parent's first_child (if first) and last_child
140        if let Some(parent) = self.arena.get_mut(parent_id.index()) {
141            if parent.first_child.is_none() {
142                parent.first_child = Some(child_id);
143            }
144            parent.last_child = Some(child_id);
145        }
146    }
147
148    /// Transitions the document from Building to Queryable state.
149    ///
150    /// This is a one-way transition. Once built, the document structure
151    /// cannot be modified.
152    ///
153    /// # Examples
154    ///
155    /// ```rust
156    /// use std::collections::HashMap;
157    ///
158    /// use scrape_core::{Building, DocumentImpl};
159    ///
160    /// let mut doc = DocumentImpl::<Building>::new();
161    /// let root = doc.create_element("div", HashMap::new());
162    /// doc.set_root(root);
163    ///
164    /// // Transition to queryable state
165    /// let doc = doc.build();
166    /// ```
167    #[must_use]
168    pub fn build(self) -> DocumentImpl<Queryable> {
169        DocumentImpl { arena: self.arena, root: self.root, index: self.index, _state: PhantomData }
170    }
171}
172
173// ==================== Queryable State ====================
174
175impl DocumentImpl<Queryable> {
176    /// Creates a new empty document in queryable state.
177    ///
178    /// This is a convenience method for backward compatibility.
179    /// Internally, it creates a Building document and immediately transitions to Queryable.
180    #[must_use]
181    pub fn new() -> Self {
182        DocumentImpl::<Building>::new().build()
183    }
184
185    /// Creates a new empty document with the specified capacity in queryable state.
186    ///
187    /// This is a convenience method for backward compatibility.
188    #[must_use]
189    pub fn with_capacity(capacity: usize) -> Self {
190        DocumentImpl::<Building>::with_capacity(capacity).build()
191    }
192
193    /// Sets the root node ID.
194    ///
195    /// Available on Queryable for backward compatibility with tests.
196    pub fn set_root(&mut self, id: NodeId) {
197        self.root = Some(id);
198    }
199
200    /// Creates a new element node and returns its ID.
201    ///
202    /// Available on Queryable for backward compatibility with tests.
203    pub fn create_element(
204        &mut self,
205        name: impl Into<String>,
206        attributes: HashMap<String, String>,
207    ) -> NodeId {
208        NodeId::new(self.arena.alloc(Node::element(name, attributes)))
209    }
210
211    /// Creates a new text node and returns its ID.
212    ///
213    /// Available on Queryable for backward compatibility with tests.
214    pub fn create_text(&mut self, content: impl Into<String>) -> NodeId {
215        NodeId::new(self.arena.alloc(Node::text(content)))
216    }
217
218    /// Creates a new comment node and returns its ID.
219    ///
220    /// Available on Queryable for backward compatibility with tests.
221    pub fn create_comment(&mut self, content: impl Into<String>) -> NodeId {
222        NodeId::new(self.arena.alloc(Node::comment(content)))
223    }
224
225    /// Appends a child node to a parent.
226    ///
227    /// Available on Queryable for backward compatibility with tests.
228    ///
229    /// Updates parent, `first_child`, `last_child`, and sibling links.
230    ///
231    /// # Panics
232    ///
233    /// Panics in debug builds if `parent_id` or `child_id` are invalid.
234    pub fn append_child(&mut self, parent_id: NodeId, child_id: NodeId) {
235        debug_assert!(parent_id.index() < self.arena.len(), "Invalid parent_id");
236        debug_assert!(child_id.index() < self.arena.len(), "Invalid child_id");
237
238        // Get current last child of parent
239        let prev_last = self.arena.get(parent_id.index()).and_then(|p| p.last_child);
240
241        // Update child's parent and prev_sibling
242        if let Some(child) = self.arena.get_mut(child_id.index()) {
243            child.parent = Some(parent_id);
244            child.prev_sibling = prev_last;
245            child.next_sibling = None;
246        }
247
248        // Update previous last child's next_sibling
249        if let Some(prev_id) = prev_last
250            && let Some(prev) = self.arena.get_mut(prev_id.index())
251        {
252            prev.next_sibling = Some(child_id);
253        }
254
255        // Update parent's first_child (if first) and last_child
256        if let Some(parent) = self.arena.get_mut(parent_id.index()) {
257            if parent.first_child.is_none() {
258                parent.first_child = Some(child_id);
259            }
260            parent.last_child = Some(child_id);
261        }
262    }
263
264    /// Seals the document, preventing any future modifications.
265    ///
266    /// This is a one-way transition for when you need to guarantee
267    /// the document will never change.
268    #[must_use]
269    pub fn seal(self) -> DocumentImpl<Sealed> {
270        DocumentImpl { arena: self.arena, root: self.root, index: self.index, _state: PhantomData }
271    }
272
273    /// Sets the document index.
274    ///
275    /// Only available in Queryable state since index would be invalidated
276    /// by structural modifications.
277    pub fn set_index(&mut self, index: DocumentIndex) {
278        self.index = Some(index);
279    }
280}
281
282// ==================== Shared Methods (All States) ====================
283
284impl<S: DocumentState> DocumentImpl<S> {
285    /// Returns the root node ID, if any.
286    #[must_use]
287    pub fn root(&self) -> Option<NodeId> {
288        self.root
289    }
290
291    /// Returns a reference to the node with the given ID.
292    #[inline]
293    #[must_use]
294    pub fn get(&self, id: NodeId) -> Option<&Node> {
295        self.arena.get(id.index())
296    }
297
298    /// Returns the number of nodes in the document.
299    #[must_use]
300    pub fn len(&self) -> usize {
301        self.arena.len()
302    }
303
304    /// Returns `true` if the document has no nodes.
305    #[must_use]
306    pub fn is_empty(&self) -> bool {
307        self.arena.is_empty()
308    }
309
310    /// Returns an iterator over all nodes.
311    pub fn nodes(&self) -> impl Iterator<Item = (NodeId, &Node)> {
312        self.arena.iter().map(|(i, node)| (NodeId::new(i), node))
313    }
314}
315
316// ==================== Mutable State Methods ====================
317
318impl<S: MutableState> DocumentImpl<S> {
319    /// Returns a mutable reference to the node with the given ID.
320    ///
321    /// Only available for documents in mutable states (Building).
322    #[inline]
323    #[must_use]
324    pub fn get_mut(&mut self, id: NodeId) -> Option<&mut Node> {
325        self.arena.get_mut(id.index())
326    }
327}
328
329// ==================== Queryable State Methods ====================
330
331impl<S: QueryableState> DocumentImpl<S> {
332    /// Returns the document index, if built.
333    ///
334    /// Only available in queryable states (Queryable, Sealed).
335    #[must_use]
336    pub fn index(&self) -> Option<&DocumentIndex> {
337        self.index.as_ref()
338    }
339}
340
341// ==================== Navigation APIs ====================
342
343impl<S: DocumentState> DocumentImpl<S> {
344    // ==================== Navigation APIs ====================
345
346    /// Returns the parent of a node.
347    #[inline]
348    #[must_use]
349    pub fn parent(&self, id: NodeId) -> Option<NodeId> {
350        self.arena.get(id.index()).and_then(|n| n.parent)
351    }
352
353    /// Returns the first child of a node.
354    #[inline]
355    #[must_use]
356    pub fn first_child(&self, id: NodeId) -> Option<NodeId> {
357        self.arena.get(id.index()).and_then(|n| n.first_child)
358    }
359
360    /// Returns the last child of a node.
361    #[inline]
362    #[must_use]
363    pub fn last_child(&self, id: NodeId) -> Option<NodeId> {
364        self.arena.get(id.index()).and_then(|n| n.last_child)
365    }
366
367    /// Returns the next sibling of a node.
368    #[inline]
369    #[must_use]
370    pub fn next_sibling(&self, id: NodeId) -> Option<NodeId> {
371        self.arena.get(id.index()).and_then(|n| n.next_sibling)
372    }
373
374    /// Returns the previous sibling of a node.
375    #[inline]
376    #[must_use]
377    pub fn prev_sibling(&self, id: NodeId) -> Option<NodeId> {
378        self.arena.get(id.index()).and_then(|n| n.prev_sibling)
379    }
380
381    /// Returns an iterator over children of a node.
382    ///
383    /// The iterator yields children in order from first to last.
384    ///
385    /// # Examples
386    ///
387    /// ```
388    /// use std::collections::HashMap;
389    ///
390    /// use scrape_core::Document;
391    ///
392    /// let mut doc = Document::new();
393    /// let parent = doc.create_element("div", HashMap::new());
394    /// let child1 = doc.create_element("span", HashMap::new());
395    /// let child2 = doc.create_element("span", HashMap::new());
396    ///
397    /// doc.append_child(parent, child1);
398    /// doc.append_child(parent, child2);
399    ///
400    /// let children: Vec<_> = doc.children(parent).collect();
401    /// assert_eq!(children.len(), 2);
402    /// ```
403    #[must_use]
404    pub fn children(&self, id: NodeId) -> ChildrenIter<'_, S> {
405        ChildrenIter { doc: self, current: self.first_child(id) }
406    }
407
408    /// Returns an iterator over ancestors of a node.
409    ///
410    /// The iterator yields ancestors from parent to root (does not include the node itself).
411    ///
412    /// # Examples
413    ///
414    /// ```
415    /// use std::collections::HashMap;
416    ///
417    /// use scrape_core::Document;
418    ///
419    /// let mut doc = Document::new();
420    /// let grandparent = doc.create_element("html", HashMap::new());
421    /// let parent = doc.create_element("body", HashMap::new());
422    /// let child = doc.create_element("div", HashMap::new());
423    ///
424    /// doc.append_child(grandparent, parent);
425    /// doc.append_child(parent, child);
426    ///
427    /// let ancestors: Vec<_> = doc.ancestors(child).collect();
428    /// assert_eq!(ancestors.len(), 2); // parent, grandparent
429    /// ```
430    #[must_use]
431    pub fn ancestors(&self, id: NodeId) -> AncestorsIter<'_, S> {
432        AncestorsIter { doc: self, current: self.parent(id) }
433    }
434
435    /// Returns an iterator over descendants in depth-first pre-order.
436    ///
437    /// Does not include the starting node itself.
438    ///
439    /// # Examples
440    ///
441    /// ```
442    /// use std::collections::HashMap;
443    ///
444    /// use scrape_core::Document;
445    ///
446    /// let mut doc = Document::new();
447    /// let root = doc.create_element("html", HashMap::new());
448    /// let child1 = doc.create_element("head", HashMap::new());
449    /// let child2 = doc.create_element("body", HashMap::new());
450    /// let grandchild = doc.create_element("div", HashMap::new());
451    ///
452    /// doc.append_child(root, child1);
453    /// doc.append_child(root, child2);
454    /// doc.append_child(child2, grandchild);
455    ///
456    /// let descendants: Vec<_> = doc.descendants(root).collect();
457    /// assert_eq!(descendants.len(), 3); // head, body, div
458    /// ```
459    #[must_use]
460    pub fn descendants(&self, id: NodeId) -> DescendantsIter<'_, S> {
461        DescendantsIter { doc: self, root: id, stack: vec![id], started: false }
462    }
463
464    /// Returns an iterator over siblings following a node.
465    ///
466    /// Does not include the node itself.
467    ///
468    /// # Examples
469    ///
470    /// ```
471    /// use std::collections::HashMap;
472    ///
473    /// use scrape_core::Document;
474    ///
475    /// let mut doc = Document::new();
476    /// let parent = doc.create_element("ul", HashMap::new());
477    /// let child1 = doc.create_element("li", HashMap::new());
478    /// let child2 = doc.create_element("li", HashMap::new());
479    /// let child3 = doc.create_element("li", HashMap::new());
480    ///
481    /// doc.append_child(parent, child1);
482    /// doc.append_child(parent, child2);
483    /// doc.append_child(parent, child3);
484    ///
485    /// let next: Vec<_> = doc.next_siblings(child1).collect();
486    /// assert_eq!(next.len(), 2); // child2, child3
487    /// ```
488    #[must_use]
489    pub fn next_siblings(&self, id: NodeId) -> NextSiblingsIter<'_, S> {
490        NextSiblingsIter { doc: self, current: self.next_sibling(id) }
491    }
492
493    /// Returns an iterator over siblings preceding a node.
494    ///
495    /// Does not include the node itself. Iterates in reverse order
496    /// (from immediate predecessor toward first sibling).
497    ///
498    /// # Examples
499    ///
500    /// ```
501    /// use std::collections::HashMap;
502    ///
503    /// use scrape_core::Document;
504    ///
505    /// let mut doc = Document::new();
506    /// let parent = doc.create_element("ul", HashMap::new());
507    /// let child1 = doc.create_element("li", HashMap::new());
508    /// let child2 = doc.create_element("li", HashMap::new());
509    /// let child3 = doc.create_element("li", HashMap::new());
510    ///
511    /// doc.append_child(parent, child1);
512    /// doc.append_child(parent, child2);
513    /// doc.append_child(parent, child3);
514    ///
515    /// let prev: Vec<_> = doc.prev_siblings(child3).collect();
516    /// assert_eq!(prev.len(), 2); // child2, child1 (reverse order)
517    /// ```
518    #[must_use]
519    pub fn prev_siblings(&self, id: NodeId) -> PrevSiblingsIter<'_, S> {
520        PrevSiblingsIter { doc: self, current: self.prev_sibling(id) }
521    }
522
523    /// Returns an iterator over all siblings of a node (excluding the node itself).
524    ///
525    /// Iterates in document order from first sibling to last.
526    ///
527    /// # Examples
528    ///
529    /// ```
530    /// use std::collections::HashMap;
531    ///
532    /// use scrape_core::Document;
533    ///
534    /// let mut doc = Document::new();
535    /// let parent = doc.create_element("ul", HashMap::new());
536    /// let child1 = doc.create_element("li", HashMap::new());
537    /// let child2 = doc.create_element("li", HashMap::new());
538    /// let child3 = doc.create_element("li", HashMap::new());
539    ///
540    /// doc.append_child(parent, child1);
541    /// doc.append_child(parent, child2);
542    /// doc.append_child(parent, child3);
543    ///
544    /// let siblings: Vec<_> = doc.siblings(child2).collect();
545    /// assert_eq!(siblings.len(), 2); // child1, child3
546    /// ```
547    #[must_use]
548    pub fn siblings(&self, id: NodeId) -> SiblingsIter<'_, S> {
549        // Find first sibling by going to parent's first child
550        let first = self.parent(id).and_then(|p| self.first_child(p));
551        SiblingsIter { doc: self, current: first, exclude: id }
552    }
553}
554
555/// Iterator over direct children of a node.
556///
557/// Created by [`Document::children`].
558#[derive(Debug)]
559pub struct ChildrenIter<'a, S: DocumentState = Queryable> {
560    doc: &'a DocumentImpl<S>,
561    current: Option<NodeId>,
562}
563
564impl<S: DocumentState> Iterator for ChildrenIter<'_, S> {
565    type Item = NodeId;
566
567    fn next(&mut self) -> Option<Self::Item> {
568        let current = self.current?;
569        self.current = self.doc.next_sibling(current);
570        Some(current)
571    }
572}
573
574/// Iterator over ancestors of a node (parent, grandparent, ...).
575///
576/// Created by [`Document::ancestors`].
577#[derive(Debug)]
578pub struct AncestorsIter<'a, S: DocumentState = Queryable> {
579    doc: &'a DocumentImpl<S>,
580    current: Option<NodeId>,
581}
582
583impl<S: DocumentState> Iterator for AncestorsIter<'_, S> {
584    type Item = NodeId;
585
586    fn next(&mut self) -> Option<Self::Item> {
587        let current = self.current?;
588        self.current = self.doc.parent(current);
589        Some(current)
590    }
591}
592
593/// Iterator over descendants in depth-first pre-order.
594///
595/// Created by [`Document::descendants`].
596#[derive(Debug)]
597pub struct DescendantsIter<'a, S: DocumentState = Queryable> {
598    doc: &'a DocumentImpl<S>,
599    root: NodeId,
600    stack: Vec<NodeId>,
601    started: bool,
602}
603
604impl<S: DocumentState> Iterator for DescendantsIter<'_, S> {
605    type Item = NodeId;
606
607    fn next(&mut self) -> Option<Self::Item> {
608        if !self.started {
609            self.started = true;
610            if let Some(first) = self.doc.first_child(self.root) {
611                self.stack.clear();
612                self.stack.push(first);
613            } else {
614                return None;
615            }
616        }
617
618        let current = self.stack.pop()?;
619
620        // Push next sibling first (so it's processed after children)
621        if let Some(next) = self.doc.next_sibling(current) {
622            self.stack.push(next);
623        }
624
625        // Push first child (will be processed next - depth-first)
626        if let Some(child) = self.doc.first_child(current) {
627            self.stack.push(child);
628        }
629
630        Some(current)
631    }
632}
633
634/// Iterator over siblings following a node.
635///
636/// Created by [`Document::next_siblings`].
637#[derive(Debug)]
638pub struct NextSiblingsIter<'a, S: DocumentState = Queryable> {
639    doc: &'a DocumentImpl<S>,
640    current: Option<NodeId>,
641}
642
643impl<S: DocumentState> Iterator for NextSiblingsIter<'_, S> {
644    type Item = NodeId;
645
646    fn next(&mut self) -> Option<Self::Item> {
647        let current = self.current?;
648        self.current = self.doc.next_sibling(current);
649        Some(current)
650    }
651}
652
653/// Iterator over siblings preceding a node.
654///
655/// Created by [`Document::prev_siblings`].
656#[derive(Debug)]
657pub struct PrevSiblingsIter<'a, S: DocumentState = Queryable> {
658    doc: &'a DocumentImpl<S>,
659    current: Option<NodeId>,
660}
661
662impl<S: DocumentState> Iterator for PrevSiblingsIter<'_, S> {
663    type Item = NodeId;
664
665    fn next(&mut self) -> Option<Self::Item> {
666        let current = self.current?;
667        self.current = self.doc.prev_sibling(current);
668        Some(current)
669    }
670}
671
672/// Iterator over all siblings of a node (excluding the node itself).
673///
674/// Created by [`Document::siblings`].
675/// Iterates in document order (from first sibling to last).
676#[derive(Debug)]
677pub struct SiblingsIter<'a, S: DocumentState = Queryable> {
678    doc: &'a DocumentImpl<S>,
679    current: Option<NodeId>,
680    exclude: NodeId,
681}
682
683impl<S: DocumentState> Iterator for SiblingsIter<'_, S> {
684    type Item = NodeId;
685
686    fn next(&mut self) -> Option<Self::Item> {
687        loop {
688            let current = self.current?;
689            self.current = self.doc.next_sibling(current);
690            if current != self.exclude {
691                return Some(current);
692            }
693        }
694    }
695}
696
697// ==================== Iterator Extensions ====================
698
699impl<'a, S: DocumentState> ChildrenIter<'a, S> {
700    /// Returns an iterator over only element children (skipping text/comment nodes).
701    ///
702    /// This is more efficient than using `.filter()` externally because
703    /// it avoids closure overhead and can be specialized.
704    ///
705    /// # Examples
706    ///
707    /// ```rust
708    /// use scrape_core::Soup;
709    ///
710    /// let soup = Soup::parse("<div>text<span>A</span>more<span>B</span></div>");
711    /// let div = soup.find("div").unwrap().unwrap();
712    /// let doc = soup.document();
713    ///
714    /// // Only elements, skipping text nodes
715    /// let count = doc.children(div.node_id()).elements().count();
716    /// assert_eq!(count, 2); // span, span
717    /// ```
718    #[must_use]
719    pub fn elements(self) -> ElementChildrenIter<'a, S> {
720        ElementChildrenIter { inner: self }
721    }
722}
723
724/// Iterator over element children only.
725///
726/// Created by [`ChildrenIter::elements`].
727#[derive(Debug)]
728pub struct ElementChildrenIter<'a, S: DocumentState = Queryable> {
729    inner: ChildrenIter<'a, S>,
730}
731
732impl<S: DocumentState> Iterator for ElementChildrenIter<'_, S> {
733    type Item = NodeId;
734
735    #[inline]
736    fn next(&mut self) -> Option<Self::Item> {
737        loop {
738            let id = self.inner.next()?;
739            if self.inner.doc.get(id).is_some_and(|n| n.kind.is_element()) {
740                return Some(id);
741            }
742        }
743    }
744}
745
746impl<'a, S: DocumentState> DescendantsIter<'a, S> {
747    /// Returns an iterator over only element descendants (skipping text/comment nodes).
748    ///
749    /// # Examples
750    ///
751    /// ```rust
752    /// use scrape_core::Soup;
753    ///
754    /// let soup = Soup::parse("<div>text<p>para</p></div>");
755    /// let div = soup.find("div").unwrap().unwrap();
756    /// let doc = soup.document();
757    ///
758    /// let count = doc.descendants(div.node_id()).elements().count();
759    /// assert_eq!(count, 1); // p only
760    /// ```
761    #[must_use]
762    pub fn elements(self) -> ElementDescendantsIter<'a, S> {
763        ElementDescendantsIter { inner: self }
764    }
765}
766
767/// Iterator over element descendants only.
768///
769/// Created by [`DescendantsIter::elements`].
770#[derive(Debug)]
771pub struct ElementDescendantsIter<'a, S: DocumentState = Queryable> {
772    inner: DescendantsIter<'a, S>,
773}
774
775impl<S: DocumentState> Iterator for ElementDescendantsIter<'_, S> {
776    type Item = NodeId;
777
778    #[inline]
779    fn next(&mut self) -> Option<Self::Item> {
780        loop {
781            let id = self.inner.next()?;
782            if self.inner.doc.get(id).is_some_and(|n| n.kind.is_element()) {
783                return Some(id);
784            }
785        }
786    }
787}
788
789impl<'a, S: DocumentState> AncestorsIter<'a, S> {
790    /// Returns an iterator over only element ancestors (skipping non-element nodes).
791    ///
792    /// In practice, ancestors are almost always elements, but this method
793    /// provides consistent API with other iterators.
794    #[must_use]
795    pub fn elements(self) -> ElementAncestorsIter<'a, S> {
796        ElementAncestorsIter { inner: self }
797    }
798}
799
800/// Iterator over element ancestors only.
801///
802/// Created by [`AncestorsIter::elements`].
803#[derive(Debug)]
804pub struct ElementAncestorsIter<'a, S: DocumentState = Queryable> {
805    inner: AncestorsIter<'a, S>,
806}
807
808impl<S: DocumentState> Iterator for ElementAncestorsIter<'_, S> {
809    type Item = NodeId;
810
811    #[inline]
812    fn next(&mut self) -> Option<Self::Item> {
813        loop {
814            let id = self.inner.next()?;
815            if self.inner.doc.get(id).is_some_and(|n| n.kind.is_element()) {
816                return Some(id);
817            }
818        }
819    }
820}
821
822impl<'a, S: DocumentState> NextSiblingsIter<'a, S> {
823    /// Returns an iterator over only element siblings (skipping text/comment nodes).
824    #[must_use]
825    pub fn elements(self) -> ElementNextSiblingsIter<'a, S> {
826        ElementNextSiblingsIter { inner: self }
827    }
828}
829
830/// Iterator over next element siblings only.
831///
832/// Created by [`NextSiblingsIter::elements`].
833#[derive(Debug)]
834pub struct ElementNextSiblingsIter<'a, S: DocumentState = Queryable> {
835    inner: NextSiblingsIter<'a, S>,
836}
837
838impl<S: DocumentState> Iterator for ElementNextSiblingsIter<'_, S> {
839    type Item = NodeId;
840
841    #[inline]
842    fn next(&mut self) -> Option<Self::Item> {
843        loop {
844            let id = self.inner.next()?;
845            if self.inner.doc.get(id).is_some_and(|n| n.kind.is_element()) {
846                return Some(id);
847            }
848        }
849    }
850}
851
852impl<'a, S: DocumentState> PrevSiblingsIter<'a, S> {
853    /// Returns an iterator over only element siblings (skipping text/comment nodes).
854    #[must_use]
855    pub fn elements(self) -> ElementPrevSiblingsIter<'a, S> {
856        ElementPrevSiblingsIter { inner: self }
857    }
858}
859
860/// Iterator over previous element siblings only.
861///
862/// Created by [`PrevSiblingsIter::elements`].
863#[derive(Debug)]
864pub struct ElementPrevSiblingsIter<'a, S: DocumentState = Queryable> {
865    inner: PrevSiblingsIter<'a, S>,
866}
867
868impl<S: DocumentState> Iterator for ElementPrevSiblingsIter<'_, S> {
869    type Item = NodeId;
870
871    #[inline]
872    fn next(&mut self) -> Option<Self::Item> {
873        loop {
874            let id = self.inner.next()?;
875            if self.inner.doc.get(id).is_some_and(|n| n.kind.is_element()) {
876                return Some(id);
877            }
878        }
879    }
880}
881
882impl<'a, S: DocumentState> SiblingsIter<'a, S> {
883    /// Returns an iterator over only element siblings (skipping text/comment nodes).
884    #[must_use]
885    pub fn elements(self) -> ElementSiblingsIter<'a, S> {
886        ElementSiblingsIter { inner: self }
887    }
888}
889
890/// Iterator over all element siblings (excluding self).
891///
892/// Created by [`SiblingsIter::elements`].
893#[derive(Debug)]
894pub struct ElementSiblingsIter<'a, S: DocumentState = Queryable> {
895    inner: SiblingsIter<'a, S>,
896}
897
898impl<S: DocumentState> Iterator for ElementSiblingsIter<'_, S> {
899    type Item = NodeId;
900
901    #[inline]
902    fn next(&mut self) -> Option<Self::Item> {
903        loop {
904            let id = self.inner.next()?;
905            if self.inner.doc.get(id).is_some_and(|n| n.kind.is_element()) {
906                return Some(id);
907            }
908        }
909    }
910}
911
912#[cfg(test)]
913mod tests {
914    use super::*;
915
916    fn create_test_doc() -> Document {
917        let mut doc = Document::new();
918
919        let html = doc.create_element("html", HashMap::new());
920        doc.set_root(html);
921
922        let head = doc.create_element("head", HashMap::new());
923        let body = doc.create_element("body", HashMap::new());
924        let div = doc.create_element("div", HashMap::new());
925        let text = doc.create_text("Hello");
926
927        doc.append_child(html, head);
928        doc.append_child(html, body);
929        doc.append_child(body, div);
930        doc.append_child(div, text);
931
932        doc
933    }
934
935    #[test]
936    fn document_create_element() {
937        let mut doc = Document::new();
938        let id = doc.create_element("div", HashMap::new());
939        assert_eq!(doc.len(), 1);
940
941        let node = doc.get(id).unwrap();
942        assert!(node.kind.is_element());
943        assert_eq!(node.kind.tag_name(), Some("div"));
944    }
945
946    #[test]
947    fn document_create_text() {
948        let mut doc = Document::new();
949        let id = doc.create_text("Hello World");
950
951        let node = doc.get(id).unwrap();
952        assert!(node.kind.is_text());
953        assert_eq!(node.kind.as_text(), Some("Hello World"));
954    }
955
956    #[test]
957    fn document_root() {
958        let mut doc = Document::new();
959        assert!(doc.root().is_none());
960
961        let root_id = doc.create_element("html", HashMap::new());
962        doc.set_root(root_id);
963
964        assert_eq!(doc.root(), Some(root_id));
965    }
966
967    #[test]
968    fn parent_navigation() {
969        let doc = create_test_doc();
970        let root = doc.root().unwrap();
971
972        let body = doc.children(root).nth(1).unwrap();
973        assert_eq!(doc.parent(body), Some(root));
974    }
975
976    #[test]
977    fn children_iteration() {
978        let doc = create_test_doc();
979        let root = doc.root().unwrap();
980
981        assert_eq!(doc.children(root).count(), 2);
982    }
983
984    #[test]
985    fn sibling_navigation() {
986        let doc = create_test_doc();
987        let root = doc.root().unwrap();
988
989        let head = doc.first_child(root).unwrap();
990        let body = doc.next_sibling(head).unwrap();
991
992        assert_eq!(doc.prev_sibling(body), Some(head));
993        assert!(doc.next_sibling(body).is_none());
994    }
995
996    #[test]
997    fn descendants_iteration() {
998        let doc = create_test_doc();
999        let root = doc.root().unwrap();
1000
1001        assert_eq!(doc.descendants(root).count(), 4);
1002    }
1003
1004    #[test]
1005    fn ancestors_iteration() {
1006        let doc = create_test_doc();
1007        let root = doc.root().unwrap();
1008
1009        let body = doc.children(root).nth(1).unwrap();
1010        let div = doc.first_child(body).unwrap();
1011        let text = doc.first_child(div).unwrap();
1012
1013        assert_eq!(doc.ancestors(text).count(), 3);
1014    }
1015
1016    #[test]
1017    fn first_and_last_child() {
1018        let doc = create_test_doc();
1019        let root = doc.root().unwrap();
1020
1021        let first = doc.first_child(root).unwrap();
1022        let last = doc.last_child(root).unwrap();
1023
1024        assert_eq!(doc.get(first).unwrap().kind.tag_name(), Some("head"));
1025        assert_eq!(doc.get(last).unwrap().kind.tag_name(), Some("body"));
1026    }
1027
1028    #[test]
1029    fn children_empty_for_leaf_nodes() {
1030        let doc = create_test_doc();
1031        let root = doc.root().unwrap();
1032
1033        let body = doc.children(root).nth(1).unwrap();
1034        let div = doc.first_child(body).unwrap();
1035        let text = doc.first_child(div).unwrap();
1036
1037        assert!(doc.children(text).next().is_none());
1038    }
1039
1040    #[test]
1041    fn descendants_empty_for_leaf_nodes() {
1042        let doc = create_test_doc();
1043        let root = doc.root().unwrap();
1044
1045        let body = doc.children(root).nth(1).unwrap();
1046        let div = doc.first_child(body).unwrap();
1047        let text = doc.first_child(div).unwrap();
1048
1049        assert!(doc.descendants(text).next().is_none());
1050    }
1051
1052    #[test]
1053    fn ancestors_empty_for_root() {
1054        let doc = create_test_doc();
1055        let root = doc.root().unwrap();
1056
1057        assert!(doc.ancestors(root).next().is_none());
1058    }
1059
1060    #[test]
1061    fn document_parent_child_relationship() {
1062        let mut doc = Document::new();
1063        let parent_id = doc.create_element("div", HashMap::new());
1064        let child_id = doc.create_element("span", HashMap::new());
1065
1066        doc.append_child(parent_id, child_id);
1067
1068        let parent = doc.get(parent_id).unwrap();
1069        assert_eq!(parent.first_child, Some(child_id));
1070        assert_eq!(parent.last_child, Some(child_id));
1071
1072        let child = doc.get(child_id).unwrap();
1073        assert_eq!(child.parent, Some(parent_id));
1074    }
1075
1076    #[test]
1077    fn document_sibling_links() {
1078        let mut doc = Document::new();
1079        let parent_id = doc.create_element("div", HashMap::new());
1080        let child1_id = doc.create_element("span", HashMap::new());
1081        let child2_id = doc.create_element("span", HashMap::new());
1082        let child3_id = doc.create_element("span", HashMap::new());
1083
1084        doc.append_child(parent_id, child1_id);
1085        doc.append_child(parent_id, child2_id);
1086        doc.append_child(parent_id, child3_id);
1087
1088        let child1 = doc.get(child1_id).unwrap();
1089        assert_eq!(child1.prev_sibling, None);
1090        assert_eq!(child1.next_sibling, Some(child2_id));
1091
1092        let child2 = doc.get(child2_id).unwrap();
1093        assert_eq!(child2.prev_sibling, Some(child1_id));
1094        assert_eq!(child2.next_sibling, Some(child3_id));
1095
1096        let child3 = doc.get(child3_id).unwrap();
1097        assert_eq!(child3.prev_sibling, Some(child2_id));
1098        assert_eq!(child3.next_sibling, None);
1099    }
1100
1101    #[test]
1102    fn descendants_order_depth_first() {
1103        let mut doc = Document::new();
1104        let root = doc.create_element("root", HashMap::new());
1105        let a = doc.create_element("a", HashMap::new());
1106        let b = doc.create_element("b", HashMap::new());
1107        let a1 = doc.create_element("a1", HashMap::new());
1108        let a2 = doc.create_element("a2", HashMap::new());
1109
1110        doc.set_root(root);
1111        doc.append_child(root, a);
1112        doc.append_child(root, b);
1113        doc.append_child(a, a1);
1114        doc.append_child(a, a2);
1115
1116        let names: Vec<_> =
1117            doc.descendants(root).map(|id| doc.get(id).unwrap().kind.tag_name().unwrap()).collect();
1118
1119        // Depth-first pre-order: a -> a1 -> a2 -> b
1120        assert_eq!(names, vec!["a", "a1", "a2", "b"]);
1121    }
1122
1123    #[test]
1124    fn next_siblings_iteration() {
1125        let doc = create_test_doc();
1126        let root = doc.root().unwrap();
1127        let head = doc.first_child(root).unwrap();
1128
1129        assert_eq!(doc.next_siblings(head).count(), 1); // body
1130    }
1131
1132    #[test]
1133    fn next_siblings_empty_for_last() {
1134        let doc = create_test_doc();
1135        let root = doc.root().unwrap();
1136        let body = doc.last_child(root).unwrap();
1137
1138        assert!(doc.next_siblings(body).next().is_none());
1139    }
1140
1141    #[test]
1142    fn prev_siblings_iteration() {
1143        let doc = create_test_doc();
1144        let root = doc.root().unwrap();
1145        let body = doc.last_child(root).unwrap();
1146
1147        assert_eq!(doc.prev_siblings(body).count(), 1); // head
1148    }
1149
1150    #[test]
1151    fn prev_siblings_empty_for_first() {
1152        let doc = create_test_doc();
1153        let root = doc.root().unwrap();
1154        let head = doc.first_child(root).unwrap();
1155
1156        assert!(doc.prev_siblings(head).next().is_none());
1157    }
1158
1159    #[test]
1160    fn siblings_iteration() {
1161        let mut doc = Document::new();
1162        let parent = doc.create_element("ul", HashMap::new());
1163        let child1 = doc.create_element("li", HashMap::new());
1164        let child2 = doc.create_element("li", HashMap::new());
1165        let child3 = doc.create_element("li", HashMap::new());
1166
1167        doc.append_child(parent, child1);
1168        doc.append_child(parent, child2);
1169        doc.append_child(parent, child3);
1170
1171        let siblings: Vec<_> = doc.siblings(child2).collect();
1172        assert_eq!(siblings.len(), 2);
1173        assert_eq!(siblings[0], child1); // Document order
1174        assert_eq!(siblings[1], child3);
1175    }
1176
1177    #[test]
1178    fn siblings_empty_for_only_child() {
1179        let mut doc = Document::new();
1180        let parent = doc.create_element("div", HashMap::new());
1181        let child = doc.create_element("span", HashMap::new());
1182
1183        doc.append_child(parent, child);
1184
1185        assert!(doc.siblings(child).next().is_none());
1186    }
1187
1188    #[test]
1189    fn siblings_excludes_self() {
1190        let mut doc = Document::new();
1191        let parent = doc.create_element("ul", HashMap::new());
1192        let child1 = doc.create_element("li", HashMap::new());
1193        let child2 = doc.create_element("li", HashMap::new());
1194
1195        doc.append_child(parent, child1);
1196        doc.append_child(parent, child2);
1197
1198        let siblings1: Vec<_> = doc.siblings(child1).collect();
1199        assert_eq!(siblings1.len(), 1);
1200        assert_eq!(siblings1[0], child2);
1201
1202        let siblings2: Vec<_> = doc.siblings(child2).collect();
1203        assert_eq!(siblings2.len(), 1);
1204        assert_eq!(siblings2[0], child1);
1205    }
1206
1207    #[test]
1208    fn test_children_elements() {
1209        let mut doc = Document::new();
1210        let parent = doc.create_element("div", HashMap::new());
1211        let text = doc.create_text("text");
1212        let child1 = doc.create_element("span", HashMap::new());
1213        let child2 = doc.create_element("p", HashMap::new());
1214
1215        doc.append_child(parent, text);
1216        doc.append_child(parent, child1);
1217        doc.append_child(parent, child2);
1218
1219        let mut iter = doc.children(parent).elements();
1220        assert_eq!(iter.next(), Some(child1));
1221        assert_eq!(iter.next(), Some(child2));
1222        assert_eq!(iter.next(), None);
1223    }
1224
1225    #[test]
1226    fn test_descendants_elements() {
1227        let mut doc = Document::new();
1228        let root = doc.create_element("div", HashMap::new());
1229        let text = doc.create_text("text");
1230        let child = doc.create_element("span", HashMap::new());
1231        let grandchild = doc.create_element("b", HashMap::new());
1232
1233        doc.append_child(root, text);
1234        doc.append_child(root, child);
1235        doc.append_child(child, grandchild);
1236
1237        assert_eq!(doc.descendants(root).elements().count(), 2); // span, b
1238    }
1239
1240    #[test]
1241    fn test_ancestors_elements() {
1242        let mut doc = Document::new();
1243        let root = doc.create_element("html", HashMap::new());
1244        let body = doc.create_element("body", HashMap::new());
1245        let div = doc.create_element("div", HashMap::new());
1246
1247        doc.set_root(root);
1248        doc.append_child(root, body);
1249        doc.append_child(body, div);
1250
1251        assert_eq!(doc.ancestors(div).elements().count(), 2);
1252    }
1253
1254    #[test]
1255    fn test_next_siblings_elements() {
1256        let mut doc = Document::new();
1257        let parent = doc.create_element("ul", HashMap::new());
1258        let li1 = doc.create_element("li", HashMap::new());
1259        let text = doc.create_text(" ");
1260        let li2 = doc.create_element("li", HashMap::new());
1261
1262        doc.append_child(parent, li1);
1263        doc.append_child(parent, text);
1264        doc.append_child(parent, li2);
1265
1266        let siblings: Vec<_> = doc.next_siblings(li1).elements().collect();
1267        assert_eq!(siblings.len(), 1);
1268        assert_eq!(siblings[0], li2);
1269    }
1270
1271    #[test]
1272    fn test_prev_siblings_elements() {
1273        let mut doc = Document::new();
1274        let parent = doc.create_element("ul", HashMap::new());
1275        let li1 = doc.create_element("li", HashMap::new());
1276        let text = doc.create_text(" ");
1277        let li2 = doc.create_element("li", HashMap::new());
1278
1279        doc.append_child(parent, li1);
1280        doc.append_child(parent, text);
1281        doc.append_child(parent, li2);
1282
1283        let siblings: Vec<_> = doc.prev_siblings(li2).elements().collect();
1284        assert_eq!(siblings.len(), 1);
1285        assert_eq!(siblings[0], li1);
1286    }
1287
1288    #[test]
1289    fn test_siblings_elements() {
1290        let mut doc = Document::new();
1291        let parent = doc.create_element("ul", HashMap::new());
1292        let li1 = doc.create_element("li", HashMap::new());
1293        let text = doc.create_text(" ");
1294        let li2 = doc.create_element("li", HashMap::new());
1295        let li3 = doc.create_element("li", HashMap::new());
1296
1297        doc.append_child(parent, li1);
1298        doc.append_child(parent, text);
1299        doc.append_child(parent, li2);
1300        doc.append_child(parent, li3);
1301
1302        assert_eq!(doc.siblings(li2).elements().count(), 2); // li1, li3
1303    }
1304}