1use std::cell::Ref;
2use std::fmt::Debug;
3use std::ops::Deref;
4use std::ops::DerefMut;
5
6use html5ever::serialize;
7use html5ever::serialize::SerializeOpts;
8use html5ever::serialize::TraversalScope;
9use html5ever::Attribute;
10
11use html5ever::QualName;
12use tendril::StrTendril;
13
14use crate::dom_tree::Traversal;
15use crate::entities::copy_attrs;
16use crate::Document;
17use crate::Matcher;
18use crate::Tree;
19use crate::TreeNodeOps;
20
21use super::id_provider::NodeIdProver;
22use super::inner::TreeNode;
23use super::node_data::NodeData;
24use super::serializing::SerializableNodeRef;
25use super::text_formatting::format_text;
26use super::Element;
27use super::NodeId;
28use super::{child_nodes, descendant_nodes};
29
30pub type Node<'a> = NodeRef<'a>;
31
32#[derive(Clone, Copy, Debug)]
33pub struct NodeRef<'a> {
37 pub id: NodeId,
38 pub tree: &'a Tree,
39}
40
41impl<'a> NodeRef<'a> {
42 pub fn new(id: NodeId, tree: &'a Tree) -> Self {
44 Self { id, tree }
45 }
46
47 #[inline]
49 pub fn query<F, B>(&self, f: F) -> Option<B>
50 where
51 F: FnOnce(&TreeNode) -> B,
52 {
53 self.tree.query_node(&self.id, f)
54 }
55
56 #[inline]
59 pub fn query_or<F, B>(&self, default: B, f: F) -> B
60 where
61 F: FnOnce(&TreeNode) -> B,
62 {
63 self.tree.query_node_or(&self.id, default, f)
64 }
65
66 #[inline]
68 pub fn update<F, B>(&self, f: F) -> Option<B>
69 where
70 F: FnOnce(&mut TreeNode) -> B,
71 {
72 self.tree.update_node(&self.id, f)
73 }
74
75 #[inline]
77 pub fn parent(&self) -> Option<Self> {
78 self.tree.parent_of(&self.id)
79 }
80
81 #[inline]
83 pub fn children(&self) -> Vec<Self> {
84 self.tree.children_of(&self.id)
85 }
86
87 #[inline]
89 pub fn children_it(&self, rev: bool) -> impl Iterator<Item = Self> {
90 self.tree
91 .child_ids_of_it(&self.id, rev)
92 .map(|n| NodeRef::new(n, self.tree))
93 }
94
95 #[inline]
104 pub fn ancestors(&self, max_depth: Option<usize>) -> Vec<Self> {
105 self.tree.ancestors_of(&self.id, max_depth)
106 }
107
108 #[inline]
116 pub fn ancestors_it(&self, max_depth: Option<usize>) -> impl Iterator<Item = Self> {
117 self.tree
118 .ancestor_ids_of_it(&self.id, max_depth)
119 .map(|n| NodeRef::new(n, self.tree))
120 }
121
122 #[inline]
127 pub fn descendants(&self) -> Vec<Self> {
128 self.descendants_it().collect()
129 }
130
131 #[inline]
136 pub fn descendants_it(&self) -> impl Iterator<Item = Self> {
137 self.tree
138 .descendant_ids_of_it(&self.id)
139 .map(|n| NodeRef::new(n, self.tree))
140 }
141
142 #[inline]
144 pub fn first_child(&self) -> Option<Self> {
145 self.tree.first_child_of(&self.id)
146 }
147
148 #[inline]
150 pub fn last_child(&self) -> Option<Self> {
151 self.tree.last_child_of(&self.id)
152 }
153
154 #[inline]
156 pub fn next_sibling(&self) -> Option<Self> {
157 self.tree.next_sibling_of(&self.id)
158 }
159
160 #[inline]
162 pub fn prev_sibling(&self) -> Option<Self> {
163 self.tree.prev_sibling_of(&self.id)
164 }
165
166 #[inline]
168 pub fn last_sibling(&self) -> Option<Self> {
169 self.tree.last_sibling_of(&self.id)
170 }
171}
172
173impl NodeRef<'_> {
175 #[inline]
177 pub fn remove_from_parent(&self) {
178 self.tree.remove_from_parent(&self.id)
179 }
180
181 #[inline]
183 pub fn remove_children(&self) {
184 self.tree.remove_children_of(&self.id)
185 }
186
187 #[inline]
190 pub fn insert_before<P: NodeIdProver>(&self, id_provider: P) {
191 self.tree.insert_before_of(&self.id, id_provider.node_id())
192 }
193
194 pub fn insert_after<P: NodeIdProver>(&self, id_provider: P) {
197 self.tree.insert_after_of(&self.id, id_provider.node_id())
198 }
199
200 #[inline]
202 pub fn append_child<P: NodeIdProver>(&self, id_provider: P) {
203 let new_child_id = id_provider.node_id();
204 let mut nodes = self.tree.nodes.borrow_mut();
205 TreeNodeOps::append_child_of(nodes.deref_mut(), &self.id, new_child_id);
206 }
207
208 #[inline]
210 pub fn append_children<P: NodeIdProver>(&self, id_provider: P) {
211 let mut nodes = self.tree.nodes.borrow_mut();
212 TreeNodeOps::append_children_of(&mut nodes, &self.id, id_provider.node_id());
213 }
214
215 #[inline]
217 pub fn prepend_child<P: NodeIdProver>(&self, id_provider: P) {
218 let new_child_id = id_provider.node_id();
219 let mut nodes = self.tree.nodes.borrow_mut();
220 TreeNodeOps::remove_from_parent(nodes.deref_mut(), new_child_id);
221 TreeNodeOps::prepend_child_of(nodes.deref_mut(), &self.id, new_child_id);
222 }
223
224 pub fn prepend_children<P: NodeIdProver>(&self, id_provider: P) {
226 let new_child_id = id_provider.node_id();
228 let mut nodes = self.tree.nodes.borrow_mut();
229 TreeNodeOps::prepend_children_of(&mut nodes, &self.id, new_child_id);
230 }
231
232 #[inline]
235 pub fn insert_siblings_before<P: NodeIdProver>(&self, id_provider: P) {
236 let mut nodes = self.tree.nodes.borrow_mut();
237 TreeNodeOps::insert_siblings_before(nodes.deref_mut(), &self.id, id_provider.node_id());
238 }
239
240 #[inline]
242 pub fn insert_siblings_after<P: NodeIdProver>(&self, id_provider: P) {
243 let mut nodes = self.tree.nodes.borrow_mut();
244 TreeNodeOps::insert_siblings_after(nodes.deref_mut(), &self.id, id_provider.node_id());
245 }
246
247 pub fn replace_with<P: NodeIdProver>(&self, id_provider: P) {
250 let mut nodes = self.tree.nodes.borrow_mut();
251 TreeNodeOps::insert_before_of(nodes.deref_mut(), &self.id, id_provider.node_id());
252 TreeNodeOps::remove_from_parent(&mut nodes, &self.id);
253 }
254
255 pub fn replace_with_html<T>(&self, html: T)
258 where
259 T: Into<StrTendril>,
260 {
261 self.merge_html_with_fn(html, |tree_nodes, new_node_id, node| {
262 TreeNodeOps::insert_siblings_before(tree_nodes, &node.id, &new_node_id);
263 TreeNodeOps::remove_from_parent(tree_nodes, &node.id);
264 });
265 }
266
267 pub fn append_html<T>(&self, html: T)
269 where
270 T: Into<StrTendril>,
271 {
272 self.merge_html_with_fn(html, |tree_nodes, new_node_id, node| {
273 TreeNodeOps::append_children_of(tree_nodes, &node.id, &new_node_id);
274 });
275 }
276
277 pub fn prepend_html<T>(&self, html: T)
279 where
280 T: Into<StrTendril>,
281 {
282 self.merge_html_with_fn(html, |tree_nodes, new_node_id, node| {
283 TreeNodeOps::prepend_children_of(tree_nodes, &node.id, &new_node_id);
284 });
285 }
286
287 pub fn before_html<T>(&self, html: T)
289 where
290 T: Into<StrTendril>,
291 {
292 self.merge_html_with_fn(html, |tree_nodes, new_node_id, node| {
293 TreeNodeOps::insert_siblings_before(tree_nodes, &node.id, &new_node_id);
294 });
295 }
296
297 pub fn after_html<T>(&self, html: T)
299 where
300 T: Into<StrTendril>,
301 {
302 self.merge_html_with_fn(html, |tree_nodes, new_node_id, node| {
303 TreeNodeOps::insert_siblings_after(tree_nodes, &node.id, &new_node_id);
304 });
305 }
306
307 pub fn set_html<T>(&self, html: T)
309 where
310 T: Into<StrTendril>,
311 {
312 self.remove_children();
313 self.append_html(html);
314 }
315
316 pub fn set_text<T>(&self, text: T)
322 where
323 T: Into<StrTendril>,
324 {
325 let mut nodes = self.tree.nodes.borrow_mut();
326 TreeNodeOps::set_text(nodes.deref_mut(), &self.id, text);
327 }
328
329 fn merge_html_with_fn<T, F>(&self, html: T, f: F)
331 where
332 T: Into<StrTendril>,
333 F: Fn(&mut Vec<TreeNode>, NodeId, &NodeRef),
334 {
335 let fragment = Document::fragment(html);
336 let mut borrowed_nodes = self.tree.nodes.borrow_mut();
337 TreeNodeOps::merge_with_fn(
338 &mut borrowed_nodes,
339 fragment.tree,
340 |tree_nodes, new_node_id| {
341 if TreeNodeOps::is_valid_node_id(tree_nodes, &new_node_id) {
342 f(tree_nodes, new_node_id, self);
343 }
344 },
345 );
346 }
347
348 pub fn wrap_node<P: NodeIdProver>(&self, new_parent: P) {
351 let wrapper_id = new_parent.node_id();
352 let mut nodes = self.tree.nodes.borrow_mut();
353
354 TreeNodeOps::insert_before_of(&mut nodes, &self.id, wrapper_id);
356 TreeNodeOps::append_child_of(&mut nodes, wrapper_id, &self.id);
358 }
359
360 pub fn wrap_html<T>(&self, html: T)
365 where
366 T: Into<StrTendril>,
367 {
368 self.merge_html_with_fn(html, |tree_nodes, wrapper_id, node| {
369 TreeNodeOps::insert_before_of(tree_nodes, &node.id, &wrapper_id);
371 TreeNodeOps::append_child_of(tree_nodes, &wrapper_id, &node.id);
373 });
374 }
375
376 pub fn unwrap_node(&self) {
379 if let Some(parent) = self.parent() {
380 if !parent.is_element() {
381 return; }
383
384 if parent.parent().is_some() {
386 parent.insert_siblings_before(self);
388 parent.remove_from_parent();
390 }
391 }
392 }
393}
394
395impl NodeRef<'_> {
396 pub fn next_element_sibling(&self) -> Option<Self> {
398 let nodes = self.tree.nodes.borrow();
399 TreeNodeOps::next_element_sibling_of(nodes.deref(), &self.id)
400 .map(|id| NodeRef::new(id, self.tree))
401 }
402
403 pub fn prev_element_sibling(&self) -> Option<Self> {
405 let nodes = self.tree.nodes.borrow();
406 TreeNodeOps::prev_element_sibling_of(nodes.deref(), &self.id)
407 .map(|id| NodeRef::new(id, self.tree))
408 }
409
410 pub fn first_element_child(&self) -> Option<Self> {
412 let nodes = self.tree.nodes.borrow();
413 TreeNodeOps::first_element_child_of(nodes.deref(), &self.id)
414 .map(|id| NodeRef::new(id, self.tree))
415 }
416
417 pub fn element_children(&self) -> Vec<Self> {
419 self.children_it(false).filter(|n| n.is_element()).collect()
420 }
421}
422
423impl NodeRef<'_> {
424 pub fn node_name(&self) -> Option<StrTendril> {
426 let nodes = self.tree.nodes.borrow();
427 nodes
428 .get(self.id.value)
429 .and_then(|node| node.as_element().map(|e| e.node_name()))
430 }
431
432 pub fn id_attr(&self) -> Option<StrTendril> {
434 self.query_or(None, |node| node.as_element().and_then(|e| e.id()))
435 }
436
437 pub fn class(&self) -> Option<StrTendril> {
439 self.query_or(None, |node| node.as_element().and_then(|e| e.class()))
440 }
441
442 pub fn has_class(&self, class: &str) -> bool {
444 self.query_or(false, |node| {
445 node.as_element().is_some_and(|e| e.has_class(class))
446 })
447 }
448
449 pub fn add_class(&self, class: &str) {
451 self.update(|node| node.add_class(class));
452 }
453
454 pub fn remove_class(&self, class: &str) {
456 self.update(|node| node.remove_class(class));
457 }
458
459 pub fn attr(&self, name: &str) -> Option<StrTendril> {
461 self.query_or(None, |node| node.as_element().and_then(|e| e.attr(name)))
462 }
463
464 pub fn attr_or<T>(&self, name: &str, default: T) -> StrTendril
466 where
467 tendril::Tendril<tendril::fmt::UTF8>: std::convert::From<T>,
468 {
469 self.query_or(None, |node| node.as_element().and_then(|e| e.attr(name)))
470 .unwrap_or_else(|| StrTendril::from(default))
471 }
472
473 pub fn attrs(&self) -> Vec<Attribute> {
475 self.query_or(vec![], |node| {
476 node.as_element().map_or(vec![], |e| copy_attrs(&e.attrs))
477 })
478 }
479
480 pub fn set_attr(&self, name: &str, val: &str) {
482 self.update(|node| node.set_attr(name, val));
483 }
484
485 pub fn remove_attr(&self, name: &str) {
487 self.update(|node| node.remove_attr(name));
488 }
489
490 pub fn remove_attrs(&self, names: &[&str]) {
495 self.update(|node| node.remove_attrs(names));
496 }
497
498 pub fn retain_attrs(&self, names: &[&str]) {
503 self.update(|node| node.retain_attrs(names));
504 }
505
506 pub fn remove_all_attrs(&self) {
508 self.update(|node| node.remove_all_attrs());
509 }
510
511 pub fn has_attr(&self, name: &str) -> bool {
513 self.query_or(false, |node| {
514 node.as_element().is_some_and(|e| e.has_attr(name))
515 })
516 }
517
518 pub fn rename(&self, name: &str) {
520 self.update(|node| node.rename(name));
521 }
522}
523
524impl NodeRef<'_> {
525 pub fn is_document(&self) -> bool {
527 self.query_or(false, |node| node.is_document())
528 }
529
530 pub fn is_fragment(&self) -> bool {
532 self.query_or(false, |node| node.is_fragment())
533 }
534
535 pub fn is_element(&self) -> bool {
537 self.query_or(false, |node| node.is_element())
538 }
539
540 pub fn is_text(&self) -> bool {
542 self.query_or(false, |node| node.is_text())
543 }
544 pub fn is_comment(&self) -> bool {
546 self.query_or(false, |node| node.is_comment())
547 }
548 pub fn is_doctype(&self) -> bool {
550 self.query_or(false, |node| node.is_doctype())
551 }
552
553 pub fn may_have_children(&self) -> bool {
555 self.query_or(false, |node| node.may_have_children())
556 }
557}
558
559impl NodeRef<'_> {
560 pub fn html(&self) -> StrTendril {
563 self.serialize_html(TraversalScope::IncludeNode).unwrap()
564 }
565
566 pub fn inner_html(&self) -> StrTendril {
569 self.serialize_html(TraversalScope::ChildrenOnly(None))
570 .unwrap()
571 }
572
573 pub fn try_html(&self) -> Option<StrTendril> {
575 self.serialize_html(TraversalScope::IncludeNode)
576 }
577
578 pub fn try_inner_html(&self) -> Option<StrTendril> {
580 self.serialize_html(TraversalScope::ChildrenOnly(None))
581 }
582
583 fn serialize_html(&self, traversal_scope: TraversalScope) -> Option<StrTendril> {
584 let inner: SerializableNodeRef = (*self).into();
585 let mut result = vec![];
586 serialize(
587 &mut result,
588 &inner,
589 SerializeOpts {
590 scripting_enabled: false,
591 create_missing_parent: false,
592 traversal_scope,
593 },
594 )
595 .ok()?;
596 StrTendril::try_from_byte_slice(&result).ok()
597 }
598
599 pub fn text(&self) -> StrTendril {
601 let nodes = self.tree.nodes.borrow();
602 TreeNodeOps::text_of(nodes, self.id)
603 }
604
605 pub fn immediate_text(&self) -> StrTendril {
607 let nodes = self.tree.nodes.borrow();
608 TreeNodeOps::immediate_text_of(nodes, self.id)
609 }
610
611 pub fn formatted_text(&self) -> StrTendril {
619 format_text(self, false)
620 }
621
622 pub fn has_text(&self, needle: &str) -> bool {
624 let nodes = self.tree.nodes.borrow();
625 let id = self.id;
626 let node_ids = std::iter::once(id).chain(descendant_nodes(Ref::clone(&nodes), &id));
627 for node in node_ids.filter_map(|node_id| nodes.get(node_id.value)) {
628 if let NodeData::Text { ref contents } = node.data {
629 if contents.contains(needle) {
630 return true;
631 }
632 }
633 }
634 false
635 }
636
637 pub fn has_only_text(&self) -> bool {
639 let nodes = self.tree.nodes.borrow();
640 if child_nodes(Ref::clone(&nodes), &self.id, false).count() == 1 {
641 let first_child = nodes
642 .get(self.id.value)
643 .and_then(|n| n.first_child)
644 .and_then(|id| nodes.get(id.value));
645 first_child.is_some_and(|n| {
646 n.is_text()
647 && !TreeNodeOps::text_of(Ref::clone(&nodes), n.id)
648 .trim()
649 .is_empty()
650 })
651 } else {
652 false
653 }
654 }
655
656 pub fn is_empty_element(&self) -> bool {
661 let nodes = self.tree.nodes.borrow();
662 let Some(node) = nodes.get(self.id.value) else {
663 return false;
664 };
665 node.is_element()
666 && !child_nodes(Ref::clone(&nodes), &self.id, false)
667 .flat_map(|id| nodes.get(id.value))
668 .any(|child| {
669 child.is_element()
670 || (child.is_text()
671 && !TreeNodeOps::text_of(Ref::clone(&nodes), child.id)
672 .trim()
673 .is_empty())
674 })
675 }
676
677 pub fn normalize(&self) {
681 let mut child = self.first_child();
682 let mut text: StrTendril = StrTendril::new();
683
684 while let Some(ref node) = child {
685 let next_node = node.next_sibling();
686
687 if node.is_text() {
688 text.push_tendril(&node.text());
689 if !next_node.as_ref().is_some_and(|n| n.is_text()) && !text.is_empty() {
690 let t = text;
691 text = StrTendril::new();
692 node.set_text(t);
693 } else {
694 node.remove_from_parent();
695 }
696 } else if node.may_have_children() {
697 node.normalize();
698 }
699 child = next_node;
700 }
701 }
702
703 pub fn strip_elements(&self, names: &[&str]) {
710 if names.is_empty() {
711 return;
712 }
713 let mut child = self.first_child();
714
715 while let Some(ref child_node) = child {
716 let next_node = child_node.next_sibling();
717 if child_node.may_have_children() {
718 child_node.strip_elements(names);
719 }
720 if !child_node.is_element() {
721 child = next_node;
722 continue;
723 }
724 if child_node
725 .qual_name_ref()
726 .is_some_and(|name| names.contains(&name.local.as_ref()))
727 {
728 if let Some(first_inline) = child_node.first_child() {
729 child_node.insert_siblings_before(&first_inline);
730 };
731 child_node.remove_from_parent();
732 }
733 child = next_node;
734 }
735 }
736
737 pub fn to_fragment(&self) -> Document {
739 if self.id.value == 0 || self.has_name("html") {
740 return Document {
741 tree: self.tree.clone(),
742 ..Default::default()
743 };
744 }
745
746 let frag = Document::fragment_sink();
747 let f_tree = &frag.tree;
748 let f_root_id = f_tree.root().id;
749
750 f_tree.new_element("body");
751
752 let html_node = f_tree.new_element("html");
753 f_tree.append_child_of(&f_root_id, &html_node.id);
754
755 {
756 let new_child_id = f_tree.copy_node(self);
757 let mut fragment_nodes = f_tree.nodes.borrow_mut();
758 TreeNodeOps::append_children_of(&mut fragment_nodes, &html_node.id, &new_child_id);
759 }
760
761 frag
762 }
763}
764
765impl NodeRef<'_> {
766 pub fn is_match(&self, matcher: &Matcher) -> bool {
768 self.is_element() && matcher.match_element(self)
769 }
770
771 pub fn is(&self, sel: &str) -> bool {
773 Matcher::new(sel).is_ok_and(|matcher| self.is_match(&matcher))
774 }
775
776 pub fn base_uri(&self) -> Option<StrTendril> {
780 self.tree.base_uri()
781 }
782
783 pub fn find(&self, path: &[&str]) -> Vec<Self> {
792 let nodes = self.tree.nodes.borrow();
793 let found_ids = Traversal::find_descendant_elements(&nodes, self.id, path);
794 found_ids
795 .iter()
796 .map(|node_id| NodeRef::new(*node_id, self.tree))
797 .collect()
798 }
799
800 pub fn normalized_char_count(&self) -> usize {
813 let nodes = self.tree.nodes.borrow();
814 TreeNodeOps::normalized_char_count(nodes, self.id)
815 }
816}
817
818impl<'a> NodeRef<'a> {
819 pub fn element_ref(&self) -> Option<Ref<'a, Element>> {
823 Ref::filter_map(self.tree.nodes.borrow(), |nodes| {
824 let node = nodes.get(self.id.value)?;
825 if let NodeData::Element(ref el) = node.data {
826 Some(el)
827 } else {
828 None
829 }
830 })
831 .ok()
832 }
833
834 pub fn qual_name_ref(&self) -> Option<Ref<'a, QualName>> {
838 self.tree.get_name(&self.id)
839 }
840
841 pub fn has_name(&self, name: &str) -> bool {
845 self.element_ref()
846 .is_some_and(|el| el.name.local.as_ref() == name)
847 }
848
849 pub fn is_nonempty_text(&self) -> bool {
854 self.query_or(false, |t| {
855 if let NodeData::Text { ref contents } = t.data {
856 contents.chars().any(|c| !c.is_whitespace())
857 } else {
858 false
859 }
860 })
861 }
862}
863
#[cfg(feature = "markdown")]
impl NodeRef<'_> {
    /// Renders this node as Markdown by calling
    /// `crate::serializing::serialize_md(self, false, skip_tags)`.
    ///
    /// `skip_tags` is forwarded unchanged (presumably the tag names to leave out
    /// of the output — confirm against `serialize_md`).
    pub fn md(&self, skip_tags: Option<&[&str]>) -> StrTendril {
        let rendered = crate::serializing::serialize_md(self, false, skip_tags);
        rendered
    }
}