1use std::convert::TryInto;
13use std::fmt;
14use std::iter;
15use std::mem;
16use std::num::NonZeroU32;
17use std::ops::{Deref, DerefMut};
18
19#[doc(no_inline)]
20pub use html5ever::{Attribute, LocalName, Namespace, QualName};
21
22#[doc(no_inline)]
23pub use tendril::StrTendril;
24
25mod node_ref;
28mod serializer;
29#[macro_use] pub mod filter;
30pub mod html;
31
32#[cfg(feature = "xml")]
33pub mod xml;
34
35#[cfg(test)]
36mod tests;
37
38pub use node_ref::{NodeRef, Descender, Selector};
39
40pub struct Document {
52 nodes: Vec<Node>,
53}
54
55#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
59pub struct NodeId(NonZeroU32);
60
61#[derive(Clone, Debug)]
64pub struct Node {
65 data: NodeData,
66 parent: Option<NodeId>,
67 prev_sibling: Option<NodeId>,
68 next_sibling: Option<NodeId>,
69 first_child: Option<NodeId>,
70 last_child: Option<NodeId>,
71}
72
73#[derive(Clone, Debug)]
75pub enum NodeData {
76 Hole,
79
80 Document,
82
83 DocType(DocumentType),
85
86 Text(StrTendril),
88
89 Comment(StrTendril),
91
92 Elem(Element),
94
95 Pi(ProcessingInstruction),
97}
98
99#[derive(Clone, Debug)]
101pub struct DocumentType {
102 pub name: StrTendril,
103 _priv: ()
104}
105
106#[derive(Clone, Debug)]
108pub struct ProcessingInstruction {
109 pub data: StrTendril,
110 _priv: ()
111}
112
113#[derive(Clone, Debug)]
115pub struct Element {
116 pub name: QualName,
117 pub attrs: Vec<Attribute>,
118 _priv: ()
119}
120
121impl Document {
123 pub const DOCUMENT_NODE_ID: NodeId = NodeId(
125 unsafe { NonZeroU32::new_unchecked(1) }
126 );
127
128 const WASTE_ALLOWANCE: usize = 1024;
130
131 pub fn new() -> Self {
133 Document::with_capacity(8)
134 }
135
136 pub fn with_capacity(count: u32) -> Self {
139 let mut nodes = Vec::with_capacity(count as usize);
140 nodes.push(Node::new(NodeData::Hole)); nodes.push(Node::new(NodeData::Document)); Document { nodes }
143 }
144
145 #[inline]
152 pub fn len(&self) -> u32 {
153 let nodes: u32 = self.nodes.len()
154 .try_into()
155 .expect("Document (u32) node index overflow");
156 debug_assert!(nodes > 0);
157 nodes - 1 }
159
160 #[inline]
165 pub fn is_empty(&self) -> bool {
166 self.len() < 2
167 }
168
169 pub fn root_element(&self) -> Option<NodeId> {
175 let document_node = &self[Document::DOCUMENT_NODE_ID];
176 debug_assert!(
177 (if let NodeData::Document = document_node.data { true }
178 else { false }),
179 "not document node: {:?}", document_node);
180 debug_assert!(document_node.parent.is_none());
181 debug_assert!(document_node.next_sibling.is_none());
182 debug_assert!(document_node.prev_sibling.is_none());
183 let mut root = None;
184 for child in self.children(Document::DOCUMENT_NODE_ID) {
185 match &self[child].data {
186 NodeData::DocType(_) |
187 NodeData::Comment(_) |
188 NodeData::Pi(_) => {}
189 NodeData::Document => {
190 debug_assert!(false, "Document child of Document");
191 root = None;
192 break;
193 }
194 NodeData::Hole => {
195 debug_assert!(false, "Hole in Document");
196 root = None;
197 break;
198 }
199 NodeData::Text(_) => {
200 root = None;
201 break;
202 }
203 NodeData::Elem(_) => {
204 if root.is_none() {
205 root = Some(child);
206 } else {
207 root = None; break;
209 }
210 }
211 }
212 }
213 root
214 }
215
216 fn push_node(&mut self, node: Node) -> NodeId {
217 debug_assert!(
218 (if let NodeData::Document | NodeData::Hole = node.data { false }
219 else { true }),
220 "Invalid push {:?}", node.data);
221 let next_index = self.nodes.len()
222 .try_into()
223 .expect("Document (u32) node index overflow");
224 debug_assert!(next_index > 1);
225 self.nodes.push(node);
226 NodeId(unsafe { NonZeroU32::new_unchecked(next_index) })
227 }
228
229 #[inline]
243 #[must_use="If the fragment isn't needed, use `unlink()` instead."]
244 pub fn detach(&mut self, id: NodeId) -> Document {
245 self.unlink_only(id);
246
247 let guess_cap = std::cmp::max(8, self.len() - id.0.get() + 2);
249 let mut ndoc = Document::with_capacity(guess_cap);
250
251 ndoc.append_move(Document::DOCUMENT_NODE_ID, self, id);
252
253 if (ndoc.nodes.capacity() - ndoc.nodes.len()) >
255 Document::WASTE_ALLOWANCE
256 {
257 ndoc.nodes.shrink_to_fit();
258 }
259 ndoc
260 }
261
262 pub fn attach_child(&mut self, parent: NodeId, mut other: Document) {
268 self.nodes.reserve(other.len() as usize - 1); let children = other.children(Document::DOCUMENT_NODE_ID)
270 .collect::<Vec<_>>();
271 for child in children {
272 self.append_move(parent, &mut other, child);
273 }
274 }
275
276 pub fn attach_before_sibling(
282 &mut self,
283 sibling: NodeId,
284 mut other: Document)
285 {
286 self.nodes.reserve(other.len() as usize - 1);
287 let children = other.children(Document::DOCUMENT_NODE_ID)
288 .collect::<Vec<_>>();
289 for oid in children {
290 let onode = &mut other[oid];
291 let nid = self.insert_before_sibling(
292 sibling,
293 Node::new(onode.take_data()));
294 for coid in other.children(oid).collect::<Vec<_>>() {
295 self.append_move(nid, &mut other, coid);
296 }
297 }
298 }
299
300 fn append_move(&mut self, id: NodeId, odoc: &mut Document, oid: NodeId) {
303 let id = self.append_child(id, Node::new(odoc[oid].take_data()));
304 let mut ns = NodeStack2::new();
305 ns.push_if(odoc[oid].first_child, id);
306
307 while let Some((oid, id)) = ns.pop() {
308 let onode = &mut odoc[oid];
309 let nid = self.append_child(id, Node::new(onode.take_data()));
310 ns.push_if(onode.next_sibling, id);
311 ns.push_if(onode.first_child, nid);
312 }
313 }
314
315 #[inline]
328 pub fn unlink(&mut self, id: NodeId) -> NodeData {
329 self.unlink_only(id);
330 self[id].take_data()
331 }
332
333 fn unlink_only(&mut self, id: NodeId) {
335 assert!(
336 id != Document::DOCUMENT_NODE_ID,
337 "Can't detach the synthetic document node");
338
339 let (parent, prev_sibling, next_sibling) = {
340 let node = &mut self[id];
341 (node.parent.take(),
342 node.prev_sibling.take(),
343 node.next_sibling.take())
344 };
345
346 if let Some(next_sibling) = next_sibling {
347 self[next_sibling].prev_sibling = prev_sibling
348 } else if let Some(parent) = parent {
349 self[parent].last_child = prev_sibling;
350 }
351
352 if let Some(prev_sibling) = prev_sibling {
353 self[prev_sibling].next_sibling = next_sibling;
354 } else if let Some(parent) = parent {
355 self[parent].first_child = next_sibling;
356 }
357 }
358
359 pub fn append_child(&mut self, parent: NodeId, node: Node)
361 -> NodeId
362 {
363 let id = self.push_node(node);
364 self.append(parent, id);
365 id
366 }
367
368 fn append(&mut self, parent: NodeId, new_child: NodeId) {
369 self.unlink_only(new_child);
370 self[new_child].parent = Some(parent);
371 self[parent].assert_suitable_parent();
372 if let Some(last_child) = self[parent].last_child.take() {
373 self[new_child].prev_sibling = Some(last_child);
374 debug_assert!(self[last_child].next_sibling.is_none());
375 self[last_child].next_sibling = Some(new_child);
376 } else {
377 debug_assert!(self[parent].first_child.is_none());
378 self[parent].first_child = Some(new_child);
379 }
380 self[parent].last_child = Some(new_child);
381 }
382
383 pub fn insert_before_sibling(&mut self, sibling: NodeId, node: Node)
385 -> NodeId
386 {
387 let id = self.push_node(node);
388 self.insert_before(sibling, id);
389 id
390 }
391
392 fn insert_before(&mut self, sibling: NodeId, new_sibling: NodeId) {
393 self.unlink_only(new_sibling);
394 let parent = self[sibling].parent
395 .expect("insert_before sibling has no parent");
396 self[parent].assert_suitable_parent();
397 self[new_sibling].parent = Some(parent);
398 self[new_sibling].next_sibling = Some(sibling);
399 if let Some(prev_sibling) = self[sibling].prev_sibling.take() {
400 self[new_sibling].prev_sibling = Some(prev_sibling);
401 debug_assert_eq!(
402 self[prev_sibling].next_sibling,
403 Some(sibling)
404 );
405 self[prev_sibling].next_sibling = Some(new_sibling);
406 } else {
407 debug_assert_eq!(self[parent].first_child, Some(sibling));
408 self[parent].first_child = Some(new_sibling);
409 }
410 self[sibling].prev_sibling = Some(new_sibling);
411 }
412
413 pub fn text(&self, id: NodeId) -> Option<StrTendril> {
419 let mut ns = NodeStack1::new();
420 ns.push_if(self[id].first_child);
421 let mut text = None;
422 while let Some(id) = ns.pop() {
423 let node = &self[id];
424 if let NodeData::Text(t) = &node.data {
425 match &mut text {
426 None => text = Some(t.clone()),
427 Some(text) => text.push_tendril(&t),
428 }
429 ns.push_if(node.next_sibling);
430 } else {
431 ns.push_if(node.next_sibling);
432 ns.push_if(node.first_child);
433 }
434 }
435 text
436 }
437
438 pub fn children(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
442 iter::successors(
443 self[id].first_child,
444 move |&id| self[id].next_sibling
445 )
446 }
447
448 pub fn node_and_following_siblings(&self, id: NodeId)
451 -> impl Iterator<Item = NodeId> + '_
452 {
453 iter::successors(Some(id), move |&id| self[id].next_sibling)
454 }
455
456 pub fn node_and_ancestors(&self, id: NodeId)
459 -> impl Iterator<Item = NodeId> + '_
460 {
461 iter::successors(Some(id), move |&id| self[id].parent)
462 }
463
464 pub fn nodes(&self) -> impl Iterator<Item = NodeId> + '_ {
467 self.descendants(Document::DOCUMENT_NODE_ID)
468 }
469
470 #[inline]
473 pub fn descendants(&self, id: NodeId) -> impl Iterator<Item = NodeId> + '_
474 {
475 NodeRef::new(self, id).descendants().map(|nr| nr.id())
476 }
477
478 pub fn compact(&mut self) {
481 let mut ndoc = Document::with_capacity(self.len() + 1);
482 let mut ns = NodeStack2::new();
483 ns.push_if(
484 self[Document::DOCUMENT_NODE_ID].first_child,
485 Document::DOCUMENT_NODE_ID);
486
487 while let Some((id, nid)) = ns.pop() {
488 let nnode = Node::new(self[id].take_data());
489 let ncid = ndoc.append_child(nid, nnode);
490 ns.push_if(self[id].next_sibling, nid);
491 ns.push_if(self[id].first_child, ncid);
492 }
493
494 if (ndoc.nodes.capacity() - ndoc.nodes.len()) >
496 Document::WASTE_ALLOWANCE
497 {
498 ndoc.nodes.shrink_to_fit();
499 }
500
501 self.nodes = ndoc.nodes;
502 }
503
504 pub fn deep_clone(&self, id: NodeId) -> Document {
507
508 let guess_cap = std::cmp::max(8, (self.len() - id.0.get() + 2) / 8);
511 let mut ndoc = Document::with_capacity(guess_cap);
512
513 if id == Document::DOCUMENT_NODE_ID {
514 for child in self.children(id) {
515 ndoc.append_deep_clone(Document::DOCUMENT_NODE_ID, self, child);
516 }
517 } else {
518 ndoc.append_deep_clone(Document::DOCUMENT_NODE_ID, self, id);
519 }
520
521 ndoc
522 }
523
524 pub fn append_deep_clone(
527 &mut self,
528 id: NodeId,
529 odoc: &Document,
530 oid: NodeId)
531 {
532 let id = self.append_child(id, Node::new(odoc[oid].data.clone()));
533 for child in odoc.children(oid) {
534 self.append_deep_clone(id, odoc, child);
535 }
536 }
537
538 pub fn bulk_clone(&self) -> Document {
545 Document { nodes: self.nodes.clone() }
546 }
547
548 #[inline]
563 pub fn fold(&mut self, id: NodeId) -> NodeData {
564 self.fold_only(id);
565 self[id].take_data()
566 }
567
568 fn fold_only(&mut self, id: NodeId) {
570 assert!(
571 id != Document::DOCUMENT_NODE_ID,
572 "Can't fold the synthetic document node");
573
574 let mut next_child = self[id].first_child;
575 while let Some(child) = next_child {
576 debug_assert_eq!(self[child].parent, Some(id));
577 next_child = self[child].next_sibling;
578 self.insert_before(id, child);
579 }
580 self.unlink_only(id);
581 }
582}
583
584impl Default for Document {
585 fn default() -> Document {
586 Document::new()
587 }
588}
589
590impl fmt::Debug for Document {
591 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
592 f.debug_list().entries(&self.nodes[1..]).finish()
593 }
594}
595
596impl std::ops::Index<NodeId> for Document {
597 type Output = Node;
598
599 #[inline]
600 fn index(&self, id: NodeId) -> &Node {
601 &self.nodes[id.0.get() as usize]
602 }
603}
604
605impl std::ops::IndexMut<NodeId> for Document {
606 #[inline]
607 fn index_mut(&mut self, id: NodeId) -> &mut Node {
608 &mut self.nodes[id.0.get() as usize]
609 }
610}
611
612impl Element {
613 pub fn new<LN>(lname: LN) -> Element
615 where LN: Into<LocalName>
616 {
617 Element {
618 name: QualName::new(None, ns!(), lname.into()),
619 attrs: Vec::new(),
620 _priv: ()
621 }
622 }
623
624 pub fn is_elem<LN>(&self, lname: LN) -> bool
626 where LN: Into<LocalName>
627 {
628 self.name.local == lname.into()
629 }
630
631 pub fn html_tag_meta(&self) -> Option<&'static html::TagMeta> {
634 if self.name.ns == html::ns::HTML {
635 html::TAG_META.get(&self.name.local)
636 } else {
637 None
638 }
639 }
640
641 pub fn attr<LN>(&self, lname: LN) -> Option<&StrTendril>
643 where LN: Into<LocalName>
644 {
645 let lname = lname.into();
646 self.attrs
647 .iter()
648 .find(|attr| attr.name.local == lname)
649 .map(|attr| &attr.value)
650 }
651
652 pub fn remove_attr<LN>(&mut self, lname: LN) -> Option<StrTendril>
659 where LN: Into<LocalName>
660 {
661 let mut found = None;
662 let mut i = 0;
663 let lname = lname.into();
664 while i < self.attrs.len() {
665 if self.attrs[i].name.local == lname {
666 found = Some(self.attrs.remove(i).value);
667 } else {
668 i += 1;
669 }
670 }
671 found
672 }
673
674 pub fn set_attr<LN, V>(&mut self, lname: LN, value: V)
684 -> Option<StrTendril>
685 where LN: Into<LocalName>, V: Into<StrTendril>
686 {
687 let mut found = None;
688 let mut i = 0;
689 let lname = lname.into();
690
691 let mut value = Some(value.into());
694
695 while i < self.attrs.len() {
696 if self.attrs[i].name.local == lname {
697 if found.is_none() {
698 found = Some(mem::replace(
699 &mut self.attrs[i].value,
700 value.take().unwrap(),
701 ));
702 i += 1;
703 } else {
704 found = Some(self.attrs.remove(i).value);
705 };
706 } else {
707 i += 1;
708 }
709 }
710 if found.is_none() {
711 self.attrs.push(Attribute {
712 name: QualName::new(None, ns!(), lname),
713 value: value.take().unwrap()
714 });
715 }
716 found
717 }
718}
719
720impl Node {
721 pub fn new_elem(element: Element) -> Node {
723 Node::new(NodeData::Elem(element))
724 }
725
726 pub fn new_text<T>(text: T) -> Node
728 where T: Into<StrTendril>
729 {
730 Node::new(NodeData::Text(text.into()))
731 }
732
733 fn take_data(&mut self) -> NodeData {
736 mem::replace(&mut self.data, NodeData::Hole)
739 }
740
741 fn new(data: NodeData) -> Self {
742 Node {
743 parent: None,
744 prev_sibling: None,
745 next_sibling: None,
746 first_child: None,
747 last_child: None,
748 data,
749 }
750 }
751}
752
753impl Deref for Node {
754 type Target = NodeData;
755
756 #[inline]
757 fn deref(&self) -> &NodeData {
758 &self.data
759 }
760}
761
762impl DerefMut for Node {
763 #[inline]
764 fn deref_mut(&mut self) -> &mut NodeData {
765 &mut self.data
766 }
767}
768
769impl NodeData {
770 pub fn as_element(&self) -> Option<&Element> {
772 match self {
773 NodeData::Elem(ref data) => Some(data),
774 _ => None,
775 }
776 }
777
778 pub fn as_element_mut(&mut self) -> Option<&mut Element> {
780 match self {
781 NodeData::Elem(ref mut data) => Some(data),
782 _ => None,
783 }
784 }
785
786 pub fn as_text(&self) -> Option<&StrTendril> {
788 match self {
789 NodeData::Text(ref t) => Some(t),
790 _ => None,
791 }
792 }
793
794 pub fn as_text_mut(&mut self) -> Option<&mut StrTendril> {
796 match self {
797 NodeData::Text(ref mut t) => Some(t),
798 _ => None,
799 }
800 }
801
802 pub fn attr<LN>(&self, lname: LN) -> Option<&StrTendril>
805 where LN: Into<LocalName>
806 {
807 if let Some(edata) = self.as_element() {
808 edata.attr(lname)
809 } else {
810 None
811 }
812 }
813
814 pub fn is_elem<LN>(&self, lname: LN) -> bool
816 where LN: Into<LocalName>
817 {
818 if let Some(edata) = self.as_element() {
819 edata.is_elem(lname)
820 } else {
821 false
822 }
823 }
824
825 #[inline]
826 fn assert_suitable_parent(&self) {
827 debug_assert!(
828 (if let NodeData::Document | NodeData::Elem(_) = self { true }
829 else { false }),
830 "Not a suitable parent: {:?}", self)
831 }
832}
833
834struct NodeStack1(Vec<NodeId>);
835
836impl NodeStack1 {
837 #[inline]
838 fn new() -> Self {
839 NodeStack1(Vec::with_capacity(16))
840 }
841
842 #[inline]
843 fn push_if(&mut self, id: Option<NodeId>) {
844 if let Some(id) = id {
845 self.0.push(id);
846 }
847 }
848
849 #[inline]
850 fn pop(&mut self) -> Option<NodeId> {
851 self.0.pop()
852 }
853}
854
855struct NodeStack2(Vec<(NodeId, NodeId)>);
856
857impl NodeStack2 {
858 #[inline]
859 fn new() -> Self {
860 NodeStack2(Vec::with_capacity(16))
861 }
862
863 #[inline]
864 fn push_if(&mut self, id: Option<NodeId>, oid: NodeId) {
865 if let Some(id) = id {
866 self.0.push((id, oid));
867 }
868 }
869
870 #[inline]
871 fn pop(&mut self) -> Option<(NodeId, NodeId)> {
872 self.0.pop()
873 }
874}