xot/access.rs
1use indextree::NodeEdge as IndexTreeNodeEdge;
2
3use crate::error::Error;
4use crate::levelorder::{level_order_traverse, LevelOrder};
5use crate::nodemap::{category_predicate, Attributes, Namespaces};
6use crate::output::NamespaceDeclarations;
7use crate::xmlvalue::{Value, ValueCategory, ValueType};
8use crate::xotdata::{Node, Xot};
9use crate::{NameId, NamespaceId, PrefixId, Prefixes};
10
11/// Traversal axis.
12///
13/// This can be used with `[Xot::Axis]` to traverse the tree in different ways.
14///
15/// The axis behaviors are based on the XPath specification.
16///
17/// Note that the namespace axis is not supported; it's tricky to support as it
18/// includes all namespace nodes in scope of an element, not just those
19/// namespaces defined on that element, and has not been a requirement since
20/// XPath 2.0.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
22pub enum Axis {
23 /// The children of the node. Equivalent to [`Xot::children`].
24 Child,
25 /// The descendants of the node, without the current node itself.
26 Descendant,
27 /// The parent of the node, or an empty iterator.
28 Parent,
29 /// The ancestors of the node, without the current node itself.
30 Ancestor,
31 /// The siblings following the node, without the current sibling.
32 /// Equivalent to [`Xot::following_siblings`].
33 FollowingSibling,
34 /// The siblings preceding the node, without the current sibling.
35 /// Equivalent to [`Xot::preceding_siblings`].
36 PrecedingSibling,
37 /// The nodes following the node. Equivalent to [`Xot::following`].
38 Following,
39 /// The nodes preceding the node. Equivalent to [`Xot::preceding`].
40 Preceding,
41 /// The attributes nodes of this node. Equivalent to [`Xot::attribute_nodes`].
42 Attribute,
43 /// The node itself as an iterator.
44 Self_,
45 /// The node and its descendants, in document order. Equivalent to
46 /// [`Xot::descendants`].
47 DescendantOrSelf,
48 /// The node and its ancestors. Equivalent to [`Xot::ancestors`].
49 AncestorOrSelf,
50}
51
52/// Node edges.
53///
54/// Used by [`Xot::traverse`] and [`Xot::reverse_traverse`].
55#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
56pub enum NodeEdge {
57 /// The start edge of a node. In case of an element
58 /// this is the start tag. In case of document this is
59 /// the start of the document.
60 Start(Node),
61 /// The end edge of a node. In case of an element
62 /// this is the end tag. In case of document this is the end
63 /// of the document. For any other values, the
64 /// end edge occurs immediately after the start
65 /// edge.
66 End(Node),
67}
68
69impl NodeEdge {
70 /// Returns the starting or ending node.
71 ///
72 /// # Examples
73 ///
74 /// ```rust
75 /// let mut xot = xot::Xot::new();
76 /// let root = xot.parse("<p><a/><b><c/></b></p>").unwrap();
77 /// let p = xot.document_element(root).unwrap();
78 /// let a = xot.first_child(p).unwrap();
79 /// let b = xot.next_sibling(a).unwrap();
80 /// let c = xot.first_child(b).unwrap();
81 /// let traversed: Vec<xot::Node> = xot
82 /// .traverse(p)
83 /// .map(|edge| edge.node())
84 /// .collect();
85 /// assert_eq!(traversed, &[p, a, a, b, c, c, b, p]);
86 #[must_use]
87 pub fn node(self) -> Node {
88 match self {
89 Self::Start(node) | Self::End(node) => node,
90 }
91 }
92
93 /// Returns the next edge in depth-first traversal order.
94 ///
95 /// # Examples
96 ///
97 /// ```rust
98 /// let mut xot = xot::Xot::new();
99 /// let root = xot.parse("<p><a/><b><c/><d/><e/></b><f><g/><h/></f></p>").unwrap();
100 /// let traversed: Vec<xot::NodeEdge> = xot.traverse(root).collect();
101 ///
102 /// let mut current = xot::NodeEdge::Start(root);
103 /// let mut traversed2 = vec![current];
104 /// while let Some(next) = current.next(&xot) {
105 /// if let xot::NodeEdge::End(node) = current {
106 /// // You can use `&mut Xot` inside the loop.
107 /// xot.remove(node);
108 /// }
109 ///
110 /// traversed2.push(next);
111 /// current = next;
112 /// }
113 /// assert_eq!(traversed, traversed2);
114 /// ```
115 #[must_use]
116 pub fn next(self, xot: &Xot) -> Option<Self> {
117 match self {
118 Self::Start(current) => xot
119 .first_child(current)
120 .map(Self::Start)
121 .or(Some(Self::End(current))),
122 Self::End(current) => xot
123 .next_sibling(current)
124 .map(Self::Start)
125 .or_else(|| xot.parent(current).map(Self::End)),
126 }
127 }
128
129 /// Returns the previous edge in depth-first traversal order.
130 ///
131 /// # Examples
132 ///
133 /// ```rust
134 /// let mut xot = xot::Xot::new();
135 /// let root = xot.parse("<p><a/><b><c/><d/><e/></b><f><g/><h/></f></p>").unwrap();
136 /// let rev_traversed: Vec<xot::NodeEdge> = xot.reverse_traverse(root).collect();
137 ///
138 /// let mut current = xot::NodeEdge::End(root);
139 /// let mut rev_traversed2 = vec![current];
140 /// while let Some(next) = current.previous(&xot) {
141 /// if let xot::NodeEdge::Start(node) = current {
142 /// // You can use `&mut Xot` inside the loop.
143 /// xot.remove(node);
144 /// }
145 ///
146 /// rev_traversed2.push(next);
147 /// current = next;
148 /// }
149 /// assert_eq!(rev_traversed, rev_traversed2);
150 /// ```
151 #[must_use]
152 pub fn previous(self, xot: &Xot) -> Option<Self> {
153 match self {
154 Self::End(current) => xot
155 .last_child(current)
156 .map(Self::End)
157 .or(Some(Self::Start(current))),
158 Self::Start(current) => xot
159 .previous_sibling(current)
160 .map(Self::End)
161 .or_else(|| xot.parent(current).map(Self::Start)),
162 }
163 }
164}
165
166/// ## Read-only access
167///
168/// These are functions that provide read-only access to the tree.
169impl Xot {
170 /// Obtain the document element from the document node.
171 ///
172 /// Returns [`Error::NotDocument`](`crate::error::Error::NotDocument`)
173 /// error if this is not the document node.
174 ///
175 /// This accepts document nodes representing fragments. If the fragment has
176 /// multiple elements, it returns the first element. If the fragment has no
177 /// elements, it errors with [`Error::NoElementAtTopLevel`].
178 ///
179 /// ```rust
180 /// let mut xot = xot::Xot::new();
181 ///
182 /// let doc = xot.parse("<p>Example</p>").unwrap();
183 ///
184 /// let doc_el = xot.document_element(doc).unwrap();
185 ///
186 /// // Check that we indeed have the `p` element
187 /// let p_name = xot.name("p").unwrap();
188 /// assert_eq!(xot.element(doc_el).unwrap().name(), p_name);
189 /// ```
190 pub fn document_element(&self, node: Node) -> Result<Node, Error> {
191 if self.value_type(node) != ValueType::Document {
192 return Err(Error::NotDocument(node));
193 }
194 for child in self.children(node) {
195 if let Value::Element(_) = self.value(child) {
196 return Ok(child);
197 }
198 }
199 Err(Error::NoElementAtTopLevel)
200 }
201
202 /// Given a node indicating a document, check if it is a well-formed
203 /// document.
204 ///
205 /// This checks that the document has a single document element, and that
206 /// there are no text nodes as the top level. If it is a a well-formed
207 /// document, this produces no errors.
208 ///
209 /// If this is not supplied with a document node,
210 /// this produces a [`Error::NotDocument`] error.
211 ///
212 /// If there are no elements at the top level, this produces a
213 /// [`Error::NoElementAtTopLevel`] error.
214 ///
215 /// If there are multiple elements at the top level, this produces a
216 /// [`Error::MultipleElementsAtTopLevel`] error.
217 ///
218 /// If there is a text node at the top level, this produces a
219 /// [`Error::TextAtTopLevel`] error.
220 ///
221 /// If there is a illegal content (a namespace node, an attribute node
222 /// or a document node) at the top level under a document node,
223 /// this produces a [`Error::IllegalAtTopLevel`] error.
224 pub fn validate_well_formed_document(&self, node: Node) -> Result<(), Error> {
225 if self.value_type(node) != ValueType::Document {
226 return Err(Error::NotDocument(node));
227 }
228
229 let mut element_count = 0;
230 for child in self.children(node) {
231 match self.value(child) {
232 Value::Element(_) => element_count += 1,
233 // no text nodes at the top level
234 Value::Text(_) => return Err(Error::TextAtTopLevel(child)),
235 Value::Comment(_) | Value::ProcessingInstruction(_) => {
236 // these we can have as many as we like
237 }
238 Value::Document | Value::Attribute(_) | Value::Namespace(_) => {
239 // these are completely unexpected under any document node
240 return Err(Error::IllegalAtTopLevel(child));
241 }
242 }
243 }
244 if element_count == 0 {
245 return Err(Error::NoElementAtTopLevel);
246 }
247 if element_count > 1 {
248 return Err(Error::MultipleElementsAtTopLevel);
249 }
250 Ok(())
251 }
252
253 /// Obtain top element, given node anywhere in a tree.
254 ///
255 /// In an XML document this is the document element.
256 /// In a XML document fragment this is the top element, but it may
257 /// have siblings.
258 /// If it's an unattached tree, it's the top node of that tree
259 pub fn top_element(&self, node: Node) -> Node {
260 if self.value_type(node) == ValueType::Document {
261 return self.document_element(node).unwrap();
262 }
263 let mut top = node;
264 for ancestor in self.ancestors(node) {
265 if let Value::Element(_) = self.value(ancestor) {
266 top = ancestor;
267 }
268 }
269 // XXX in an unattached tree this may not be an element.
270 top
271 }
272
273 /// Obtain root of the tree.
274 ///
275 /// This is the document node if possible (in a document or fragment), but
276 /// in an unattached tree this is the root of that tree.
277 pub fn root(&self, node: Node) -> Node {
278 self.ancestors(node).last().unwrap()
279 }
280
281 /// Check whether a node has been removed.
282 ///
283 /// This can happen because you removed it explicitly, or because you held
284 /// on to a reference and the node was replaced using [`Xot::replace`], or
285 /// unwrapped using [`Xot::element_unwrap`].
286 ///
287 /// ```rust
288 /// let mut xot = xot::Xot::new();
289 ///
290 /// let root = xot.parse("<p>Example</p>").unwrap();
291 /// let p = xot.document_element(root).unwrap();
292 /// let text = xot.first_child(p).unwrap();
293 /// xot.remove(text);
294 /// assert_eq!(xot.to_string(root).unwrap(), "<p/>");
295 /// assert!(xot.is_removed(text));
296 /// ```
297 pub fn is_removed(&self, node: Node) -> bool {
298 self.arena()[node.get()].is_removed()
299 }
300
301 /// Get parent node.
302 ///
303 /// Returns [`None`] if this is the document node or if the node is
304 /// unattached to a document.
305 ///
306 /// Attribute and namespace nodes have a parent, even though they aren't
307 /// children of the element they are in.
308 ///
309 /// ```rust
310 /// let mut xot = xot::Xot::new();
311 /// let root = xot.parse("<p>Example</p>").unwrap();
312 /// let p = xot.document_element(root).unwrap();
313 /// let text = xot.first_child(p).unwrap();
314 /// assert_eq!(xot.parent(text), Some(p));
315 /// assert_eq!(xot.parent(p), Some(root));
316 /// assert_eq!(xot.parent(root), None);
317 /// ```
318 pub fn parent(&self, node: Node) -> Option<Node> {
319 self.arena()[node.get()].parent().map(Node::new)
320 }
321
322 pub(crate) fn all_children(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
323 node.get().children(&self.arena).map(Node::new)
324 }
325
326 pub(crate) fn abnormal_children(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
327 node.get()
328 .children(&self.arena)
329 .take_while(|n| !self.arena[*n].get().is_normal())
330 .map(Node::new)
331 }
332
333 pub(crate) fn normal_children(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
334 node.get()
335 .children(&self.arena)
336 .skip_while(|n| !self.arena[*n].get().is_normal())
337 .map(Node::new)
338 }
339
340 /// Attributes accessor.
341 ///
342 /// Returns a map of [`crate::NameId`] to a String reference representing the
343 /// attributes on the element.
344 ///
345 /// Note that if this is called on a non-element node, you get an empty
346 /// map.
347 ///
348 /// ```rust
349 /// let mut xot = xot::Xot::new();
350 /// let a = xot.add_name("a");
351 /// let root = xot.parse(r#"<p a="A">Example</p>"#).unwrap();
352 /// let p = xot.document_element(root).unwrap();
353 /// let attributes = xot.attributes(p);
354 ///
355 /// assert_eq!(attributes.get(a), Some(&"A".to_string()));
356 /// ```
357 pub fn attributes(&self, node: Node) -> Attributes {
358 Attributes::new(self, node)
359 }
360
361 /// Get the attribute value for a name.
362 ///
363 /// Note that if this is invoked non a non-element it's always going to
364 /// return None
365 pub fn get_attribute(&self, node: Node, name: NameId) -> Option<&str> {
366 self.attributes(node).get(name).map(String::as_str)
367 }
368
369 /// Namespaces accessor.
370 ///
371 /// Returns a map of [`crate::PrefixId`] to [`crate::NamespaceId`] representing
372 /// the namespace declarations on the element.
373 ///
374 /// Note that if this is called on a non-element node, you get an empty
375 /// map.
376 ///
377 /// ```rust
378 /// let mut xot = xot::Xot::new();
379 /// let foo_prefix = xot.add_prefix("foo");
380 /// let foo_ns = xot.add_namespace("FOO");
381 /// let root = xot.parse(r#"<p xmlns:foo="FOO">Example</p>"#).unwrap();
382 /// let p = xot.document_element(root).unwrap();
383 /// let namespaces = xot.namespaces(p);
384 ///
385 /// assert_eq!(namespaces.get(foo_prefix), Some(&foo_ns));
386 /// ```
387 pub fn namespaces(&self, node: Node) -> Namespaces {
388 Namespaces::new(self, node)
389 }
390
391 /// Get the namespace for a prefix.
392 ///
393 /// Note that if this is invoked non a non-element it's always going to
394 /// return None
395 pub fn get_namespace(&self, node: Node, prefix: PrefixId) -> Option<NamespaceId> {
396 self.namespaces(node).get(prefix).copied()
397 }
398
399 /// Copy the namespace declarations as a prefixes hash table.
400 ///
401 /// Sometimes it's more convenient to work with a hash table of
402 /// prefixes as opposed to the dynamic [`Xot::namespaces`] node map.
403 pub fn prefixes(&self, node: Node) -> Prefixes {
404 let mut prefixes = Prefixes::new();
405 for (prefix, ns) in self.namespaces(node).iter() {
406 prefixes.insert(prefix, *ns);
407 }
408 prefixes
409 }
410
411 /// Copy the namespace declarations as a namespace declarations vec.
412 pub fn namespace_declarations(&self, node: Node) -> NamespaceDeclarations {
413 self.namespaces(node)
414 .iter()
415 .map(|(prefix, ns)| (prefix, *ns))
416 .collect()
417 }
418
419 pub(crate) fn has_namespace_declarations(&self, node: Node) -> bool {
420 self.namespaces(node).iter().next().is_some()
421 }
422
423 /// Access the attribute nodes directly.
424 pub fn attribute_nodes(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
425 self.all_children(node)
426 .skip_while(category_predicate(self, ValueCategory::Namespace))
427 .take_while(category_predicate(self, ValueCategory::Attribute))
428 }
429
430 /// Get first child.
431 ///
432 /// Returns [`None`] if there are no children.
433 ///
434 /// ```rust
435 /// let mut xot = xot::Xot::new();
436 /// let root = xot.parse("<p>Example</p>").unwrap();
437 /// let p = xot.document_element(root).unwrap();
438 /// let text = xot.first_child(p).unwrap();
439 /// assert_eq!(xot.first_child(root), Some(p));
440 /// assert_eq!(xot.first_child(p), Some(text));
441 /// assert_eq!(xot.first_child(text), None);
442 /// ```
443 pub fn first_child(&self, node: Node) -> Option<Node> {
444 self.normal_children(node).next()
445 }
446
447 /// Get last child.
448 ///
449 /// Returns [`None`] if there are no children.
450 pub fn last_child(&self, node: Node) -> Option<Node> {
451 let last_child = self.arena[node.get()].last_child()?;
452 if self.arena[last_child].get().is_normal() {
453 Some(Node::new(last_child))
454 } else {
455 None
456 }
457 }
458
459 /// Get next sibling.
460 ///
461 /// Returns [`None`] if there is no next sibling.
462 ///
463 /// For normal child nodes, gives the next child.
464 ///
465 /// For namespace and attribute nodes, gives the next namespace or
466 /// attribute in definition order.
467 ///
468 /// ```rust
469 /// let mut xot = xot::Xot::new();
470 /// let root = xot.parse("<p><a/><b/></p>").unwrap();
471 /// let p = xot.document_element(root).unwrap();
472 /// let a = xot.first_child(p).unwrap();
473 /// let b = xot.next_sibling(a).unwrap();
474 /// assert_eq!(xot.next_sibling(b), None);
475 /// ```
476 pub fn next_sibling(&self, node: Node) -> Option<Node> {
477 let current_category = self.arena[node.get()].get().value_category();
478 let next_sibling = self.arena[node.get()].next_sibling()?;
479 let next_category = self.arena[next_sibling].get().value_category();
480 if current_category != next_category {
481 return None;
482 }
483 Some(Node::new(next_sibling))
484 }
485
486 /// Get previous sibling.
487 ///
488 /// Returns [`None`] if there is no previous sibling.
489 pub fn previous_sibling(&self, node: Node) -> Option<Node> {
490 let current_category = self.arena[node.get()].get().value_category();
491 let previous_sibling = self.arena[node.get()].previous_sibling()?;
492 let previous_category = self.arena[previous_sibling].get().value_category();
493 if current_category != previous_category {
494 return None;
495 }
496 Some(Node::new(previous_sibling))
497 }
498
499 /// Iterator over ancestor nodes, including this one.
500 ///
501 /// Namespace and attribute node have ancestors, even though
502 /// they aren't the child of the element they are in.
503 ///
504 /// ```rust
505 /// let mut xot = xot::Xot::new();
506 ///
507 /// let root = xot.parse("<a><b><c/></b></a>").unwrap();
508 /// let a = xot.document_element(root).unwrap();
509 /// let b = xot.first_child(a).unwrap();
510 /// let c = xot.first_child(b).unwrap();
511 ///
512 /// let ancestors = xot.ancestors(c).collect::<Vec<_>>();
513 /// assert_eq!(ancestors, vec![c, b, a, root]);
514 /// ```
515 pub fn ancestors(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
516 node.get().ancestors(self.arena()).map(Node::new)
517 }
518
519 /// Iterator over the child nodes of this node.
520 ///
521 /// Namespace and attribute nodes aren't consider child nodes even
522 /// if they have a parent element.
523 ///
524 /// ```rust
525 /// let mut xot = xot::Xot::new();
526 /// let root = xot.parse("<p><a/><b/></p>").unwrap();
527 /// let p = xot.document_element(root).unwrap();
528 /// let a = xot.first_child(p).unwrap();
529 /// let b = xot.next_sibling(a).unwrap();
530 /// let children = xot.children(p).collect::<Vec<_>>();
531 ///
532 /// assert_eq!(children, vec![a, b]);
533 /// ```
534 pub fn children(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
535 self.normal_children(node)
536 }
537
538 /// Get index of child.
539 ///
540 /// Returns [`None`] if the node is not a child of this node, so
541 /// does not apply to namespace or attribute nodes.
542 ///
543 /// ```rust
544 /// let mut xot = xot::Xot::new();
545 /// let root = xot.parse("<p><a/><b/></p>").unwrap();
546 /// let p = xot.document_element(root).unwrap();
547 /// let a = xot.first_child(p).unwrap();
548 /// let b = xot.next_sibling(a).unwrap();
549 /// assert_eq!(xot.child_index(p, a), Some(0));
550 /// assert_eq!(xot.child_index(p, b), Some(1));
551 /// assert_eq!(xot.child_index(a, b), None);
552 /// ```
553 pub fn child_index(&self, parent: Node, child: Node) -> Option<usize> {
554 if self.parent(child) != Some(parent) {
555 return None;
556 }
557 self.normal_children(parent).position(|n| n == child)
558 }
559
560 /// Iterator over the child nodes of this node, in reverse order.
561 pub fn reverse_children(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
562 node.get()
563 .children(self.arena())
564 .rev()
565 .take_while(|n| self.arena[*n].get().is_normal())
566 .map(Node::new)
567 }
568
569 fn normal_filter(&self) -> impl Fn(&indextree::NodeId) -> bool + '_ {
570 |node_id| self.arena[*node_id].get().is_normal()
571 }
572
573 fn normal_edge_filter(&self) -> impl Fn(&indextree::NodeEdge) -> bool + '_ {
574 move |edge| {
575 let node_id = match edge {
576 indextree::NodeEdge::Start(node_id) => node_id,
577 indextree::NodeEdge::End(node_id) => node_id,
578 };
579 self.arena[*node_id].get().is_normal()
580 }
581 }
582
583 fn category_filter(&self, category: ValueCategory) -> impl Fn(&indextree::NodeId) -> bool + '_ {
584 move |node_id| self.arena[*node_id].get().value_category() == category
585 }
586
587 /// Iterator over of the descendants of this node,
588 /// including this one. In document order (pre-order depth-first).
589 ///
590 /// Namespace and attribute nodes aren't included as descendants.
591 ///
592 /// ```rust
593 /// let mut xot = xot::Xot::new();
594 /// let root = xot.parse("<a><b><c/></b></a>").unwrap();
595 /// let a = xot.document_element(root).unwrap();
596 /// let b = xot.first_child(a).unwrap();
597 /// let c = xot.first_child(b).unwrap();
598 ///
599 /// let descendants = xot.descendants(a).collect::<Vec<_>>();
600 /// assert_eq!(descendants, vec![a, b, c]);
601 /// ```
602 pub fn descendants(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
603 node.get()
604 .descendants(self.arena())
605 .filter(self.normal_filter())
606 .map(Node::new)
607 }
608
609 /// All the descendants of this node.
610 ///
611 /// This includes this one, and namespace and attribute nodes,
612 /// all in document order, where namespace nodes come before
613 /// attribute nodes and attribute nodes come before normal children
614 pub fn all_descendants(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
615 node.get().descendants(self.arena()).map(Node::new)
616 }
617
618 /// Iterator over the following siblings of this node, including this one.
619 ///
620 /// In case of namespace or attribute nodes, includes the following sibling
621 /// namespace or attribute nodes.
622 ///
623 /// ```rust
624 /// let mut xot = xot::Xot::new();
625 /// let root = xot.parse("<p><a/><b/><c/></p>").unwrap();
626 /// let p = xot.document_element(root).unwrap();
627 /// let a = xot.first_child(p).unwrap();
628 /// let b = xot.next_sibling(a).unwrap();
629 /// let c = xot.next_sibling(b).unwrap();
630 /// let siblings = xot.following_siblings(a).collect::<Vec<_>>();
631 /// assert_eq!(siblings, vec![a, b, c]);
632 /// let siblings = xot.following_siblings(b).collect::<Vec<_>>();
633 /// assert_eq!(siblings, vec![b, c]);
634 /// ```
635 pub fn following_siblings(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
636 let current_category = self.arena[node.get()].get().value_category();
637 node.get()
638 .following_siblings(self.arena())
639 .filter(self.category_filter(current_category))
640 .map(Node::new)
641 }
642
643 /// Iterator over the preceding siblings of this node.
644 pub fn preceding_siblings(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
645 let current_category = self.arena[node.get()].get().value_category();
646 node.get()
647 .preceding_siblings(self.arena())
648 .filter(self.category_filter(current_category))
649 .map(Node::new)
650 }
651
652 /// Following nodes in document order
653 ///
654 /// These are nodes that come after this node in document order,
655 /// without that node itself, its ancestors, or its descendants.
656 ///
657 /// Does not include namespace or attribute nodes.
658 ///
659 /// ```rust
660 /// let mut xot = xot::Xot::new();
661 /// let root = xot.parse("<p><a/><b><c/><d/><e/></b><f><g/><h/></f></p>").unwrap();
662 /// let p = xot.document_element(root).unwrap();
663 /// let a = xot.first_child(p).unwrap();
664 /// let b = xot.next_sibling(a).unwrap();
665 /// let c = xot.first_child(b).unwrap();
666 /// let d = xot.next_sibling(c).unwrap();
667 /// let e = xot.next_sibling(d).unwrap();
668 /// let f = xot.next_sibling(b).unwrap();
669 /// let g = xot.first_child(f).unwrap();
670 /// let h = xot.next_sibling(g).unwrap();
671 /// let siblings = xot.following(c).collect::<Vec<_>>();
672 /// assert_eq!(siblings, vec![d, e, f, g, h]);
673 /// ```
674 pub fn following(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
675 // start with an empty iterator
676 let mut joined_iterator: Box<dyn Iterator<Item = Node>> = Box::new(std::iter::empty());
677 let mut current_parent = Some(node);
678 while let Some(parent) = current_parent {
679 let mut current_sibling = parent;
680 while let Some(current) = self.next_sibling(current_sibling) {
681 // add descendants of next sibling
682 joined_iterator =
683 Box::new(joined_iterator.chain(Box::new(self.descendants(current))));
684 current_sibling = current;
685 }
686 current_parent = self.parent(parent);
687 }
688 joined_iterator
689 }
690
691 /// Preceding nodes in document order
692 ///
693 /// These are nodes that come before this node in document order,
694 /// without that node itself, its ancestors, or its descendants.
695 ///
696 /// Does not include namespace or attribute nodes.
697 ///
698 /// ```rust
699 /// let mut xot = xot::Xot::new();
700 /// let root = xot.parse("<p><a/><b><c/><d/><e/></b><f><g/><h/></f></p>").unwrap();
701 /// let p = xot.document_element(root).unwrap();
702 /// let a = xot.first_child(p).unwrap();
703 /// let b = xot.next_sibling(a).unwrap();
704 /// let c = xot.first_child(b).unwrap();
705 /// let d = xot.next_sibling(c).unwrap();
706 /// let e = xot.next_sibling(d).unwrap();
707 /// let f = xot.next_sibling(b).unwrap();
708 /// let g = xot.first_child(f).unwrap();
709 /// let h = xot.next_sibling(g).unwrap();
710 /// let siblings = xot.preceding(e).collect::<Vec<_>>();
711 /// assert_eq!(siblings, vec![d, c, a]);
712 /// let siblings = xot.preceding(h).collect::<Vec<_>>();
713 /// assert_eq!(siblings, vec![g, e, d, c, b, a]);
714 /// ```
715 pub fn preceding(&self, node: Node) -> impl Iterator<Item = Node> + '_ {
716 // start with an empty iterator
717 let mut joined_iterator: Box<dyn Iterator<Item = Node>> = Box::new(std::iter::empty());
718 let mut current_parent = Some(node);
719 while let Some(parent) = current_parent {
720 let mut current_sibling = parent;
721 while let Some(current) = self.previous_sibling(current_sibling) {
722 // add descendants of previous sibling, reversed
723 // this unfortunately requires an extra allocation, as descendants
724 // is not a double iterator.
725 let descendants = Box::new(self.descendants(current).collect::<Vec<_>>());
726 let reverse_descendants = descendants.into_iter().rev();
727 joined_iterator = Box::new(joined_iterator.chain(Box::new(reverse_descendants)));
728 current_sibling = current;
729 }
730 current_parent = self.parent(parent);
731 }
732 joined_iterator
733 }
734
735 /// Traverse over node edges.
736 ///
737 /// This can be used to traverse the tree in document order iteratively
738 /// without the need for recursion, while getting structure information
739 /// (unlike [`Xot::descendants`] which doesn't retain structure
740 /// information).
741 ///
742 /// For the tree `<a><b/></a>` this generates a [`NodeEdge::Start`] for
743 /// `<a>`, then a [`NodeEdge::Start`] for `<b>`, immediately followed by a
744 /// [`NodeEdge::End`] for `<b>`, and finally a [`NodeEdge::End`] for `<a>`.
745 ///
746 /// For value types other than element or root, the start and end always
747 /// come as pairs without any intervening edges.
748 ///
749 /// This does not include edges for namespace and attribute nodes.
750 ///
751 /// ```rust
752 /// let mut xot = xot::Xot::new();
753 /// let root = xot.parse("<a><b>Text</b></a>").unwrap();
754 /// let a = xot.document_element(root).unwrap();
755 /// let b = xot.first_child(a).unwrap();
756 /// let text = xot.first_child(b).unwrap();
757 /// let edges = xot.traverse(a).collect::<Vec<_>>();
758 /// assert_eq!(edges, vec![
759 /// xot::NodeEdge::Start(a),
760 /// xot::NodeEdge::Start(b),
761 /// xot::NodeEdge::Start(text),
762 /// xot::NodeEdge::End(text),
763 /// xot::NodeEdge::End(b),
764 /// xot::NodeEdge::End(a),
765 /// ]);
766 /// ```
767 pub fn traverse(&self, node: Node) -> impl Iterator<Item = NodeEdge> + '_ {
768 node.get()
769 .traverse(self.arena())
770 .filter(self.normal_edge_filter())
771 .map(|edge| match edge {
772 IndexTreeNodeEdge::Start(node_id) => NodeEdge::Start(Node::new(node_id)),
773 IndexTreeNodeEdge::End(node_id) => NodeEdge::End(Node::new(node_id)),
774 })
775 }
776
777 /// Traverse nodes, including namespace and attribute nodes.
778 pub fn all_traverse(&self, node: Node) -> impl Iterator<Item = NodeEdge> + '_ {
779 node.get().traverse(self.arena()).map(|edge| match edge {
780 IndexTreeNodeEdge::Start(node_id) => NodeEdge::Start(Node::new(node_id)),
781 IndexTreeNodeEdge::End(node_id) => NodeEdge::End(Node::new(node_id)),
782 })
783 }
784
785 /// Traverse over node edges in reverse order.
786 ///
787 /// Like [`Xot::traverse`] but in reverse order.
788 pub fn reverse_traverse(&self, node: Node) -> impl Iterator<Item = NodeEdge> + '_ {
789 node.get()
790 .reverse_traverse(self.arena())
791 .filter(self.normal_edge_filter())
792 .map(|edge| match edge {
793 IndexTreeNodeEdge::Start(node_id) => NodeEdge::Start(Node::new(node_id)),
794 IndexTreeNodeEdge::End(node_id) => NodeEdge::End(Node::new(node_id)),
795 })
796 }
797
798 /// Traverse over nodes in level order.
799 ///
800 /// This is a breath first traversal, where each level is visited in turn.
801 /// Sequences of nodes with a different parent are separated by
802 /// [`LevelOrder::End`].
803 ///
804 /// For the tree `<a><b><d/></b><c><e/></c></a>` this generates a
805 /// [`LevelOrder::Node`] for `<a>`, then a [`LevelOrder::End`]. Next, a
806 /// [`LevelOrder::Node`] for `<b/>` and `</c>` are generated, again
807 /// followed by a [`LevelOrder::End`]. Then a [`LevelOrder::Node`] is
808 /// generated for `<d/>`, followed by a [`LevelOrder::End`]. Finally a
809 /// [`LevelOrder::Node`] is generated for `<e/>`, followed by a
810 /// [`LevelOrder::End`].
811 ///
812 /// This does not include namespace or attribute nodes.
813 ///
814 /// ```rust
815 /// let mut xot = xot::Xot::new();
816 /// let root = xot.parse("<a><b><d/></b><c><e/></c></a>").unwrap();
817 /// let a = xot.document_element(root).unwrap();
818 /// let b = xot.first_child(a).unwrap();
819 /// let d = xot.first_child(b).unwrap();
820 /// let c = xot.next_sibling(b).unwrap();
821 /// let e = xot.first_child(c).unwrap();
822 ///
823 /// let levels = xot.level_order(a).collect::<Vec<_>>();
824 /// assert_eq!(levels, vec![
825 /// xot::LevelOrder::Node(a),
826 /// xot::LevelOrder::End,
827 /// xot::LevelOrder::Node(b),
828 /// xot::LevelOrder::Node(c),
829 /// xot::LevelOrder::End,
830 /// xot::LevelOrder::Node(d),
831 /// xot::LevelOrder::End,
832 /// xot::LevelOrder::Node(e),
833 /// xot::LevelOrder::End,
834 /// ]);
835 /// ```
836 pub fn level_order(&self, node: Node) -> impl Iterator<Item = LevelOrder> + '_ {
837 level_order_traverse(self, node)
838 }
839
840 /// Axis-based traversal.
841 ///
842 /// Use an [`crate::Axis`] to traverse the tree in a way defined by
843 /// XPath.
844 ///
845 /// `<https://developer.mozilla.org/en-US/docs/Web/XPath/Axes>`
846 pub fn axis(&self, axis: Axis, node: Node) -> Box<dyn Iterator<Item = Node> + '_> {
847 use Axis::*;
848 match axis {
849 Child => Box::new(self.children(node)),
850 Descendant => {
851 let mut descendants = self.descendants(node);
852 // since this includes self we get rid of it here
853 descendants.next();
854 Box::new(descendants)
855 }
856 Parent => {
857 if let Some(parent) = self.parent(node) {
858 Box::new(std::iter::once(parent))
859 } else {
860 Box::new(std::iter::empty())
861 }
862 }
863 Ancestor => {
864 let parent = self.parent(node);
865 if let Some(parent) = parent {
866 // the ancestors of the parent include self, which is
867 // what we want as the parent is already taken
868 // We can't get a Node::Attribute or Node::Namespace
869 // because we just took the parent
870 Box::new(self.ancestors(parent))
871 } else {
872 Box::new(std::iter::empty())
873 }
874 }
875 FollowingSibling => {
876 let mut following = self.following_siblings(node);
877 // consume the self sibling
878 following.next();
879 Box::new(following)
880 }
881 PrecedingSibling => {
882 let mut preceding = self.preceding_siblings(node);
883 // consume the self sibling
884 preceding.next();
885 Box::new(preceding)
886 }
887 Following => Box::new(self.following(node)),
888 Preceding => Box::new(self.preceding(node)),
889 Axis::Self_ => Box::new(std::iter::once(node)),
890 DescendantOrSelf => Box::new(self.descendants(node)),
891 AncestorOrSelf => Box::new(self.ancestors(node)),
892 Attribute => Box::new(self.attribute_nodes(node)),
893 }
894 }
895
896 /// Get (element) node in a document node that has an xml:id attribute of
897 /// given value.
898 ///
899 /// If that id does not exist, returns [`None`].
900 pub fn xml_id_node(&self, document_node: Node, value: &str) -> Option<Node> {
901 let value_nodes = self.id_nodes_map.get(&document_node.get())?;
902 value_nodes.get(value).map(|node_id| Node::new(*node_id))
903 }
904}