sxd_xpath/
nodeset.rs

1//! Support for collections of nodes.
2
3use std::borrow::ToOwned;
4use std::collections::{HashSet, HashMap};
5use std::collections::hash_set;
6use std::iter::{IntoIterator, FromIterator};
7use std::usize;
8
9use sxd_document::QName;
10use sxd_document::dom;
11
12macro_rules! unpack(
13    ($enum_name:ident, {
14        $($name:ident, $wrapper:ident, dom::$inner:ident),*
15    }) => (
16        $(
17            pub fn $name(self) -> Option<dom::$inner<'d>> {
18                match self {
19                    $enum_name::$wrapper(n) => Some(n),
20                    _ => None,
21                }
22            }
23        )*
24    )
25);
26
27macro_rules! conversion_trait(
28    ($res_type:ident, {
29        $(dom::$leaf_type:ident => Node::$variant:ident),*
30    }) => (
31        $(impl<'d> From<dom::$leaf_type<'d>> for $res_type<'d>  {
32            fn from(v: dom::$leaf_type<'d>) -> $res_type<'d> {
33                Node::$variant(v)
34            }
35        })*
36    )
37);
38
39/// Represents a namespace.
40///
41/// This differs from the DOM, which does not treat namespaces as a
42/// separate item.
43#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
44pub struct Namespace<'d> {
45    pub parent: dom::Element<'d>,
46    pub prefix: &'d str,
47    pub uri: &'d str,
48}
49
50impl<'d> Namespace<'d> {
51    pub fn document(&self) -> dom::Document<'d> { self.parent.document() }
52    pub fn parent(&self) -> dom::Element<'d> { self.parent }
53    pub fn prefix(&self) -> &'d str { self.prefix }
54    pub fn uri(&self) -> &'d str { self.uri }
55    pub fn expanded_name(&self) -> QName<'d> { QName::new(self.prefix) }
56}
57
58/// Any of the various types of nodes found in an XML document.
59#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
60pub enum Node<'d> {
61    Root(dom::Root<'d>),
62    Element(dom::Element<'d>),
63    Attribute(dom::Attribute<'d>),
64    Text(dom::Text<'d>),
65    Comment(dom::Comment<'d>),
66    Namespace(Namespace<'d>),
67    ProcessingInstruction(dom::ProcessingInstruction<'d>),
68}
69
70impl<'d> Node<'d> {
71    /// The document to which this node belongs.
72    pub fn document(&self) -> dom::Document<'d> {
73        use self::Node::*;
74        match *self {
75            Root(n)                  => n.document(),
76            Element(n)               => n.document(),
77            Attribute(n)             => n.document(),
78            Text(n)                  => n.document(),
79            Comment(n)               => n.document(),
80            ProcessingInstruction(n) => n.document(),
81            Namespace(n)             => n.document(),
82        }
83    }
84
85    /// The name of the node, including a prefix that corresponds to the namespace, if any.
86    pub fn prefixed_name(&self) -> Option<String> {
87        use self::Node::*;
88
89        fn qname_prefixed_name(element: dom::Element, name: QName, preferred_prefix: Option<&str>) -> String {
90            if let Some(ns_uri) = name.namespace_uri() {
91                if let Some(prefix) = element.prefix_for_namespace_uri(ns_uri, preferred_prefix) {
92                    format!("{}:{}", prefix, name.local_part())
93                } else {
94                    name.local_part().to_owned()
95                }
96            } else {
97                name.local_part().to_owned()
98            }
99        };
100
101        match *self {
102            Root(_)                  => None,
103            Element(n)               => {
104                Some(qname_prefixed_name(n, n.name(), n.preferred_prefix()))
105            },
106            Attribute(n)             => {
107                let parent = n.parent().expect("Cannot process attribute without parent");
108                Some(qname_prefixed_name(parent, n.name(), n.preferred_prefix()))
109            },
110            Text(_)                  => None,
111            Comment(_)               => None,
112            ProcessingInstruction(n) => Some(n.target().to_owned()),
113            Namespace(n)             => Some(n.prefix().to_owned()),
114        }
115    }
116
117    /// Returns the [expanded name][] of the node, if any.
118    ///
119    /// [expanded name]: https://www.w3.org/TR/xpath/#dt-expanded-name
120    pub fn expanded_name(&self) -> Option<QName<'d>> {
121        use self::Node::*;
122        match *self {
123            Root(_)                  => None,
124            Element(n)               => Some(n.name()),
125            Attribute(n)             => Some(n.name()),
126            Text(_)                  => None,
127            Comment(_)               => None,
128            ProcessingInstruction(n) => Some(QName::new(n.target())),
129            Namespace(n)             => Some(n.expanded_name())
130        }
131    }
132
133    /// Returns the parent of the node, if any.
134    pub fn parent(&self) -> Option<Node<'d>> {
135        use self::Node::*;
136        match *self {
137            Root(_)                  => None,
138            Element(n)               => n.parent().map(Into::into),
139            Attribute(n)             => n.parent().map(Into::into),
140            Text(n)                  => n.parent().map(Into::into),
141            Comment(n)               => n.parent().map(Into::into),
142            ProcessingInstruction(n) => n.parent().map(Into::into),
143            Namespace(n)             => Some(n.parent().into()),
144        }
145    }
146
147    /// Returns the children of the node, if any.
148    pub fn children(&self) -> Vec<Node<'d>> {
149        use self::Node::*;
150        match *self {
151            Root(n)                  => n.children().into_iter().map(Into::into).collect(),
152            Element(n)               => n.children().into_iter().map(Into::into).collect(),
153            Attribute(_)             => Vec::new(),
154            Text(_)                  => Vec::new(),
155            Comment(_)               => Vec::new(),
156            ProcessingInstruction(_) => Vec::new(),
157            Namespace(_)             => Vec::new(),
158        }
159    }
160
161    /// Returns the nodes with the same parent that occur before this node.
162    pub fn preceding_siblings(&self) -> Vec<Node<'d>> {
163        use self::Node::*;
164        match *self {
165            Root(_)                  => Vec::new(),
166            Element(n)               => n.preceding_siblings().into_iter().rev().map(Into::into).collect(),
167            Attribute(_)             => Vec::new(),
168            Text(n)                  => n.preceding_siblings().into_iter().rev().map(Into::into).collect(),
169            Comment(n)               => n.preceding_siblings().into_iter().rev().map(Into::into).collect(),
170            ProcessingInstruction(n) => n.preceding_siblings().into_iter().rev().map(Into::into).collect(),
171            Namespace(_)             => Vec::new(),
172        }
173    }
174
175    /// Returns the nodes with the same parent that occur after this node.
176    pub fn following_siblings(&self) -> Vec<Node<'d>> {
177        use self::Node::*;
178        match *self {
179            Root(_)                  => Vec::new(),
180            Element(n)               => n.following_siblings().into_iter().map(Into::into).collect(),
181            Attribute(_)             => Vec::new(),
182            Text(n)                  => n.following_siblings().into_iter().map(Into::into).collect(),
183            Comment(n)               => n.following_siblings().into_iter().map(Into::into).collect(),
184            ProcessingInstruction(n) => n.following_siblings().into_iter().map(Into::into).collect(),
185            Namespace(_)             => Vec::new(),
186        }
187    }
188
189    /// Returns the [string value] of this node.
190    ///
191    /// [string value]: https://www.w3.org/TR/xpath/#dt-string-value
192    pub fn string_value(&self) -> String {
193        use self::Node::*;
194
195        fn document_order_text_nodes(node: Node, result: &mut String) {
196            for child in node.children() {
197                match child {
198                    Node::Element(_) => document_order_text_nodes(child, result),
199                    Node::Text(n) => result.push_str(n.text()),
200                    _ => {},
201                }
202            }
203        };
204
205        fn text_descendants_string_value(node: Node) -> String {
206            let mut result = String::new();
207            document_order_text_nodes(node, &mut result);
208            result
209        }
210
211        match *self {
212            Root(_)                  => text_descendants_string_value(*self),
213            Element(_)               => text_descendants_string_value(*self),
214            Attribute(n)             => n.value().to_owned(),
215            ProcessingInstruction(n) => n.value().unwrap_or("").to_owned(),
216            Comment(n)               => n.text().to_owned(),
217            Text(n)                  => n.text().to_owned(),
218            Namespace(n)             => n.uri().to_owned(),
219        }
220    }
221
222    unpack!(Node, {
223        root, Root, dom::Root,
224        element, Element, dom::Element,
225        attribute, Attribute, dom::Attribute,
226        text, Text, dom::Text,
227        comment, Comment, dom::Comment,
228        processing_instruction, ProcessingInstruction, dom::ProcessingInstruction
229    });
230
231    pub fn namespace(self) -> Option<Namespace<'d>> {
232        match self {
233            Node::Namespace(n) => Some(n),
234            _ => None,
235        }
236    }
237}
238
239conversion_trait!(Node, {
240    dom::Root                  => Node::Root,
241    dom::Element               => Node::Element,
242    dom::Attribute             => Node::Attribute,
243    dom::Text                  => Node::Text,
244    dom::Comment               => Node::Comment,
245    dom::ProcessingInstruction => Node::ProcessingInstruction
246});
247
248impl<'d> Into<Node<'d>> for dom::ChildOfRoot<'d> {
249    fn into(self) -> Node<'d> {
250        use self::Node::*;
251        match self {
252            dom::ChildOfRoot::Element(n)               => Element(n),
253            dom::ChildOfRoot::Comment(n)               => Comment(n),
254            dom::ChildOfRoot::ProcessingInstruction(n) => ProcessingInstruction(n),
255        }
256    }
257}
258
259impl<'d> Into<Node<'d>> for dom::ChildOfElement<'d> {
260    fn into(self) -> Node<'d> {
261        use self::Node::*;
262        match self {
263            dom::ChildOfElement::Element(n)               => Element(n),
264            dom::ChildOfElement::Text(n)                  => Text(n),
265            dom::ChildOfElement::Comment(n)               => Comment(n),
266            dom::ChildOfElement::ProcessingInstruction(n) => ProcessingInstruction(n),
267        }
268    }
269}
270
271impl<'d> Into<Node<'d>> for dom::ParentOfChild<'d> {
272    fn into(self) -> Node<'d> {
273        use self::Node::*;
274        match self {
275            dom::ParentOfChild::Root(n)    => Root(n),
276            dom::ParentOfChild::Element(n) => Element(n),
277        }
278    }
279}
280
281/// An unordered collection of unique nodes
282#[derive(Debug, Default, Clone, PartialEq)]
283pub struct Nodeset<'d> {
284    nodes: HashSet<Node<'d>>,
285}
286
287impl<'d> Nodeset<'d> {
288    pub fn new() -> Nodeset<'d> {
289        Default::default()
290    }
291
292    /// Checks if the node is present in the set
293    pub fn contains<N>(&self, node: N) -> bool
294        where N: Into<Node<'d>>,
295    {
296        self.nodes.contains(&node.into())
297    }
298
299    /// Add the given node to the set
300    pub fn add<N>(&mut self, node: N)
301        where N: Into<Node<'d>>
302    {
303        self.nodes.insert(node.into());
304    }
305
306    pub fn iter<'a>(&'a self) -> Iter<'a, 'd> {
307        IntoIterator::into_iter(self)
308    }
309
310    pub fn size(&self) -> usize {
311        self.nodes.len()
312    }
313
314    /// Returns the node that occurs first in [document order]
315    ///
316    /// [document order]: https://www.w3.org/TR/xpath/#dt-document-order
317    pub fn document_order_first(&self) -> Option<Node<'d>> {
318        let node = match self.nodes.iter().next() {
319            Some(n) => n,
320            None => return None,
321        };
322
323        if self.nodes.len() == 1 {
324            return Some(node.clone());
325        }
326
327        let order = DocOrder::new(node.document());
328
329        self.nodes.iter().min_by_key(|&&n| order.order_of(n)).cloned()
330    }
331
332    pub fn document_order(&self) -> Vec<Node<'d>> {
333        let mut nodes: Vec<_> = self.iter().collect();
334        if nodes.len() == 1 {
335            return nodes;
336        }
337
338        let doc = match nodes.first().map(Node::document) {
339            Some(doc) => doc,
340            None => return nodes,
341        };
342
343        let order = DocOrder::new(doc);
344        nodes.sort_by_key(|&n| order.order_of(n));
345        nodes
346    }
347}
348
349impl<'d> Extend<Node<'d>> for Nodeset<'d> {
350    fn extend<I>(&mut self, iter: I)
351        where I: IntoIterator<Item = Node<'d>>
352    {
353        self.nodes.extend(iter)
354    }
355}
356
357// Rebuilding this multiple times cannot possibly be performant,
358// but I want to see how widely used this is first before
359// picking an appropriate caching point.
360struct DocOrder<'d>(HashMap<Node<'d>, usize>);
361
362impl<'d> DocOrder<'d> {
363    fn new(doc: dom::Document<'d>) -> Self {
364        let mut idx = 0;
365        let mut stack: Vec<Node> = vec![doc.root().into()];
366        let mut order = HashMap::new();
367
368        while let Some(n) = stack.pop() {
369            order.insert(n, idx);
370            idx += 1;
371
372            stack.extend(n.children().into_iter().rev());
373
374            if let Node::Element(e) = n {
375                // TODO: namespaces
376                stack.extend(e.attributes().into_iter().map(Node::Attribute));
377            }
378        }
379
380        DocOrder(order)
381    }
382
383    fn order_of(&self, node: Node<'d>) -> usize {
384        // See the library-level docs for rationale on this MAX
385        self.0.get(&node).cloned().unwrap_or(usize::MAX)
386    }
387}
388
389impl<'a, 'd: 'a> IntoIterator for &'a Nodeset<'d> {
390    type Item = Node<'d>;
391    type IntoIter = Iter<'a, 'd>;
392
393    fn into_iter(self) -> Iter<'a, 'd> {
394        Iter { iter: self.nodes.iter() }
395    }
396}
397
398impl<'d> IntoIterator for Nodeset<'d> {
399    type Item = Node<'d>;
400    type IntoIter = IntoIter<'d>;
401
402    fn into_iter(self) -> IntoIter<'d> {
403        IntoIter { iter: self.nodes.into_iter() }
404    }
405}
406
407impl<'d> From<OrderedNodes<'d>> for Nodeset<'d> {
408    fn from(other: OrderedNodes<'d>) -> Self {
409        other.0.into_iter().collect()
410    }
411}
412
413impl<'d> FromIterator<Node<'d>> for Nodeset<'d> {
414    fn from_iter<I>(iterator: I) -> Nodeset<'d>
415        where I: IntoIterator<Item = Node<'d>>
416    {
417        Nodeset { nodes: iterator.into_iter().collect() }
418    }
419}
420
421pub struct Iter<'a, 'd: 'a> {
422    iter: hash_set::Iter<'a, Node<'d>>,
423}
424
425impl<'a, 'd: 'a> Iterator for Iter<'a, 'd> {
426    type Item = Node<'d>;
427    fn next(&mut self) -> Option<Node<'d>> { self.iter.next().cloned() }
428}
429
430pub struct IntoIter<'d> {
431    iter: hash_set::IntoIter<Node<'d>>,
432}
433
434impl<'d> Iterator for IntoIter<'d> {
435    type Item = Node<'d>;
436    fn next(&mut self) -> Option<Node<'d>> { self.iter.next() }
437}
438
439#[derive(Debug, Clone, Default, PartialEq)]
440pub struct OrderedNodes<'d>(Vec<Node<'d>>);
441
442impl<'d> OrderedNodes<'d> {
443    pub fn new() -> Self { Default::default() }
444    pub fn size(&self) -> usize { self.0.len() }
445
446    pub fn add(&mut self, node: Node<'d>) {
447        self.0.push(node.into())
448    }
449}
450
451impl<'d> From<Vec<Node<'d>>> for OrderedNodes<'d> {
452    fn from(other: Vec<Node<'d>>) -> Self {
453        OrderedNodes(other)
454    }
455}
456
457impl<'d> From<OrderedNodes<'d>> for Vec<Node<'d>> {
458    fn from(other: OrderedNodes<'d>) -> Self {
459        other.0
460    }
461}
462
463impl<'d> FromIterator<Node<'d>> for OrderedNodes<'d> {
464    fn from_iter<I>(iterator: I) -> OrderedNodes<'d>
465        where I: IntoIterator<Item = Node<'d>>
466    {
467        OrderedNodes(iterator.into_iter().collect())
468    }
469}
470
471#[cfg(test)]
472mod test {
473    use std::borrow::ToOwned;
474
475    use sxd_document::Package;
476
477    use super::{Node, Nodeset};
478    use super::Node::*;
479
480    fn into_node<'d, T: Into<Node<'d>>>(n: T) -> Node<'d> { n.into() }
481
482    #[test]
483    fn nodeset_can_include_all_node_types() {
484        let package = Package::new();
485        let doc = package.as_document();
486        let mut nodes = Nodeset::new();
487
488        let r = doc.root();
489        let e = doc.create_element("element");
490        let a = e.set_attribute_value("name", "value");
491        let t = doc.create_text("text");
492        let c = doc.create_comment("comment");
493        let p = doc.create_processing_instruction("pi", None);
494
495        nodes.add(r);
496        nodes.add(e);
497        nodes.add(a);
498        nodes.add(t);
499        nodes.add(c);
500        nodes.add(p);
501
502        assert_eq!(6, nodes.size());
503        assert!(nodes.contains(Root(r)));
504        assert!(nodes.contains(Element(e)));
505        assert!(nodes.contains(Attribute(a)));
506        assert!(nodes.contains(Text(t)));
507        assert!(nodes.contains(Comment(c)));
508        assert!(nodes.contains(ProcessingInstruction(p)));
509    }
510
511    #[test]
512    fn nodesets_can_be_combined() {
513        let package = Package::new();
514        let doc = package.as_document();
515
516        let mut all_nodes = Nodeset::new();
517        let mut nodes1 = Nodeset::new();
518        let mut nodes2 = Nodeset::new();
519
520        let e1 = doc.create_element("element1");
521        let e2 = doc.create_element("element2");
522
523        all_nodes.add(e1);
524        all_nodes.add(e2);
525
526        nodes1.add(e1);
527        nodes2.add(e2);
528
529        nodes1.extend(nodes2);
530
531        assert_eq!(all_nodes, nodes1);
532    }
533
534    #[test]
535    fn nodeset_knows_first_node_in_document_order() {
536        let package = Package::new();
537        let doc = package.as_document();
538
539        let c1 = doc.create_comment("1");
540        let c2 = doc.create_comment("2");
541        doc.root().append_child(c1);
542        doc.root().append_child(c2);
543
544        let nodes = nodeset![c2, c1];
545
546        assert_eq!(Some(into_node(c1)), nodes.document_order_first());
547    }
548
549    #[test]
550    fn attributes_come_before_children_in_document_order() {
551        let package = Package::new();
552        let doc = package.as_document();
553
554        let parent = doc.create_element("parent");
555        let attr = parent.set_attribute_value("a", "v");
556        let child = doc.create_element("child");
557
558        doc.root().append_child(parent);
559        parent.append_child(child);
560
561        let nodes = nodeset![child, attr];
562
563        assert_eq!(Some(attr.into()), nodes.document_order_first());
564    }
565
566    #[test]
567    fn prefixed_name_of_element_with_preferred_prefix() {
568        let package = Package::new();
569        let doc = package.as_document();
570
571        let e = doc.create_element(("uri", "wow"));
572        e.set_preferred_prefix(Some("prefix"));
573        e.register_prefix("prefix", "uri");
574        let node: Node = e.into();
575
576        assert_eq!(Some("prefix:wow".to_owned()), node.prefixed_name());
577    }
578
579    #[test]
580    fn prefixed_name_of_element_with_prefix() {
581        let package = Package::new();
582        let doc = package.as_document();
583
584        let e = doc.create_element(("uri", "wow"));
585        e.register_prefix("prefix", "uri");
586        let node: Node = e.into();
587
588        assert_eq!(Some("prefix:wow".to_owned()), node.prefixed_name());
589    }
590
591    #[test]
592    fn prefixed_name_of_element_without_prefix() {
593        // See library-level doc about missing prefixes
594        let package = Package::new();
595        let doc = package.as_document();
596
597        let e = doc.create_element(("uri", "wow"));
598        let node: Node = e.into();
599
600        assert_eq!(Some("wow".to_owned()), node.prefixed_name());
601    }
602
603    #[test]
604    fn prefixed_name_of_attribute_with_preferred_prefix() {
605        let package = Package::new();
606        let doc = package.as_document();
607
608        let e = doc.create_element("element");
609        let a = e.set_attribute_value(("uri", "attr"), "value");
610        a.set_preferred_prefix(Some("prefix"));
611        e.register_prefix("prefix", "uri");
612        let node: Node = a.into();
613
614        assert_eq!(Some("prefix:attr".to_owned()), node.prefixed_name());
615    }
616
617    #[test]
618    fn prefixed_name_of_attribute_with_prefix() {
619        let package = Package::new();
620        let doc = package.as_document();
621
622        let e = doc.create_element("element");
623        let a = e.set_attribute_value(("uri", "attr"), "value");
624        e.register_prefix("prefix", "uri");
625        let node: Node = a.into();
626
627        assert_eq!(Some("prefix:attr".to_owned()), node.prefixed_name());
628    }
629
630    #[test]
631    fn prefixed_name_of_processing_instruction() {
632        let package = Package::new();
633        let doc = package.as_document();
634
635        let pi = doc.create_processing_instruction("target", Some("value"));
636        let node: Node = pi.into();
637
638        assert_eq!(Some("target".to_owned()), node.prefixed_name());
639    }
640
641    #[test]
642    fn string_value_of_element_node_is_concatenation_of_descendant_text_nodes() {
643        let package = Package::new();
644        let doc = package.as_document();
645
646        let element = doc.create_element("hello");
647        let child = doc.create_element("world");
648        let text1 = doc.create_text("Presenting: ");
649        let text2 = doc.create_text("Earth");
650        let text3 = doc.create_text("!");
651
652        element.append_child(text1);
653        element.append_child(child);
654        child.append_child(text2);
655        element.append_child(text3);
656
657        assert_eq!("Presenting: Earth!", into_node(element).string_value());
658    }
659
660    #[test]
661    fn string_value_of_attribute_node_is_value() {
662        let package = Package::new();
663        let doc = package.as_document();
664        let element = doc.create_element("hello");
665        let attribute: Node = element.set_attribute_value("world", "Earth").into();
666        assert_eq!("Earth", attribute.string_value());
667    }
668
669    #[test]
670    fn string_value_of_pi_node_is_empty_when_no_value() {
671        let package = Package::new();
672        let doc = package.as_document();
673        let pi: Node = doc.create_processing_instruction("hello", None).into();
674        assert_eq!("", pi.string_value());
675    }
676
677    #[test]
678    fn string_value_of_pi_node_is_the_value_when_value() {
679        let package = Package::new();
680        let doc = package.as_document();
681        let pi: Node = doc.create_processing_instruction("hello", Some("world")).into();
682        assert_eq!("world", pi.string_value());
683    }
684
685    #[test]
686    fn string_value_of_comment_node_is_the_text() {
687        let package = Package::new();
688        let doc = package.as_document();
689        let comment: Node = doc.create_comment("hello world").into();
690        assert_eq!("hello world", comment.string_value());
691    }
692
693    #[test]
694    fn string_value_of_text_node_is_the_text() {
695        let package = Package::new();
696        let doc = package.as_document();
697        let text: Node = doc.create_text("hello world").into();
698        assert_eq!("hello world", text.string_value());
699    }
700}