Skip to main content

xrust/
item.rs

1/*! Sequences and Items.
2
3A [Sequence] is the fundamental data type in XPath. It is a series of zero or more [Item]s.
4
5An [Item] is a [Node], Function or atomic [Value].
6
7[Node]s are defined as a trait.
8*/
9
10use qualname::{NamespacePrefix, NamespaceUri, QName};
11
12use crate::item;
13use crate::output::OutputDefinition;
14use crate::validators::{Schema, ValidationError};
15use crate::value::{Operator, Value};
16use crate::xdmerror::{Error, ErrorKind};
17use crate::xmldecl::{DTD, XMLDecl};
18use std::cmp::Ordering;
19use std::fmt;
20use std::fmt::Formatter;
21use std::rc::Rc;
22
23/// In XPath, the Sequence is the fundamental data structure.
24/// It is an ordered collection of [Item]s.
25/// The Rust implementation is a Vector of reference counted [Item]s.
26///
27/// See [SequenceTrait] for methods.
28pub type Sequence<N> = Vec<Item<N>>;
29
30pub trait SequenceTrait<N: Node> {
31    /// Return the string value of the [Sequence].
32    fn to_string(&self) -> String;
33    /// Return a XML formatted representation of the [Sequence].
34    fn to_xml(&self) -> String;
35    /// Return a XML formatted representation of the [Sequence], controlled by the supplied output definition.
36    fn to_xml_with_options(&self, od: &OutputDefinition) -> String;
37    /// Return a JSON formatted representation of the [Sequence].
38    fn to_json(&self) -> String;
39    /// Return the Effective Boolean Value of the [Sequence].
40    fn to_bool(&self) -> bool;
41    /// Convert the [Sequence] to an integer. The [Sequence] must be a singleton value.
42    fn to_int(&self) -> Result<i64, Error>;
43    /// Push an [Node] to the [Sequence]
44    fn push_node(&mut self, n: &N);
45    /// Push a [Value] to the [Sequence]
46    fn push_value(&mut self, v: &Rc<Value>);
47    /// Push an [Item] to the [Sequence]. This clones the item.
48    fn push_item(&mut self, i: &Item<N>);
49}
50
51impl<N: Node> SequenceTrait<N> for Sequence<N> {
52    /// Returns the string value of the Sequence.
53    fn to_string(&self) -> String {
54        let mut r = String::new();
55        for i in self {
56            r.push_str(i.to_string().as_str())
57        }
58        r
59    }
60    /// Renders the Sequence as XML
61    fn to_xml(&self) -> String {
62        let mut r = String::new();
63        for i in self {
64            r.push_str(i.to_xml().as_str())
65        }
66        r
67    }
68    /// Renders the Sequence as XML
69    fn to_xml_with_options(&self, od: &OutputDefinition) -> String {
70        let mut r = String::new();
71        for i in self {
72            r.push_str(i.to_xml_with_options(od).as_str())
73        }
74        r
75    }
76    /// Renders the Sequence as JSON
77    fn to_json(&self) -> String {
78        let mut r = String::new();
79        for i in self {
80            r.push_str(i.to_json().as_str())
81        }
82        r
83    }
84    /// Push a document's [Node] on to the [Sequence]. This clones the node.
85    fn push_node(&mut self, n: &N) {
86        self.push(Item::Node(n.clone()));
87    }
88    /// Push a [Value] on to the [Sequence].
89    fn push_value(&mut self, v: &Rc<Value>) {
90        self.push(Item::Value(Rc::clone(v)));
91    }
92    //fn new_function(&self, f: Function) -> Sequence {
93    //}
94    /// Push an [Item] on to the [Sequence]. This clones the Item.
95    fn push_item(&mut self, i: &Item<N>) {
96        self.push(i.clone());
97    }
98
99    /// Calculate the effective boolean value of the Sequence
100    fn to_bool(&self) -> bool {
101        if self.is_empty() {
102            false
103        } else {
104            match self[0] {
105                Item::Node(..) => true,
106                _ => {
107                    if self.len() == 1 {
108                        self[0].to_bool()
109                    } else {
110                        false // should be a type error
111                    }
112                }
113            }
114        }
115    }
116
117    /// Convenience routine for integer value of the [Sequence]. The Sequence must be a singleton; i.e. be a single item.
118    fn to_int(&self) -> Result<i64, Error> {
119        if self.len() == 1 {
120            self[0].to_int()
121        } else {
122            Err(Error::new(
123                ErrorKind::TypeError,
124                String::from("type error: sequence is not a singleton"),
125            ))
126        }
127    }
128}
129
130impl<N: Node> From<Value> for Sequence<N> {
131    fn from(v: Value) -> Self {
132        vec![Item::Value(Rc::new(v))]
133    }
134}
135impl<N: Node> From<Item<N>> for Sequence<N> {
136    fn from(i: Item<N>) -> Self {
137        vec![i]
138    }
139}
140
/// The type of a [Node]. The type determines which components of the node are
/// meaningful, such as its name and content.
///
/// Every document must have a single node as its top-level node, and that node is of type "Document".
///
/// Namespace nodes represent the declaration of an XML Namespace.
///
/// The default node type is `Unknown`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum NodeType {
    Document,
    Element,
    Text,
    Attribute,
    Comment,
    ProcessingInstruction,
    Reference,
    Namespace,
    #[default]
    Unknown,
}
159
160impl NodeType {
161    /// Return a string representation of the node type.
162    pub fn to_string(&self) -> &'static str {
163        match self {
164            NodeType::Document => "Document",
165            NodeType::Element => "Element",
166            NodeType::Attribute => "Attribute",
167            NodeType::Text => "Text",
168            NodeType::ProcessingInstruction => "Processing-Instruction",
169            NodeType::Comment => "Comment",
170            NodeType::Reference => "Reference",
171            NodeType::Namespace => "Namespace",
172            NodeType::Unknown => "--None--",
173        }
174    }
175}
176
177impl fmt::Display for NodeType {
178    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
179        f.write_str(self.to_string())
180    }
181}
182
183/// An Item in a [Sequence]. Can be a node, function or [Value].
184///
185/// Functions are not yet implemented.
186#[derive(Clone)]
187pub enum Item<N: Node> {
188    /// A [Node] in the source document.
189    Node(N),
190
191    /// Functions are not yet supported
192    Function,
193
194    /// A scalar value. These are in an Rc since they are frequently shared.
195    Value(Rc<Value>),
196}
197
198impl<N: item::Node> fmt::Display for Item<N> {
199    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
200        // Gives the string value of an item. All items have a string value.
201        let result = match self {
202            Item::Node(n) => n.to_string(),
203            Item::Function => "".to_string(),
204            Item::Value(v) => v.to_string(),
205        };
206        f.write_str(result.as_str())
207    }
208}
209
210impl<N: Node> Item<N> {
211    /// Serialize as XML
212    pub fn to_xml(&self) -> String {
213        match self {
214            Item::Node(n) => n.to_xml(),
215            Item::Function => "".to_string(),
216            Item::Value(v) => v.to_string(),
217        }
218    }
219    /// Serialize as XML, with options
220    pub fn to_xml_with_options(&self, od: &OutputDefinition) -> String {
221        match self {
222            Item::Node(n) => n.to_xml_with_options(od),
223            Item::Function => "".to_string(),
224            Item::Value(v) => v.to_string(),
225        }
226    }
227    /// Serialize as JSON
228    pub fn to_json(&self) -> String {
229        match self {
230            Item::Node(n) => n.to_json(),
231            Item::Function => "".to_string(),
232            Item::Value(v) => v.to_string(),
233        }
234    }
235
236    /// Determine the effective boolean value of the item.
237    /// See XPath 2.4.3.
238    pub fn to_bool(&self) -> bool {
239        match self {
240            Item::Node(..) => true,
241            Item::Function => false,
242            Item::Value(v) => v.to_bool(),
243        }
244    }
245
246    /// Gives the integer value of the item, if possible.
247    pub fn to_int(&self) -> Result<i64, Error> {
248        match self {
249            Item::Node(..) => Result::Err(Error::new(
250                ErrorKind::TypeError,
251                String::from("type error: item is a node"),
252            )),
253            Item::Function => Result::Err(Error::new(
254                ErrorKind::TypeError,
255                String::from("type error: item is a function"),
256            )),
257            Item::Value(v) => match v.to_int() {
258                Ok(i) => Ok(i),
259                Err(e) => Result::Err(e),
260            },
261        }
262    }
263
264    /// Gives the double value of the item. Returns NaN if the value cannot be converted to a double.
265    pub fn to_double(&self) -> f64 {
266        match self {
267            Item::Node(..) => f64::NAN,
268            Item::Function => f64::NAN,
269            Item::Value(v) => v.to_double(),
270        }
271    }
272
273    /// Gives the name of the item. Certain types of Nodes have names, such as element-type nodes. If the item does not have a name returns an empty string.
274    pub fn name(&self) -> Option<QName> {
275        match self {
276            Item::Node(n) => n.name(),
277            _ => None,
278        }
279    }
280
281    // TODO: atomization
282    // fn atomize(&self);
283
284    /// Compare two items.
285    pub fn compare(&self, other: &Item<N>, op: Operator) -> Result<bool, Error> {
286        match self {
287            Item::Value(v) => match other {
288                Item::Value(w) => v.compare(w, op),
289                Item::Node(..) => v.compare(&Value::from(other.to_string()), op),
290                _ => Result::Err(Error::new(ErrorKind::TypeError, String::from("type error"))),
291            },
292            Item::Node(..) => {
293                other.compare(&Item::Value(Rc::new(Value::from(self.to_string()))), op)
294            }
295            _ => Result::Err(Error::new(ErrorKind::TypeError, String::from("type error"))),
296        }
297    }
298
299    /// Is this item a node?
300    pub fn is_node(&self) -> bool {
301        matches!(self, Item::Node(_))
302    }
303
304    /// Is this item an element-type node?
305    pub fn is_element_node(&self) -> bool {
306        match self {
307            Item::Node(n) => matches!(n.node_type(), NodeType::Element),
308            _ => false,
309        }
310    }
311
312    /// Convenience method to set an attribute for a Node-type item.
313    /// If the item is not an element-type node, then this method has no effect.
314    pub fn add_attribute(&self, a: N) -> Result<(), Error> {
315        match self {
316            Item::Node(n) => match n.node_type() {
317                NodeType::Element => n.add_attribute(a),
318                _ => Ok(()),
319            },
320            _ => Ok(()),
321        }
322    }
323
324    /// Gives the type of the item.
325    pub fn item_type(&self) -> &'static str {
326        match self {
327            Item::Node(..) => "Node",
328            Item::Function => "Function",
329            Item::Value(v) => v.value_type(),
330        }
331    }
332    /// Make a shallow copy of an item.
333    /// That is, the item is duplicated but not it's content, including attributes.
334    pub fn shallow_copy(&self) -> Result<Self, Error> {
335        match self {
336            Item::Value(v) => Ok(Item::Value(v.clone())),
337            Item::Node(n) => Ok(Item::Node(n.shallow_copy()?)),
338            _ => Result::Err(Error::new(
339                ErrorKind::NotImplemented,
340                "not implemented".to_string(),
341            )),
342        }
343    }
344    /// Make a deep copy of an item.
345    pub fn deep_copy(&self) -> Result<Self, Error> {
346        match self {
347            Item::Value(v) => Ok(Item::Value(v.clone())),
348            Item::Node(n) => Ok(Item::Node(n.deep_copy()?)),
349            _ => Result::Err(Error::new(
350                ErrorKind::NotImplemented,
351                "not implemented".to_string(),
352            )),
353        }
354    }
355}
356
357impl<N: Node> fmt::Debug for Item<N> {
358    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
359        match self {
360            Item::Node(n) => {
361                write!(
362                    f,
363                    "node type item ({:?})",
364                    n //                    "node type item (node type {}, name \"{}\")",
365                      //                    n.node_type().to_string(),
366                      //                    n.name()
367                )
368            }
369            Item::Function => {
370                write!(f, "function type item")
371            }
372            Item::Value(v) => {
373                write!(f, "value type item ({})", v)
374            }
375        }
376    }
377}
378
379/// Nodes make up a document tree. Nodes must be fully navigable. The tree must be mutable but also stable (i.e. removing a node from the tree does not invalidate the remaining nodes).
380///
381/// Some nodes have names, such as elements. Some nodes have values, such as text or comments. Some have both a name and a value, such as attributes and processing instructions.
382///
383/// Element nodes have children and attributes.
384///
385/// Element nodes may have Namespace nodes attached. These are the declaration of XML Namespaces.
386/// An XML Namespace declaration consists of an optional prefix and a namespace URI.
387/// The namespace-iter() method iterates over all in-scope namespaces, which will include namespaces that are declared on ancestor elements.
388///
389/// Nodes must implement the PartialEq trait. This allows two (sub-)trees to be compared. The comparison is against the XML Infoset of each tree;
390/// i.e. do the trees contain the same information, but not necessarily the same string representation.
391/// For example, the order of attributes does not matter.
392pub trait Node: Clone + PartialEq + fmt::Debug {
393    type NodeIterator: Iterator<Item = Self>;
394
395    /// Create a Document-type node.
396    /// All other types of nodes are created using type-specific methods (new_element, new_text, etc).
397    fn new_document() -> Self;
398
399    /// Get the type of the node
400    fn node_type(&self) -> NodeType;
401    /// Get the name of the node, if it has one.
402    /// A namespace-type returns the prefix as a [QName] where the prefix is the local-part.
403    /// An unprefixed namespace node returns None.
404    fn name(&self) -> Option<QName>;
405    /// Get the value of the node.
406    /// If the node doesn't have a value, then returns a [Value] that is an empty string.
407    /// If the node is a namespace-type node, gives the namespace URI.
408    fn value(&self) -> Rc<Value>;
409
410    /// Resolve a name using the in-scope namespace declarations in the document,
411    /// resulting in a Qualified Name.
412    /// This will fail if the name is not a QName, or has a prefix that is unknown.
413    fn to_qname(&self, name: impl AsRef<str>) -> Result<QName, Error>;
414    /// Convert the node's qualified name to a prefixed name using the in-scope namespace declarations in the document.
415    fn to_prefixed_name(&self) -> String;
416    /// Find the prefix for the given namespace URI using the node's in-scope namespaces. If the namespace is the default, then None is returned.
417    /// If the namespace URI is not found in the in-scope namespaces returns an error.
418    fn to_namespace_prefix(&self, nsuri: &NamespaceUri) -> Result<Option<NamespacePrefix>, Error>;
419    /// Find the namespace URI for the given namespace prefix using the node's in-scope namespaces.
420    /// If the namespace prefix is not found in the in-scope namespaces returns an error.
421    fn to_namespace_uri(&self, prefix: &Option<NamespacePrefix>) -> Result<NamespaceUri, Error>;
422    /// For a namespace node give the prefix. If the namespace is the default namespace, then None is given.
423    /// If the node is not a namespace-type node then returns an error.
424    fn as_namespace_prefix(&self) -> Result<Option<&NamespacePrefix>, Error>;
425    /// For a namespace node give the namespace URI.
426    /// If the node is not a namespace-type node then returns an error.
427    fn as_namespace_uri(&self) -> Result<&NamespaceUri, Error>;
428    /// Is this namespace in scope, or is it a descoping declaration? See Namespaces in XML v1.1 s6.1.
429    /// This only applies to Namespace-type nodes. All other node types return false.
430    fn is_in_scope(&self) -> bool;
431
432    /// Get a unique identifier for this node.
433    fn get_id(&self) -> String;
434
435    /// Get the string value of the node. See XPath ???
436    fn to_string(&self) -> String;
437    /// Serialise the node as XML
438    fn to_xml(&self) -> String;
439    /// Serialise the node as XML, with options such as indentation.
440    fn to_xml_with_options(&self, od: &OutputDefinition) -> String;
441    /// Serialise the node as JSON
442    fn to_json(&self) -> String {
443        String::new()
444    }
445
446    /// Check if two Nodes are the same Node
447    fn is_same(&self, other: &Self) -> bool;
448
449    // Check if the node is attached to the tree
450    fn is_attached(&self) -> bool;
451
452    /// Get the document order of the node. The value returned is relative to the document containing the node.
453    /// Depending on the implementation, this value may be volatile;
454    /// adding or removing nodes to/from the document may invalidate the ordering.
455    fn document_order(&self) -> Vec<usize>;
456    /// Compare the document order of this node with another node in the same document.
457    fn cmp_document_order(&self, other: &Self) -> Ordering;
458
459    /// Check if a node is an element-type
460    fn is_element(&self) -> bool {
461        self.node_type() == NodeType::Element
462    }
463    /// Check if a node is unattached
464    fn is_unattached(&self) -> bool;
465
466    /// Check if a node is an XML ID
467    fn is_id(&self) -> bool;
468    /// Check if a node is an  XML IDREF or IDREFS
469    fn is_idrefs(&self) -> bool;
470
471    /// An iterator over the children of the node
472    fn child_iter(&self) -> Self::NodeIterator;
473    /// Get the first child of the node, if there is one
474    fn first_child(&self) -> Option<Self>
475    where
476        Self: Sized,
477    {
478        self.child_iter().next()
479    }
480    /// An iterator over the ancestors of the node
481    fn ancestor_iter(&self) -> Self::NodeIterator;
482    /// Get the parent of the node. Top-level nodes do not have parents, also nodes that have been detached from the tree.
483    fn parent(&self) -> Option<Self>
484    where
485        Self: Sized,
486    {
487        self.ancestor_iter().next()
488    }
489    /// Get the document node
490    fn owner_document(&self) -> Self;
491    /// An iterator over the descendants of the node
492    fn descend_iter(&self) -> Self::NodeIterator;
493    /// An iterator over the following siblings of the node
494    fn next_iter(&self) -> Self::NodeIterator;
495    /// An iterator over the preceding siblings of the node
496    fn prev_iter(&self) -> Self::NodeIterator;
497    /// An iterator over the attributes of an element
498    fn attribute_iter(&self) -> Self::NodeIterator;
499    /// Get an attribute of the node. Returns a copy of the attribute's value. If the node does not have an attribute of the given name, a value containing an empty string is returned.
500    fn get_attribute(&self, a: &QName) -> Rc<Value>;
501    /// Get an attribute of the node. If the node is not an element returns None. Otherwise returns the attribute node. If the node does not have an attribute of the given name, returns None.
502    fn get_attribute_node(&self, a: &QName) -> Option<Self>;
503
504    /// Create a new element-type node in the same document tree. The new node is not attached to the tree.
505    fn new_element(&self, qn: QName) -> Result<Self, Error>;
506    /// Create a new text-type node in the same document tree. The new node is not attached to the tree.
507    fn new_text(&self, v: Rc<Value>) -> Result<Self, Error>;
508    /// Create a new attribute-type node in the same document tree. The new node is not attached to the tree.
509    fn new_attribute(&self, qn: QName, v: Rc<Value>) -> Result<Self, Error>;
510    /// Create a new comment-type node in the same document tree. The new node is not attached to the tree.
511    fn new_comment(&self, v: Rc<Value>) -> Result<Self, Error>;
512    /// Create a new processing-instruction-type node in the same document tree. The new node is not attached to the tree.
513    fn new_processing_instruction(&self, qn: Rc<Value>, v: Rc<Value>) -> Result<Self, Error>;
514    /// Create a namespace node for an XML Namespace declaration.
515    /// A namespace may be descoped (see Namespace in XML v1.1). In this case, the prefix and namespace URI are given for the namespace being descoped, but with the in_scope argument 'false'.
516    fn new_namespace(
517        &self,
518        ns: NamespaceUri,
519        prefix: Option<NamespacePrefix>,
520        in_scope: bool,
521    ) -> Result<Self, Error>;
522
523    /// Append a node to the child list
524    fn push(&mut self, n: Self) -> Result<(), Error>;
525    /// Remove a node from the tree
526    fn pop(&mut self) -> Result<(), Error>;
527    /// Insert a node in the child list before the given node. The node will be detached from it's current position prior to insertion.
528    fn insert_before(&mut self, n: Self) -> Result<(), Error>;
529    /// Set an attribute. self must be an element-type node. att must be an attribute-type node.
530    /// Returns an error if an attribute with the same name is already attached to this element.
531    fn add_attribute(&self, att: Self) -> Result<(), Error>;
532
533    /// Shallow copy the node, i.e. copy only the node, but not it's attributes or content.
534    fn shallow_copy(&self) -> Result<Self, Error>;
535    /// Deep copy the node, i.e. the node itself and it's attributes and descendants. The resulting top-level node is unattached.
536    fn deep_copy(&self) -> Result<Self, Error>;
537    /// Canonical XML representation of the node.
538    fn get_canonical(&self) -> Result<Self, Error>;
539    /// Get the XML Declaration for the document.
540    fn xmldecl(&self) -> XMLDecl;
541    /// Set the XML Declaration for the document.
542    fn set_xmldecl(&mut self, d: XMLDecl) -> Result<(), Error>;
543    /// Add a namespace declaration to this element-type node.
544    /// NOTE: Does NOT assign a namespace to the element. The element's name defines its namespace.
545    fn add_namespace(&self, ns: Self) -> Result<(), Error>;
546    /// Compare two trees. If a non-document node is used, then the descendant subtrees are compared.
547    fn eq(&self, other: &Self) -> bool {
548        match self.node_type() {
549            NodeType::Document => {
550                if other.node_type() == NodeType::Document {
551                    self.child_iter()
552                        .zip(other.child_iter())
553                        .fold(true, |mut acc, (c, d)| {
554                            if acc {
555                                acc = Node::eq(&c, &d);
556                                acc
557                            } else {
558                                acc
559                            }
560                        })
561                    // TODO: use a method that terminates early on non-equality
562                } else {
563                    false
564                }
565            }
566            NodeType::Element => {
567                // names must match,
568                // attributes must match (order doesn't matter),
569                // content must match
570                if other.node_type() == NodeType::Element {
571                    if self.name() == other.name() {
572                        // Attributes
573                        let mut at_names: Vec<QName> =
574                            self.attribute_iter().map(|a| a.name().unwrap()).collect();
575                        if at_names.len() == other.attribute_iter().count() {
576                            at_names.sort();
577                            if at_names.iter().fold(true, |mut acc, qn| {
578                                if acc {
579                                    acc = self.get_attribute(qn) == other.get_attribute(qn);
580                                    acc
581                                } else {
582                                    acc
583                                }
584                            }) {
585                                // Content
586                                self.child_iter().zip(other.child_iter()).fold(
587                                    true,
588                                    |mut acc, (c, d)| {
589                                        if acc {
590                                            acc = Node::eq(&c, &d);
591                                            acc
592                                        } else {
593                                            acc
594                                        }
595                                    },
596                                )
597                                // TODO: use a method that terminates early on non-equality
598                            } else {
599                                false
600                            }
601                        } else {
602                            false
603                        }
604                    } else {
605                        false
606                    }
607                } else {
608                    false
609                }
610            }
611            NodeType::Text => {
612                if other.node_type() == NodeType::Text {
613                    self.value() == other.value()
614                } else {
615                    false
616                }
617            }
618            NodeType::ProcessingInstruction => {
619                if other.node_type() == NodeType::ProcessingInstruction {
620                    self.name() == other.name() && self.value() == other.value()
621                } else {
622                    false
623                }
624            }
625            _ => self.node_type() == other.node_type(), // Other types of node do not affect the equality
626        }
627    }
628    /// An iterator over the namespace nodes of an element.
629    /// Note: These nodes are calculated at the time the iterator is created.
630    /// It is not guaranteed that the namespace nodes returned
631    /// will specify the current element node as their parent.
632    fn namespace_iter(&self) -> Self::NodeIterator;
633
634    /// Retrieve the internal representation of the DTD, for use in validation functions.
635    fn get_dtd(&self) -> Option<DTD>;
636
637    /// Store an internal representation of the DTD. Does not keep a copy of the original text
638    fn set_dtd(&self, dtd: DTD) -> Result<(), Error>;
639
640    fn validate(&self, schema: Schema) -> Result<(), ValidationError>;
641
642    /// Return a list of nodes that are associated with this document, but are not attached.
643    fn unattached(&self) -> Vec<Self>;
644}