xrust/item.rs
1/*! Sequences and Items.
2
3A [Sequence] is the fundamental data type in XPath. It is a series of zero or more [Item]s.
4
5An [Item] is a [Node], Function or atomic [Value].
6
7[Node]s are defined as a trait.
8*/
9
10use qualname::{NamespacePrefix, NamespaceUri, QName};
11
12use crate::item;
13use crate::output::OutputDefinition;
14use crate::validators::{Schema, ValidationError};
15use crate::value::{Operator, Value};
16use crate::xdmerror::{Error, ErrorKind};
17use crate::xmldecl::{DTD, XMLDecl};
18use std::cmp::Ordering;
19use std::fmt;
20use std::fmt::Formatter;
21use std::rc::Rc;
22
/// In XPath, the Sequence is the fundamental data structure.
/// It is an ordered collection of [Item]s.
/// The Rust implementation is a [Vec] of [Item]s; atomic values inside an
/// [Item] are held in a reference-counted pointer.
///
/// See [SequenceTrait] for methods.
pub type Sequence<N> = Vec<Item<N>>;
29
pub trait SequenceTrait<N: Node> {
    /// Return the string value of the [Sequence].
    fn to_string(&self) -> String;
    /// Return an XML formatted representation of the [Sequence].
    fn to_xml(&self) -> String;
    /// Return an XML formatted representation of the [Sequence], controlled by the supplied output definition.
    fn to_xml_with_options(&self, od: &OutputDefinition) -> String;
    /// Return a JSON formatted representation of the [Sequence].
    fn to_json(&self) -> String;
    /// Return the Effective Boolean Value of the [Sequence].
    fn to_bool(&self) -> bool;
    /// Convert the [Sequence] to an integer. The [Sequence] must be a singleton value.
    fn to_int(&self) -> Result<i64, Error>;
    /// Push a [Node] on to the [Sequence].
    fn push_node(&mut self, n: &N);
    /// Push a [Value] on to the [Sequence].
    fn push_value(&mut self, v: &Rc<Value>);
    /// Push an [Item] on to the [Sequence]. This clones the item.
    fn push_item(&mut self, i: &Item<N>);
}
50
51impl<N: Node> SequenceTrait<N> for Sequence<N> {
52 /// Returns the string value of the Sequence.
53 fn to_string(&self) -> String {
54 let mut r = String::new();
55 for i in self {
56 r.push_str(i.to_string().as_str())
57 }
58 r
59 }
60 /// Renders the Sequence as XML
61 fn to_xml(&self) -> String {
62 let mut r = String::new();
63 for i in self {
64 r.push_str(i.to_xml().as_str())
65 }
66 r
67 }
68 /// Renders the Sequence as XML
69 fn to_xml_with_options(&self, od: &OutputDefinition) -> String {
70 let mut r = String::new();
71 for i in self {
72 r.push_str(i.to_xml_with_options(od).as_str())
73 }
74 r
75 }
76 /// Renders the Sequence as JSON
77 fn to_json(&self) -> String {
78 let mut r = String::new();
79 for i in self {
80 r.push_str(i.to_json().as_str())
81 }
82 r
83 }
84 /// Push a document's [Node] on to the [Sequence]. This clones the node.
85 fn push_node(&mut self, n: &N) {
86 self.push(Item::Node(n.clone()));
87 }
88 /// Push a [Value] on to the [Sequence].
89 fn push_value(&mut self, v: &Rc<Value>) {
90 self.push(Item::Value(Rc::clone(v)));
91 }
92 //fn new_function(&self, f: Function) -> Sequence {
93 //}
94 /// Push an [Item] on to the [Sequence]. This clones the Item.
95 fn push_item(&mut self, i: &Item<N>) {
96 self.push(i.clone());
97 }
98
99 /// Calculate the effective boolean value of the Sequence
100 fn to_bool(&self) -> bool {
101 if self.is_empty() {
102 false
103 } else {
104 match self[0] {
105 Item::Node(..) => true,
106 _ => {
107 if self.len() == 1 {
108 self[0].to_bool()
109 } else {
110 false // should be a type error
111 }
112 }
113 }
114 }
115 }
116
117 /// Convenience routine for integer value of the [Sequence]. The Sequence must be a singleton; i.e. be a single item.
118 fn to_int(&self) -> Result<i64, Error> {
119 if self.len() == 1 {
120 self[0].to_int()
121 } else {
122 Err(Error::new(
123 ErrorKind::TypeError,
124 String::from("type error: sequence is not a singleton"),
125 ))
126 }
127 }
128}
129
130impl<N: Node> From<Value> for Sequence<N> {
131 fn from(v: Value) -> Self {
132 vec![Item::Value(Rc::new(v))]
133 }
134}
135impl<N: Node> From<Item<N>> for Sequence<N> {
136 fn from(i: Item<N>) -> Self {
137 vec![i]
138 }
139}
140
/// All [Node]s have a type. The type of the [Node] determines what components are meaningful, such as name and content.
///
/// Every document must have a single node as its toplevel node that is of type "Document".
///
/// Namespace nodes represent the declaration of an XML Namespace.
///
/// The default node type is [`NodeType::Unknown`].
#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
pub enum NodeType {
    Document,
    Element,
    Text,
    Attribute,
    Comment,
    ProcessingInstruction,
    Reference,
    Namespace,
    #[default]
    Unknown,
}

impl NodeType {
    /// Return the human-readable label for the node type.
    ///
    /// The label is a static string; no allocation takes place.
    pub fn to_string(&self) -> &'static str {
        match *self {
            NodeType::Unknown => "--None--",
            NodeType::Namespace => "Namespace",
            NodeType::Reference => "Reference",
            NodeType::Comment => "Comment",
            NodeType::ProcessingInstruction => "Processing-Instruction",
            NodeType::Text => "Text",
            NodeType::Attribute => "Attribute",
            NodeType::Element => "Element",
            NodeType::Document => "Document",
        }
    }
}

/// Displays the same label that [`NodeType::to_string`] returns.
impl fmt::Display for NodeType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Call the inherent method explicitly so that it is not confused with `ToString::to_string`.
        let label: &'static str = NodeType::to_string(self);
        f.write_str(label)
    }
}
182
/// An Item in a [Sequence]. Can be a node, function or [Value].
///
/// Functions are not yet implemented; the `Function` variant is a placeholder that carries no data.
#[derive(Clone)]
pub enum Item<N: Node> {
    /// A [Node] in the source document.
    Node(N),

    /// Functions are not yet supported.
    Function,

    /// A scalar value. These are in an Rc since they are frequently shared.
    Value(Rc<Value>),
}
197
198impl<N: item::Node> fmt::Display for Item<N> {
199 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
200 // Gives the string value of an item. All items have a string value.
201 let result = match self {
202 Item::Node(n) => n.to_string(),
203 Item::Function => "".to_string(),
204 Item::Value(v) => v.to_string(),
205 };
206 f.write_str(result.as_str())
207 }
208}
209
210impl<N: Node> Item<N> {
211 /// Serialize as XML
212 pub fn to_xml(&self) -> String {
213 match self {
214 Item::Node(n) => n.to_xml(),
215 Item::Function => "".to_string(),
216 Item::Value(v) => v.to_string(),
217 }
218 }
219 /// Serialize as XML, with options
220 pub fn to_xml_with_options(&self, od: &OutputDefinition) -> String {
221 match self {
222 Item::Node(n) => n.to_xml_with_options(od),
223 Item::Function => "".to_string(),
224 Item::Value(v) => v.to_string(),
225 }
226 }
227 /// Serialize as JSON
228 pub fn to_json(&self) -> String {
229 match self {
230 Item::Node(n) => n.to_json(),
231 Item::Function => "".to_string(),
232 Item::Value(v) => v.to_string(),
233 }
234 }
235
236 /// Determine the effective boolean value of the item.
237 /// See XPath 2.4.3.
238 pub fn to_bool(&self) -> bool {
239 match self {
240 Item::Node(..) => true,
241 Item::Function => false,
242 Item::Value(v) => v.to_bool(),
243 }
244 }
245
246 /// Gives the integer value of the item, if possible.
247 pub fn to_int(&self) -> Result<i64, Error> {
248 match self {
249 Item::Node(..) => Result::Err(Error::new(
250 ErrorKind::TypeError,
251 String::from("type error: item is a node"),
252 )),
253 Item::Function => Result::Err(Error::new(
254 ErrorKind::TypeError,
255 String::from("type error: item is a function"),
256 )),
257 Item::Value(v) => match v.to_int() {
258 Ok(i) => Ok(i),
259 Err(e) => Result::Err(e),
260 },
261 }
262 }
263
264 /// Gives the double value of the item. Returns NaN if the value cannot be converted to a double.
265 pub fn to_double(&self) -> f64 {
266 match self {
267 Item::Node(..) => f64::NAN,
268 Item::Function => f64::NAN,
269 Item::Value(v) => v.to_double(),
270 }
271 }
272
273 /// Gives the name of the item. Certain types of Nodes have names, such as element-type nodes. If the item does not have a name returns an empty string.
274 pub fn name(&self) -> Option<QName> {
275 match self {
276 Item::Node(n) => n.name(),
277 _ => None,
278 }
279 }
280
281 // TODO: atomization
282 // fn atomize(&self);
283
284 /// Compare two items.
285 pub fn compare(&self, other: &Item<N>, op: Operator) -> Result<bool, Error> {
286 match self {
287 Item::Value(v) => match other {
288 Item::Value(w) => v.compare(w, op),
289 Item::Node(..) => v.compare(&Value::from(other.to_string()), op),
290 _ => Result::Err(Error::new(ErrorKind::TypeError, String::from("type error"))),
291 },
292 Item::Node(..) => {
293 other.compare(&Item::Value(Rc::new(Value::from(self.to_string()))), op)
294 }
295 _ => Result::Err(Error::new(ErrorKind::TypeError, String::from("type error"))),
296 }
297 }
298
299 /// Is this item a node?
300 pub fn is_node(&self) -> bool {
301 matches!(self, Item::Node(_))
302 }
303
304 /// Is this item an element-type node?
305 pub fn is_element_node(&self) -> bool {
306 match self {
307 Item::Node(n) => matches!(n.node_type(), NodeType::Element),
308 _ => false,
309 }
310 }
311
312 /// Convenience method to set an attribute for a Node-type item.
313 /// If the item is not an element-type node, then this method has no effect.
314 pub fn add_attribute(&self, a: N) -> Result<(), Error> {
315 match self {
316 Item::Node(n) => match n.node_type() {
317 NodeType::Element => n.add_attribute(a),
318 _ => Ok(()),
319 },
320 _ => Ok(()),
321 }
322 }
323
324 /// Gives the type of the item.
325 pub fn item_type(&self) -> &'static str {
326 match self {
327 Item::Node(..) => "Node",
328 Item::Function => "Function",
329 Item::Value(v) => v.value_type(),
330 }
331 }
332 /// Make a shallow copy of an item.
333 /// That is, the item is duplicated but not it's content, including attributes.
334 pub fn shallow_copy(&self) -> Result<Self, Error> {
335 match self {
336 Item::Value(v) => Ok(Item::Value(v.clone())),
337 Item::Node(n) => Ok(Item::Node(n.shallow_copy()?)),
338 _ => Result::Err(Error::new(
339 ErrorKind::NotImplemented,
340 "not implemented".to_string(),
341 )),
342 }
343 }
344 /// Make a deep copy of an item.
345 pub fn deep_copy(&self) -> Result<Self, Error> {
346 match self {
347 Item::Value(v) => Ok(Item::Value(v.clone())),
348 Item::Node(n) => Ok(Item::Node(n.deep_copy()?)),
349 _ => Result::Err(Error::new(
350 ErrorKind::NotImplemented,
351 "not implemented".to_string(),
352 )),
353 }
354 }
355}
356
357impl<N: Node> fmt::Debug for Item<N> {
358 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
359 match self {
360 Item::Node(n) => {
361 write!(
362 f,
363 "node type item ({:?})",
364 n // "node type item (node type {}, name \"{}\")",
365 // n.node_type().to_string(),
366 // n.name()
367 )
368 }
369 Item::Function => {
370 write!(f, "function type item")
371 }
372 Item::Value(v) => {
373 write!(f, "value type item ({})", v)
374 }
375 }
376 }
377}
378
379/// Nodes make up a document tree. Nodes must be fully navigable. The tree must be mutable but also stable (i.e. removing a node from the tree does not invalidate the remaining nodes).
380///
381/// Some nodes have names, such as elements. Some nodes have values, such as text or comments. Some have both a name and a value, such as attributes and processing instructions.
382///
383/// Element nodes have children and attributes.
384///
385/// Element nodes may have Namespace nodes attached. These are the declaration of XML Namespaces.
386/// An XML Namespace declaration consists of an optional prefix and a namespace URI.
387/// The namespace-iter() method iterates over all in-scope namespaces, which will include namespaces that are declared on ancestor elements.
388///
389/// Nodes must implement the PartialEq trait. This allows two (sub-)trees to be compared. The comparison is against the XML Infoset of each tree;
390/// i.e. do the trees contain the same information, but not necessarily the same string representation.
391/// For example, the order of attributes does not matter.
392pub trait Node: Clone + PartialEq + fmt::Debug {
393 type NodeIterator: Iterator<Item = Self>;
394
395 /// Create a Document-type node.
396 /// All other types of nodes are created using type-specific methods (new_element, new_text, etc).
397 fn new_document() -> Self;
398
399 /// Get the type of the node
400 fn node_type(&self) -> NodeType;
401 /// Get the name of the node, if it has one.
402 /// A namespace-type returns the prefix as a [QName] where the prefix is the local-part.
403 /// An unprefixed namespace node returns None.
404 fn name(&self) -> Option<QName>;
405 /// Get the value of the node.
406 /// If the node doesn't have a value, then returns a [Value] that is an empty string.
407 /// If the node is a namespace-type node, gives the namespace URI.
408 fn value(&self) -> Rc<Value>;
409
410 /// Resolve a name using the in-scope namespace declarations in the document,
411 /// resulting in a Qualified Name.
412 /// This will fail if the name is not a QName, or has a prefix that is unknown.
413 fn to_qname(&self, name: impl AsRef<str>) -> Result<QName, Error>;
414 /// Convert the node's qualified name to a prefixed name using the in-scope namespace declarations in the document.
415 fn to_prefixed_name(&self) -> String;
416 /// Find the prefix for the given namespace URI using the node's in-scope namespaces. If the namespace is the default, then None is returned.
417 /// If the namespace URI is not found in the in-scope namespaces returns an error.
418 fn to_namespace_prefix(&self, nsuri: &NamespaceUri) -> Result<Option<NamespacePrefix>, Error>;
419 /// Find the namespace URI for the given namespace prefix using the node's in-scope namespaces.
420 /// If the namespace prefix is not found in the in-scope namespaces returns an error.
421 fn to_namespace_uri(&self, prefix: &Option<NamespacePrefix>) -> Result<NamespaceUri, Error>;
422 /// For a namespace node give the prefix. If the namespace is the default namespace, then None is given.
423 /// If the node is not a namespace-type node then returns an error.
424 fn as_namespace_prefix(&self) -> Result<Option<&NamespacePrefix>, Error>;
425 /// For a namespace node give the namespace URI.
426 /// If the node is not a namespace-type node then returns an error.
427 fn as_namespace_uri(&self) -> Result<&NamespaceUri, Error>;
428 /// Is this namespace in scope, or is it a descoping declaration? See Namespaces in XML v1.1 s6.1.
429 /// This only applies to Namespace-type nodes. All other node types return false.
430 fn is_in_scope(&self) -> bool;
431
432 /// Get a unique identifier for this node.
433 fn get_id(&self) -> String;
434
435 /// Get the string value of the node. See XPath ???
436 fn to_string(&self) -> String;
437 /// Serialise the node as XML
438 fn to_xml(&self) -> String;
439 /// Serialise the node as XML, with options such as indentation.
440 fn to_xml_with_options(&self, od: &OutputDefinition) -> String;
441 /// Serialise the node as JSON
442 fn to_json(&self) -> String {
443 String::new()
444 }
445
446 /// Check if two Nodes are the same Node
447 fn is_same(&self, other: &Self) -> bool;
448
449 // Check if the node is attached to the tree
450 fn is_attached(&self) -> bool;
451
452 /// Get the document order of the node. The value returned is relative to the document containing the node.
453 /// Depending on the implementation, this value may be volatile;
454 /// adding or removing nodes to/from the document may invalidate the ordering.
455 fn document_order(&self) -> Vec<usize>;
456 /// Compare the document order of this node with another node in the same document.
457 fn cmp_document_order(&self, other: &Self) -> Ordering;
458
459 /// Check if a node is an element-type
460 fn is_element(&self) -> bool {
461 self.node_type() == NodeType::Element
462 }
463 /// Check if a node is unattached
464 fn is_unattached(&self) -> bool;
465
466 /// Check if a node is an XML ID
467 fn is_id(&self) -> bool;
468 /// Check if a node is an XML IDREF or IDREFS
469 fn is_idrefs(&self) -> bool;
470
471 /// An iterator over the children of the node
472 fn child_iter(&self) -> Self::NodeIterator;
473 /// Get the first child of the node, if there is one
474 fn first_child(&self) -> Option<Self>
475 where
476 Self: Sized,
477 {
478 self.child_iter().next()
479 }
480 /// An iterator over the ancestors of the node
481 fn ancestor_iter(&self) -> Self::NodeIterator;
482 /// Get the parent of the node. Top-level nodes do not have parents, also nodes that have been detached from the tree.
483 fn parent(&self) -> Option<Self>
484 where
485 Self: Sized,
486 {
487 self.ancestor_iter().next()
488 }
489 /// Get the document node
490 fn owner_document(&self) -> Self;
491 /// An iterator over the descendants of the node
492 fn descend_iter(&self) -> Self::NodeIterator;
493 /// An iterator over the following siblings of the node
494 fn next_iter(&self) -> Self::NodeIterator;
495 /// An iterator over the preceding siblings of the node
496 fn prev_iter(&self) -> Self::NodeIterator;
497 /// An iterator over the attributes of an element
498 fn attribute_iter(&self) -> Self::NodeIterator;
499 /// Get an attribute of the node. Returns a copy of the attribute's value. If the node does not have an attribute of the given name, a value containing an empty string is returned.
500 fn get_attribute(&self, a: &QName) -> Rc<Value>;
501 /// Get an attribute of the node. If the node is not an element returns None. Otherwise returns the attribute node. If the node does not have an attribute of the given name, returns None.
502 fn get_attribute_node(&self, a: &QName) -> Option<Self>;
503
504 /// Create a new element-type node in the same document tree. The new node is not attached to the tree.
505 fn new_element(&self, qn: QName) -> Result<Self, Error>;
506 /// Create a new text-type node in the same document tree. The new node is not attached to the tree.
507 fn new_text(&self, v: Rc<Value>) -> Result<Self, Error>;
508 /// Create a new attribute-type node in the same document tree. The new node is not attached to the tree.
509 fn new_attribute(&self, qn: QName, v: Rc<Value>) -> Result<Self, Error>;
510 /// Create a new comment-type node in the same document tree. The new node is not attached to the tree.
511 fn new_comment(&self, v: Rc<Value>) -> Result<Self, Error>;
512 /// Create a new processing-instruction-type node in the same document tree. The new node is not attached to the tree.
513 fn new_processing_instruction(&self, qn: Rc<Value>, v: Rc<Value>) -> Result<Self, Error>;
514 /// Create a namespace node for an XML Namespace declaration.
515 /// A namespace may be descoped (see Namespace in XML v1.1). In this case, the prefix and namespace URI are given for the namespace being descoped, but with the in_scope argument 'false'.
516 fn new_namespace(
517 &self,
518 ns: NamespaceUri,
519 prefix: Option<NamespacePrefix>,
520 in_scope: bool,
521 ) -> Result<Self, Error>;
522
523 /// Append a node to the child list
524 fn push(&mut self, n: Self) -> Result<(), Error>;
525 /// Remove a node from the tree
526 fn pop(&mut self) -> Result<(), Error>;
527 /// Insert a node in the child list before the given node. The node will be detached from it's current position prior to insertion.
528 fn insert_before(&mut self, n: Self) -> Result<(), Error>;
529 /// Set an attribute. self must be an element-type node. att must be an attribute-type node.
530 /// Returns an error if an attribute with the same name is already attached to this element.
531 fn add_attribute(&self, att: Self) -> Result<(), Error>;
532
533 /// Shallow copy the node, i.e. copy only the node, but not it's attributes or content.
534 fn shallow_copy(&self) -> Result<Self, Error>;
535 /// Deep copy the node, i.e. the node itself and it's attributes and descendants. The resulting top-level node is unattached.
536 fn deep_copy(&self) -> Result<Self, Error>;
537 /// Canonical XML representation of the node.
538 fn get_canonical(&self) -> Result<Self, Error>;
539 /// Get the XML Declaration for the document.
540 fn xmldecl(&self) -> XMLDecl;
541 /// Set the XML Declaration for the document.
542 fn set_xmldecl(&mut self, d: XMLDecl) -> Result<(), Error>;
543 /// Add a namespace declaration to this element-type node.
544 /// NOTE: Does NOT assign a namespace to the element. The element's name defines its namespace.
545 fn add_namespace(&self, ns: Self) -> Result<(), Error>;
546 /// Compare two trees. If a non-document node is used, then the descendant subtrees are compared.
547 fn eq(&self, other: &Self) -> bool {
548 match self.node_type() {
549 NodeType::Document => {
550 if other.node_type() == NodeType::Document {
551 self.child_iter()
552 .zip(other.child_iter())
553 .fold(true, |mut acc, (c, d)| {
554 if acc {
555 acc = Node::eq(&c, &d);
556 acc
557 } else {
558 acc
559 }
560 })
561 // TODO: use a method that terminates early on non-equality
562 } else {
563 false
564 }
565 }
566 NodeType::Element => {
567 // names must match,
568 // attributes must match (order doesn't matter),
569 // content must match
570 if other.node_type() == NodeType::Element {
571 if self.name() == other.name() {
572 // Attributes
573 let mut at_names: Vec<QName> =
574 self.attribute_iter().map(|a| a.name().unwrap()).collect();
575 if at_names.len() == other.attribute_iter().count() {
576 at_names.sort();
577 if at_names.iter().fold(true, |mut acc, qn| {
578 if acc {
579 acc = self.get_attribute(qn) == other.get_attribute(qn);
580 acc
581 } else {
582 acc
583 }
584 }) {
585 // Content
586 self.child_iter().zip(other.child_iter()).fold(
587 true,
588 |mut acc, (c, d)| {
589 if acc {
590 acc = Node::eq(&c, &d);
591 acc
592 } else {
593 acc
594 }
595 },
596 )
597 // TODO: use a method that terminates early on non-equality
598 } else {
599 false
600 }
601 } else {
602 false
603 }
604 } else {
605 false
606 }
607 } else {
608 false
609 }
610 }
611 NodeType::Text => {
612 if other.node_type() == NodeType::Text {
613 self.value() == other.value()
614 } else {
615 false
616 }
617 }
618 NodeType::ProcessingInstruction => {
619 if other.node_type() == NodeType::ProcessingInstruction {
620 self.name() == other.name() && self.value() == other.value()
621 } else {
622 false
623 }
624 }
625 _ => self.node_type() == other.node_type(), // Other types of node do not affect the equality
626 }
627 }
628 /// An iterator over the namespace nodes of an element.
629 /// Note: These nodes are calculated at the time the iterator is created.
630 /// It is not guaranteed that the namespace nodes returned
631 /// will specify the current element node as their parent.
632 fn namespace_iter(&self) -> Self::NodeIterator;
633
634 /// Retrieve the internal representation of the DTD, for use in validation functions.
635 fn get_dtd(&self) -> Option<DTD>;
636
637 /// Store an internal representation of the DTD. Does not keep a copy of the original text
638 fn set_dtd(&self, dtd: DTD) -> Result<(), Error>;
639
640 fn validate(&self, schema: Schema) -> Result<(), ValidationError>;
641
642 /// Return a list of nodes that are associated with this document, but are not attached.
643 fn unattached(&self) -> Vec<Self>;
644}