sxd_xpath_visitor/
lib.rs

1//! # SXD-XPath
2//!
3//! This is a pure-Rust implementation of XPath, a language for
4//! addressing parts of an XML document. It aims to implement [version
5//! 1.0 of the XPath specification][spec].
6//!
7//! XPath is wonderful for quickly navigating the complicated
8//! hierarchy that is present in many XML documents while having a
9//! concise syntax.
10//!
11//! [spec]: https://www.w3.org/TR/xpath/
12//!
13//! ### Examples
14//!
15//! The quickest way to evaluate an XPath against an XML document is
16//! to use [`evaluate_xpath`][evaluate_xpath].
17//!
18//! ```
19//! use sxd_document::parser;
20//! use sxd_xpath::{evaluate_xpath, Value};
21//!
22//! fn main() {
23//!     let package = parser::parse("<root>hello</root>").expect("failed to parse XML");
24//!     let document = package.as_document();
25//!
26//!     let value = evaluate_xpath(&document, "/root").expect("XPath evaluation failed");
27//!
28//!     assert_eq!("hello", value.string());
29//! }
30//! ```
31//!
32//! Evaluating an XPath returns a [`Value`][], representing the
33//! primary XPath types.
34//!
35//! For more complex needs, XPath parsing and evaluation can be split
36//! apart. This allows the user to specify namespaces, variables,
37//! extra functions, and which node evaluation should begin with. You
38//! may also compile an XPath once and reuse it multiple times.
39//!
40//! Parsing is handled with the [`Factory`][] and evaluation relies on
41//! the [`Context`][]. Similar functionality to above can be
42//! accomplished:
43//!
44//! ```
45//! use sxd_document::parser;
46//! use sxd_xpath::{Factory, Context, Value};
47//!
48//! fn main() {
49//!     let package = parser::parse("<root>hello</root>")
50//!         .expect("failed to parse XML");
51//!     let document = package.as_document();
52//!
53//!     let factory = Factory::new();
54//!     let xpath = factory.build("/root").expect("Could not compile XPath");
55//!
56//!     let context = Context::new();
57//!
58//!     let value = xpath.evaluate(&context, document.root())
59//!         .expect("XPath evaluation failed");
60//!
61//!     assert_eq!("hello", value.string());
62//! }
63//! ```
64//!
65//! See [`Context`][] for details on how to customize the
66//! evaluation of the XPath.
67//!
68//! [evaluate_xpath]: fn.evaluate_xpath.html
69//! [`Value`]: enum.Value.html
70//! [`Factory`]: struct.Factory.html
71//! [`Context`]: context/struct.Context.html
72//!
73//! ### Programmatically-created XML
74//!
75//! The XPath specification assumes certain properties about the XML
76//! being processed. If you are processing XML that was parsed from
77//! text, this will be true by construction. If you have
78//! programmatically created XML, please note the following cases.
79//!
80//! #### Namespaces
81//!
82//! If you have programmatically created XML with namespaces but not
83//! defined prefixes, some XPath behavior may be confusing:
84//!
85//! 1. The `name` method will not include a prefix, even if the
86//! element or attribute has a namespace.
87//! 2. The `namespace` axis will not include namespaces without
88//! prefixes.
89//!
90//! #### Document order
91//!
92//! If you have programmatically created XML but not attached the
93//! nodes to the document, some XPath behavior may be confusing:
94//!
95//! 1. These nodes have no [*document order*]. If you create a
96//! variable containing these nodes and apply a predicate to them,
97//! these nodes will appear after any nodes that are present in the
98//! document, but the relative order of the nodes is undefined.
99//!
100//! [*document order*]: https://www.w3.org/TR/xpath/#dt-document-order
101
102use snafu::{ResultExt, Snafu};
103use std::borrow::ToOwned;
104use std::string;
105use sxd_document::dom::Document;
106use sxd_document::{PrefixedName, QName};
107
108use crate::parser::Parser;
109use crate::tokenizer::{TokenDeabbreviator, Tokenizer};
110
111pub use crate::context::Context;
112
113#[macro_use]
114pub mod macros;
115pub mod axis;
116pub mod context;
117pub mod expression;
118pub mod function;
119pub mod node_test;
120pub mod nodeset;
121pub mod parser;
122mod token;
123pub mod tokenizer;
124
// These belong in the document
126
/// An owned name made of an optional prefix and a local part.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OwnedPrefixedName {
    prefix: Option<String>,
    local_part: String,
}

/// A bare string is taken as the local part; the resulting name has no prefix.
impl<'a> From<&'a str> for OwnedPrefixedName {
    fn from(local_part: &'a str) -> Self {
        let local_part = String::from(local_part);
        Self {
            prefix: None,
            local_part,
        }
    }
}

/// A pair is interpreted as `(prefix, local_part)`.
impl<'a> From<(&'a str, &'a str)> for OwnedPrefixedName {
    fn from(parts: (&'a str, &'a str)) -> Self {
        let (prefix, local_part) = parts;
        Self {
            prefix: Some(String::from(prefix)),
            local_part: String::from(local_part),
        }
    }
}
150
151impl<'a> From<PrefixedName<'a>> for OwnedPrefixedName {
152    fn from(name: PrefixedName<'a>) -> Self {
153        OwnedPrefixedName {
154            prefix: name.prefix().map(Into::into),
155            local_part: name.local_part().into(),
156        }
157    }
158}
159
160impl<'a> From<&'a OwnedPrefixedName> for OwnedPrefixedName {
161    fn from(name: &'a OwnedPrefixedName) -> Self {
162        OwnedPrefixedName {
163            prefix: name.prefix.to_owned(),
164            local_part: name.local_part.to_owned(),
165        }
166    }
167}
168
/// An owned name made of an optional namespace URI and a local part.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OwnedQName {
    namespace_uri: Option<String>,
    local_part: String,
}

/// A bare string is taken as the local part; the resulting name has no
/// namespace URI.
impl<'a> From<&'a str> for OwnedQName {
    fn from(local_part: &'a str) -> Self {
        let local_part = String::from(local_part);
        Self {
            namespace_uri: None,
            local_part,
        }
    }
}

/// A pair is interpreted as `(namespace_uri, local_part)`.
impl<'a> From<(&'a str, &'a str)> for OwnedQName {
    fn from(parts: (&'a str, &'a str)) -> Self {
        let (namespace_uri, local_part) = parts;
        Self {
            namespace_uri: Some(String::from(namespace_uri)),
            local_part: String::from(local_part),
        }
    }
}
192
193impl<'a> From<QName<'a>> for OwnedQName {
194    fn from(name: QName<'a>) -> Self {
195        OwnedQName {
196            namespace_uri: name.namespace_uri().map(Into::into),
197            local_part: name.local_part().into(),
198        }
199    }
200}
201
/// A `Value` whose document lifetime is fixed to `'static`.
pub type LiteralValue = Value<'static>;

/// The primary types of values that an XPath expression accepts
/// as an argument or returns as a result.
#[derive(Debug, Clone, PartialEq)]
pub enum Value<'d> {
    /// A true or false value
    Boolean(bool),
    /// An IEEE-754 double-precision floating point number
    Number(f64),
    /// A string
    String(string::String),
    /// A collection of unique nodes
    Nodeset(nodeset::Nodeset<'d>),
}
217
/// Converts a string to a number following the XPath 1.0 `number()` rules.
///
/// The accepted form is optional whitespace, an optional minus sign, a
/// number (`Digits`, `Digits.`, `Digits.Digits` or `.Digits`) and optional
/// whitespace. Every other string yields NaN. In particular, forms that
/// Rust's `f64` parser would accept but XPath does not — a leading `+`,
/// exponents such as `1e5`, and the words `inf`, `infinity` and `nan` — are
/// rejected here.
fn str_to_num(s: &str) -> f64 {
    /// True when every byte of `part` is an ASCII digit (also true for "").
    fn only_digits(part: &str) -> bool {
        part.bytes().all(|b| b.is_ascii_digit())
    }

    // XPath whitespace is exactly space, tab, CR and LF; `str::trim` would
    // additionally strip other Unicode whitespace.
    let trimmed = s.trim_matches(|c| c == ' ' || c == '\t' || c == '\r' || c == '\n');

    // Only a single leading minus sign is permitted.
    let unsigned = if trimmed.starts_with('-') {
        &trimmed[1..]
    } else {
        trimmed
    };

    let mut pieces = unsigned.splitn(2, '.');
    let integral = pieces.next().unwrap_or("");
    let fractional = pieces.next();

    // At least one digit must be present on either side of the point.
    let has_digit = !integral.is_empty() || fractional.map_or(false, |f| !f.is_empty());
    let well_formed =
        has_digit && only_digits(integral) && fractional.map_or(true, only_digits);

    if well_formed {
        // The grammar check above guarantees this is a plain decimal, so the
        // standard parser is only used for the actual conversion.
        trimmed.parse().unwrap_or(::std::f64::NAN)
    } else {
        ::std::f64::NAN
    }
}
221
222impl<'d> Value<'d> {
223    pub fn boolean(&self) -> bool {
224        use crate::Value::*;
225        match *self {
226            Boolean(val) => val,
227            Number(n) => n != 0.0 && !n.is_nan(),
228            String(ref s) => !s.is_empty(),
229            Nodeset(ref nodeset) => nodeset.size() > 0,
230        }
231    }
232
233    pub fn into_boolean(self) -> bool {
234        self.boolean()
235    }
236
237    pub fn number(&self) -> f64 {
238        use crate::Value::*;
239        match *self {
240            Boolean(val) => {
241                if val {
242                    1.0
243                } else {
244                    0.0
245                }
246            }
247            Number(val) => val,
248            String(ref s) => str_to_num(s),
249            Nodeset(..) => str_to_num(&self.string()),
250        }
251    }
252
253    pub fn into_number(self) -> f64 {
254        self.number()
255    }
256
257    pub fn string(&self) -> string::String {
258        use crate::Value::*;
259        match *self {
260            Boolean(v) => v.to_string(),
261            Number(n) => {
262                if n.is_infinite() {
263                    if n.signum() < 0.0 {
264                        "-Infinity".to_owned()
265                    } else {
266                        "Infinity".to_owned()
267                    }
268                } else {
269                    n.to_string()
270                }
271            }
272            String(ref val) => val.clone(),
273            Nodeset(ref ns) => match ns.document_order_first() {
274                Some(n) => n.string_value(),
275                None => "".to_owned(),
276            },
277        }
278    }
279
280    pub fn into_string(self) -> string::String {
281        use crate::Value::*;
282        match self {
283            String(val) => val,
284            other => other.string(),
285        }
286    }
287}
288
289macro_rules! from_impl {
290    ($raw:ty, $variant:expr) => {
291        impl<'d> From<$raw> for Value<'d> {
292            fn from(other: $raw) -> Value<'d> {
293                $variant(other)
294            }
295        }
296    };
297}
298
299from_impl!(bool, Value::Boolean);
300from_impl!(f64, Value::Number);
301from_impl!(String, Value::String);
302impl<'a, 'd> From<&'a str> for Value<'d> {
303    fn from(other: &'a str) -> Value<'d> {
304        Value::String(other.into())
305    }
306}
307from_impl!(nodeset::Nodeset<'d>, Value::Nodeset);
308
309macro_rules! partial_eq_impl {
310    ($raw:ty, $variant:pat => $b:expr) => {
311        impl<'d> PartialEq<$raw> for Value<'d> {
312            fn eq(&self, other: &$raw) -> bool {
313                match *self {
314                    $variant => $b == other,
315                    _ => false,
316                }
317            }
318        }
319
320        impl<'d> PartialEq<Value<'d>> for $raw {
321            fn eq(&self, other: &Value<'d>) -> bool {
322                match *other {
323                    $variant => $b == self,
324                    _ => false,
325                }
326            }
327        }
328    };
329}
330
331partial_eq_impl!(bool, Value::Boolean(ref v) => v);
332partial_eq_impl!(f64, Value::Number(ref v) => v);
333partial_eq_impl!(String, Value::String(ref v) => v);
334partial_eq_impl!(&'d str, Value::String(ref v) => v);
335partial_eq_impl!(nodeset::Nodeset<'d>, Value::Nodeset(ref v) => v);
336
337/// A compiled XPath. Construct via [`Factory`][].
338///
339/// [`Factory`]: struct.Factory.html
340#[derive(Debug)]
341pub struct XPath(Box<dyn expression::Expression + 'static>);
342
343impl XPath {
344    /// Evaluate this expression in the given context.
345    ///
346    /// # Examples
347    ///
348    /// The most common case is to pass in a reference to a [`Context`][]:
349    ///
350    /// ```rust,no-run
351    /// use sxd_document::dom::Document;
352    /// use sxd_xpath::{XPath, Context};
353    ///
354    /// fn my_evaluate(doc: Document, xpath: XPath) {
355    ///     let mut context = Context::new();
356    ///     let value = xpath.evaluate(&context, doc.root());
357    ///     println!("The result was: {:?}", value);
358    /// }
359    ///
360    /// # fn main() {}
361    /// ```
362    ///
363    /// [`Context`]: context/struct.Context.html
364    pub fn evaluate<'d, N>(
365        &self,
366        context: &Context<'d>,
367        node: N,
368    ) -> Result<Value<'d>, ExecutionError>
369    where
370        N: Into<nodeset::Node<'d>>,
371    {
372        let context = context::Evaluation::new(context, node.into());
373        self.0.evaluate(&context).map_err(ExecutionError)
374    }
375}
376
377/// The primary entrypoint to convert an XPath represented as a string
378/// to a structure that can be evaluated.
379pub struct Factory {
380    parser: Parser,
381}
382
383impl Factory {
384    pub fn new() -> Factory {
385        Factory {
386            parser: Parser::new(),
387        }
388    }
389
390    /// Compiles the given string into an XPath structure.
391    pub fn build(&self, xpath: &str) -> Result<XPath, ParserError> {
392        let tokenizer = Tokenizer::new(xpath);
393        let deabbreviator = TokenDeabbreviator::new(tokenizer);
394
395        self.parser
396            .parse(deabbreviator)
397            .map(XPath)
398            .map_err(Into::into)
399    }
400}
401
402impl Default for Factory {
403    fn default() -> Self {
404        Factory::new()
405    }
406}
407
/// Errors that may occur when parsing an XPath
///
/// This is a newtype wrapper around the parser module's error type.
#[derive(Debug, Snafu, Clone, PartialEq)]
pub struct ParserError(parser::Error);

/// Errors that may occur when executing an XPath
///
/// This is a newtype wrapper around the expression module's error type.
#[derive(Debug, Snafu, Clone, PartialEq)]
pub struct ExecutionError(expression::Error);

/// The failure modes of executing an XPath.
///
/// Returned by `evaluate_xpath`, which both parses and executes an
/// expression and can therefore fail at either stage.
#[derive(Debug, Snafu, Clone, PartialEq)]
pub enum Error {
    /// The XPath was syntactically invalid
    #[snafu(display("Unable to parse XPath: {}", source))]
    Parsing { source: ParserError },
    /// The XPath could not be executed
    #[snafu(display("Unable to execute XPath: {}", source))]
    Executing { source: ExecutionError },
}
426
427/// Easily evaluate an XPath expression
428///
429/// The core XPath 1.0 functions will be available, and no variables
430/// or namespaces will be defined. The root of the document is the
431/// context node.
432///
433/// If you will be evaluating multiple XPaths or the same XPath
434/// multiple times, this may not be the most performant solution.
435///
436/// # Examples
437///
438/// ```
439/// use sxd_document::parser;
440/// use sxd_xpath::{evaluate_xpath, Value};
441///
442/// fn main() {
443///     let package = parser::parse("<root><a>1</a><b>2</b></root>").expect("failed to parse the XML");
444///     let document = package.as_document();
445///
446///     assert_eq!(Ok(Value::Number(3.0)), evaluate_xpath(&document, "/*/a + /*/b"));
447/// }
448/// ```
449pub fn evaluate_xpath<'d>(document: &'d Document<'d>, xpath: &str) -> Result<Value<'d>, Error> {
450    let factory = Factory::new();
451    let expression = factory.build(xpath).context(Parsing)?;
452
453    let context = Context::new();
454
455    expression
456        .evaluate(&context, document.root())
457        .context(Executing)
458}
459
#[cfg(test)]
mod test {
    use std::borrow::ToOwned;

    use sxd_document::{self, dom, Package};

    use super::*;

    // --- Conversions to numbers -------------------------------------

    #[test]
    fn number_of_string_is_ieee_754_number() {
        let value = Value::String("1.5".to_owned());
        let actual = value.number();
        assert_eq!(1.5, actual);
    }

    #[test]
    fn number_of_string_with_negative_is_negative_number() {
        let value = Value::String("-1.5".to_owned());
        let actual = value.number();
        assert_eq!(-1.5, actual);
    }

    #[test]
    fn number_of_string_with_surrounding_whitespace_is_number_without_whitespace() {
        let value = Value::String("\r\n1.5 \t".to_owned());
        let actual = value.number();
        assert_eq!(1.5, actual);
    }

    #[test]
    fn number_of_garbage_string_is_nan() {
        let value = Value::String("I am not an IEEE 754 number".to_owned());
        let actual = value.number();
        assert!(actual.is_nan());
    }

    #[test]
    fn number_of_boolean_true_is_1() {
        let value = Value::Boolean(true);
        assert_eq!(1.0, value.number());
    }

    #[test]
    fn number_of_boolean_false_is_0() {
        let value = Value::Boolean(false);
        assert_eq!(0.0, value.number());
    }

    #[test]
    fn number_of_nodeset_is_number_value_of_first_node_in_document_order() {
        let package = Package::new();
        let doc = package.as_document();

        let first = doc.create_comment("42.42");
        let second = doc.create_comment("1234");
        doc.root().append_child(first);
        doc.root().append_child(second);

        // The nodeset is built in reverse to show that document order wins.
        let value = Value::Nodeset(nodeset![second, first]);
        assert_eq!(42.42, value.number());
    }

    // --- Conversions to strings -------------------------------------

    #[test]
    fn string_of_true_is_true() {
        let value = Value::Boolean(true);
        assert_eq!("true", value.string());
    }

    #[test]
    fn string_of_false_is_false() {
        let value = Value::Boolean(false);
        assert_eq!("false", value.string());
    }

    #[test]
    fn string_of_nan_is_nan() {
        let value = Value::Number(::std::f64::NAN);
        assert_eq!("NaN", value.string());
    }

    #[test]
    fn string_of_positive_zero_is_zero() {
        let value = Value::Number(0.0);
        assert_eq!("0", value.string());
    }

    #[test]
    fn string_of_negative_zero_is_zero() {
        let value = Value::Number(-0.0);
        assert_eq!("0", value.string());
    }

    #[test]
    fn string_of_positive_infinity_is_infinity() {
        let value = Value::Number(::std::f64::INFINITY);
        assert_eq!("Infinity", value.string());
    }

    #[test]
    fn string_of_negative_infinity_is_minus_infinity() {
        let value = Value::Number(::std::f64::NEG_INFINITY);
        assert_eq!("-Infinity", value.string());
    }

    #[test]
    fn string_of_integer_has_no_decimal() {
        let value = Value::Number(-42.0);
        assert_eq!("-42", value.string());
    }

    #[test]
    fn string_of_decimal_has_fractional_part() {
        let value = Value::Number(1.2);
        assert_eq!("1.2", value.string());
    }

    #[test]
    fn string_of_nodeset_is_string_value_of_first_node_in_document_order() {
        let package = Package::new();
        let doc = package.as_document();

        let first = doc.create_comment("comment 1");
        let second = doc.create_comment("comment 2");
        doc.root().append_child(first);
        doc.root().append_child(second);

        // The nodeset is built in reverse to show that document order wins.
        let value = Value::Nodeset(nodeset![second, first]);
        assert_eq!("comment 1", value.string());
    }

    // --- End-to-end evaluation --------------------------------------

    /// Parses `xml` and hands the resulting document to `f`.
    fn with_document<F>(xml: &str, f: F)
    where
        F: FnOnce(dom::Document<'_>),
    {
        let package = sxd_document::parser::parse(xml).expect("Unable to parse test XML");
        let document = package.as_document();
        f(document);
    }

    #[test]
    fn xpath_evaluation_success() {
        with_document("<root><child>content</child></root>", |doc| {
            let result = evaluate_xpath(&doc, "/root/child");
            let text = result.map(|value| value.string());

            assert_eq!(Ok("content".to_owned()), text);
        });
    }

    #[test]
    fn xpath_evaluation_parsing_error() {
        with_document("<root><child>content</child></root>", |doc| {
            // A trailing slash is not a valid XPath.
            let result = evaluate_xpath(&doc, "/root/child/");

            let expected_error = crate::parser::TrailingSlash
                .fail()
                .map_err(ParserError::from)
                .context(Parsing);
            assert_eq!(expected_error, result);
        });
    }

    #[test]
    fn xpath_evaluation_execution_error() {
        with_document("<root><child>content</child></root>", |doc| {
            // No variables are defined, so referencing one must fail.
            let result = evaluate_xpath(&doc, "$foo");

            let expected_error = crate::expression::UnknownVariable { name: "foo" }
                .fail()
                .map_err(ExecutionError::from)
                .context(Executing);
            assert_eq!(expected_error, result);
        });
    }
}