sxd_xpath/
lib.rs

1//! # SXD-XPath
2//!
3//! This is a pure-Rust implementation of XPath, a language for
4//! addressing parts of an XML document. It aims to implement [version
5//! 1.0 of the XPath specification][spec].
6//!
7//! XPath is wonderful for quickly navigating the complicated
8//! hierarchy that is present in many XML documents while having a
9//! concise syntax.
10//!
11//! [spec]: https://www.w3.org/TR/xpath/
12//!
13//! ### Examples
14//!
15//! The quickest way to evaluate an XPath against an XML document is
16//! to use [`evaluate_xpath`][evaluate_xpath].
17//!
18//! ```
19//! extern crate sxd_document;
20//! extern crate sxd_xpath;
21//!
22//! use sxd_document::parser;
23//! use sxd_xpath::{evaluate_xpath, Value};
24//!
25//! fn main() {
26//!     let package = parser::parse("<root>hello</root>").expect("failed to parse XML");
27//!     let document = package.as_document();
28//!
29//!     let value = evaluate_xpath(&document, "/root").expect("XPath evaluation failed");
30//!
31//!     assert_eq!("hello", value.string());
32//! }
33//! ```
34//!
35//! Evaluating an XPath returns a [`Value`][], representing the
36//! primary XPath types.
37//!
38//! For more complex needs, XPath parsing and evaluation can be split
39//! apart. This allows the user to specify namespaces, variables,
40//! extra functions, and which node evaluation should begin with. You
41//! may also compile an XPath once and reuse it multiple times.
42//!
43//! Parsing is handled with the [`Factory`][] and evaluation relies on
44//! the [`Context`][]. Similar functionality to above can be
45//! accomplished:
46//!
47//! ```
48//! extern crate sxd_document;
49//! extern crate sxd_xpath;
50//!
51//! use sxd_document::parser;
52//! use sxd_xpath::{Factory, Context, Value};
53//!
54//! fn main() {
55//!     let package = parser::parse("<root>hello</root>")
56//!         .expect("failed to parse XML");
57//!     let document = package.as_document();
58//!
59//!     let factory = Factory::new();
60//!     let xpath = factory.build("/root").expect("Could not compile XPath");
61//!     let xpath = xpath.expect("No XPath was compiled");
62//!
63//!     let context = Context::new();
64//!
65//!     let value = xpath.evaluate(&context, document.root())
66//!         .expect("XPath evaluation failed");
67//!
68//!     assert_eq!("hello", value.string());
69//! }
70//! ```
71//!
72//! See [`Context`][] for details on how to customize the
73//! evaluation of the XPath.
74//!
75//! [evaluate_xpath]: fn.evaluate_xpath.html
76//! [`Value`]: enum.Value.html
77//! [`Factory`]: struct.Factory.html
78//! [`Context`]: context/struct.Context.html
79//!
80//! ### Programmatically-created XML
81//!
82//! The XPath specification assumes certain properties about the XML
83//! being processed. If you are processing XML that was parsed from
84//! text, this will be true by construction. If you have
85//! programmatically created XML, please note the following cases.
86//!
87//! #### Namespaces
88//!
89//! If you have programmatically created XML with namespaces but not
90//! defined prefixes, some XPath behavior may be confusing:
91//!
92//! 1. The `name` method will not include a prefix, even if the
93//! element or attribute has a namespace.
94//! 2. The `namespace` axis will not include namespaces without
95//! prefixes.
96//!
97//! #### Document order
98//!
99//! If you have programmatically created XML but not attached the
100//! nodes to the document, some XPath behavior may be confusing:
101//!
102//! 1. These nodes have no [*document order*]. If you create a
103//! variable containing these nodes and apply a predicate to them,
104//! these nodes will appear after any nodes that are present in the
105//! document, but the relative order of the nodes is undefined.
106//!
107//! [*document order*]: https://www.w3.org/TR/xpath/#dt-document-order
108
109#[macro_use]
110extern crate peresil;
111extern crate sxd_document;
112#[macro_use]
113extern crate quick_error;
114
115use std::borrow::ToOwned;
116use std::string;
117
118use sxd_document::{PrefixedName, QName};
119use sxd_document::dom::Document;
120
121use parser::Parser;
122use tokenizer::{Tokenizer, TokenDeabbreviator};
123
124pub use context::Context;
125
126#[macro_use]
127pub mod macros;
128pub mod nodeset;
129pub mod context;
130mod axis;
131mod expression;
132pub mod function;
133mod node_test;
134mod parser;
135mod token;
136mod tokenizer;
137
// These belong in the document
139
/// An owned prefixed name: an optional prefix plus a local part, both
/// stored as `String`s.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OwnedPrefixedName {
    prefix: Option<String>,
    local_part: String,
}

/// A bare string slice is treated as a local part without any prefix.
impl<'a> From<&'a str> for OwnedPrefixedName {
    fn from(local_part: &'a str) -> Self {
        let owned_local = String::from(local_part);

        OwnedPrefixedName {
            prefix: None,
            local_part: owned_local,
        }
    }
}

/// A pair of string slices is read as `(prefix, local_part)`.
impl<'a> From<(&'a str, &'a str)> for OwnedPrefixedName {
    fn from(parts: (&'a str, &'a str)) -> Self {
        let (prefix, local_part) = parts;

        OwnedPrefixedName {
            prefix: Some(String::from(prefix)),
            local_part: String::from(local_part),
        }
    }
}
163
164impl<'a> From<PrefixedName<'a>> for OwnedPrefixedName {
165    fn from(name: PrefixedName<'a>) -> Self {
166        OwnedPrefixedName {
167            prefix: name.prefix().map(Into::into),
168            local_part: name.local_part().into(),
169        }
170    }
171}
172
/// An owned qualified name: an optional namespace URI plus a local
/// part, both stored as `String`s.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OwnedQName {
    namespace_uri: Option<String>,
    local_part: String,
}

/// A bare string slice is treated as a local part in no namespace.
impl<'a> From<&'a str> for OwnedQName {
    fn from(local_part: &'a str) -> Self {
        let owned_local = String::from(local_part);

        OwnedQName {
            namespace_uri: None,
            local_part: owned_local,
        }
    }
}

/// A pair of string slices is read as `(namespace_uri, local_part)`.
impl<'a> From<(&'a str, &'a str)> for OwnedQName {
    fn from(parts: (&'a str, &'a str)) -> Self {
        let (namespace_uri, local_part) = parts;

        OwnedQName {
            namespace_uri: Some(String::from(namespace_uri)),
            local_part: String::from(local_part),
        }
    }
}
196
197impl<'a> From<QName<'a>> for OwnedQName {
198    fn from(name: QName<'a>) -> Self {
199        OwnedQName {
200            namespace_uri: name.namespace_uri().map(Into::into),
201            local_part: name.local_part().into(),
202        }
203    }
204}
205
206type LiteralValue = Value<'static>;
207
208/// The primary types of values that an XPath expression accepts
209/// as an argument or returns as a result.
210#[derive(Debug, Clone, PartialEq)]
211pub enum Value<'d> {
212    /// A true or false value
213    Boolean(bool),
214    /// A IEEE-754 double-precision floating point number
215    Number(f64),
216    /// A string
217    String(string::String),
218    /// A collection of unique nodes
219    Nodeset(nodeset::Nodeset<'d>),
220}
221
/// Converts a string to a number following the XPath 1.0 `number()`
/// rules.
///
/// The accepted form is: optional XML whitespace (space, tab, CR, LF),
/// an optional minus sign, then `Digits ('.' Digits?)?` or
/// `'.' Digits`, then optional XML whitespace. Every other string,
/// including forms that Rust's own float parser would accept such as
/// `"1e3"`, `"+1"` or `"inf"`, yields NaN.
fn str_to_num(s: &str) -> f64 {
    // Only the four XML whitespace characters may surround the number;
    // `str::trim` would also strip other Unicode whitespace.
    let trimmed = s.trim_matches(|c| c == ' ' || c == '\t' || c == '\r' || c == '\n');

    // A single leading minus sign is the only sign XPath permits.
    let unsigned = if trimmed.starts_with('-') {
        &trimmed[1..]
    } else {
        trimmed
    };

    let is_digit = |b: u8| b'0' <= b && b <= b'9';
    let digit_count = unsigned.bytes().filter(|&b| is_digit(b)).count();
    let dot_count = unsigned.bytes().filter(|&b| b == b'.').count();

    // Nothing but digits and at most one decimal point, with at least
    // one digit present.
    let matches_grammar =
        digit_count >= 1 && dot_count <= 1 && digit_count + dot_count == unsigned.len();

    if matches_grammar {
        // The validated text is a subset of what the standard parser
        // accepts, so it performs the actual IEEE 754 conversion.
        trimmed.parse().unwrap_or(::std::f64::NAN)
    } else {
        ::std::f64::NAN
    }
}
225
226impl<'d> Value<'d> {
227    pub fn boolean(&self) -> bool {
228        use Value::*;
229        match *self {
230            Boolean(val) => val,
231            Number(n) => n != 0.0 && ! n.is_nan(),
232            String(ref s) => ! s.is_empty(),
233            Nodeset(ref nodeset) => nodeset.size() > 0,
234        }
235    }
236
237    pub fn into_boolean(self) -> bool {
238        self.boolean()
239    }
240
241    pub fn number(&self) -> f64 {
242        use Value::*;
243        match *self {
244            Boolean(val) => if val { 1.0 } else { 0.0 },
245            Number(val) => val,
246            String(ref s) => str_to_num(s),
247            Nodeset(..) => str_to_num(&self.string()),
248        }
249    }
250
251    pub fn into_number(self) -> f64 {
252        self.number()
253    }
254
255    pub fn string(&self) -> string::String {
256        use Value::*;
257        match *self {
258            Boolean(v) => v.to_string(),
259            Number(n) => {
260                if n.is_infinite() {
261                    if n.signum() < 0.0 {
262                        "-Infinity".to_owned()
263                    } else {
264                        "Infinity".to_owned()
265                    }
266                } else {
267                    n.to_string()
268                }
269            },
270            String(ref val) => val.clone(),
271            Nodeset(ref ns) => match ns.document_order_first() {
272                Some(n) => n.string_value(),
273                None => "".to_owned(),
274            },
275        }
276    }
277
278    pub fn into_string(self) -> string::String {
279        use Value::*;
280        match self {
281            String(val) => val,
282            other => other.string(),
283        }
284    }
285}
286
// Generates `impl From<$source> for Value<'d>` that wraps the incoming
// value with `$wrap` (normally a `Value` variant constructor).
//
// The lifetime is deliberately named `'d` so that a `$source` type
// written at the call site may mention `'d` as well.
macro_rules! from_impl {
    ($source:ty, $wrap:expr) => {
        impl<'d> From<$source> for Value<'d> {
            fn from(other: $source) -> Self {
                $wrap(other)
            }
        }
    };
}
296
297from_impl!(bool, Value::Boolean);
298from_impl!(f64, Value::Number);
299from_impl!(String, Value::String);
300impl<'a, 'd> From<&'a str> for Value<'d> {
301    fn from(other: &'a str) -> Value<'d> {
302        Value::String(other.into())
303    }
304}
305from_impl!(nodeset::Nodeset<'d>, Value::Nodeset);
306
// Generates symmetric `PartialEq` impls between `Value<'d>` and the
// raw type `$plain`.
//
// `$shape` is the pattern selecting the matching `Value` variant and
// `$inner` is the expression (built from that pattern's bindings) to
// compare against the raw value. A `Value` of any other variant never
// compares equal.
//
// The lifetime is deliberately named `'d` so that a `$plain` type
// written at the call site may mention `'d` as well.
macro_rules! partial_eq_impl {
    ($plain:ty, $shape:pat => $inner:expr) => {
        impl<'d> PartialEq<$plain> for Value<'d> {
            fn eq(&self, other: &$plain) -> bool {
                if let $shape = *self {
                    $inner == other
                } else {
                    false
                }
            }
        }

        impl<'d> PartialEq<Value<'d>> for $plain {
            fn eq(&self, other: &Value<'d>) -> bool {
                if let $shape = *other {
                    $inner == self
                } else {
                    false
                }
            }
        }
    };
}
328
329partial_eq_impl!(bool, Value::Boolean(ref v) => v);
330partial_eq_impl!(f64, Value::Number(ref v) => v);
331partial_eq_impl!(String, Value::String(ref v) => v);
332partial_eq_impl!(&'d str, Value::String(ref v) => v);
333partial_eq_impl!(nodeset::Nodeset<'d>, Value::Nodeset(ref v) => v);
334
335/// A compiled XPath. Construct via [`Factory`][].
336///
337/// [`Factory`]: struct.Factory.html
338#[derive(Debug)]
339pub struct XPath(Box<expression::Expression + 'static>);
340
341impl XPath {
342    /// Evaluate this expression in the given context.
343    ///
344    /// # Examples
345    ///
346    /// The most common case is to pass in a reference to a [`Context`][]:
347    ///
348    /// ```rust,no-run
349    /// extern crate sxd_document;
350    /// extern crate sxd_xpath;
351    ///
352    /// use sxd_document::dom::Document;
353    /// use sxd_xpath::{XPath, Context};
354    ///
355    /// fn my_evaluate(doc: Document, xpath: XPath) {
356    ///     let mut context = Context::new();
357    ///     let value = xpath.evaluate(&context, doc.root());
358    ///     println!("The result was: {:?}", value);
359    /// }
360    ///
361    /// # fn main() {}
362    /// ```
363    ///
364    /// [`Context`]: context/struct.Context.html
365    pub fn evaluate<'d, N>(&self, context: &Context<'d>, node: N)
366                           -> Result<Value<'d>, ExecutionError>
367        where N: Into<nodeset::Node<'d>>,
368    {
369        let context = context::Evaluation::new(context, node.into());
370        self.0.evaluate(&context).map_err(ExecutionError)
371    }
372}
373
374/// The primary entrypoint to convert an XPath represented as a string
375/// to a structure that can be evaluated.
376pub struct Factory {
377    parser: Parser,
378}
379
380impl Factory {
381    pub fn new() -> Factory {
382        Factory { parser: Parser::new() }
383    }
384
385    /// Compiles the given string into an XPath structure.
386    pub fn build(&self, xpath: &str) -> Result<Option<XPath>, ParserError> {
387        let tokenizer = Tokenizer::new(xpath);
388        let deabbreviator = TokenDeabbreviator::new(tokenizer);
389
390        self.parser.parse(deabbreviator).map(|x| x.map(XPath)).map_err(ParserError)
391    }
392}
393
394impl Default for Factory {
395    fn default() -> Self {
396        Factory::new()
397    }
398}
399
400macro_rules! opaque_error {
401    (
402        $(#[$attr:meta])+
403        $name:ident($inner:ty)
404    ) => {
405        #[derive(Debug, Clone, PartialEq)]
406        $(#[$attr])+
407        pub struct $name($inner);
408
409        impl std::error::Error for $name {
410            fn description(&self) -> &str {
411                self.0.description()
412            }
413
414            fn cause(&self) -> Option<&std::error::Error> {
415                self.0.cause()
416            }
417        }
418
419        impl std::fmt::Display for $name {
420            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
421                self.0.fmt(f)
422            }
423        }
424
425        impl From<$inner> for $name {
426            fn from(other: $inner) -> $name {
427                $name(other)
428            }
429        }
430    }
431}
432
433opaque_error!(
434    /// Errors that may occur when parsing an XPath
435    ParserError(parser::Error)
436);
437
438opaque_error!(
439    /// Errors that may occur when executing an XPath
440    ExecutionError(expression::Error)
441);
442
443quick_error! {
444    /// The failure modes of executing an XPath.
445    #[derive(Debug, Clone, PartialEq)]
446    pub enum Error {
447        /// The XPath was syntactically invalid
448        Parsing(err: ParserError) {
449            from()
450            cause(err)
451            description("Unable to parse XPath")
452            display("Unable to parse XPath: {}", err)
453        }
454        /// The XPath did not construct an expression
455        NoXPath {
456            description("XPath was empty")
457        }
458        /// The XPath could not be executed
459        Executing(err: ExecutionError) {
460            from()
461            cause(err)
462            description("Unable to execute XPath")
463            display("Unable to execute XPath: {}", err)
464        }
465    }
466}
467
468/// Easily evaluate an XPath expression
469///
470/// The core XPath 1.0 functions will be available, and no variables
471/// or namespaces will be defined. The root of the document is the
472/// context node.
473///
474/// If you will be evaluating multiple XPaths or the same XPath
475/// multiple times, this may not be the most performant solution.
476///
477/// # Examples
478///
479/// ```
480/// extern crate sxd_document;
481/// extern crate sxd_xpath;
482///
483/// use sxd_document::parser;
484/// use sxd_xpath::{evaluate_xpath, Value};
485///
486/// fn main() {
487///     let package = parser::parse("<root><a>1</a><b>2</b></root>").expect("failed to parse the XML");
488///     let document = package.as_document();
489///
490///     assert_eq!(Ok(Value::Number(3.0)), evaluate_xpath(&document, "/*/a + /*/b"));
491/// }
492/// ```
493pub fn evaluate_xpath<'d>(document: &'d Document<'d>, xpath: &str) -> Result<Value<'d>, Error> {
494    let factory = Factory::new();
495    let expression = factory.build(xpath)?;
496    let expression = expression.ok_or(Error::NoXPath)?;
497
498    let context = Context::new();
499
500    expression.evaluate(&context, document.root()).map_err(Into::into)
501}
502
#[cfg(test)]
mod test {
    use std::borrow::ToOwned;

    use sxd_document::{self, dom, Package};

    use super::*;

    /// The document shared by the `evaluate_xpath` tests.
    const SAMPLE_XML: &'static str = "<root><child>content</child></root>";

    /// Builds a `Value::String` from a string slice.
    fn string_value(text: &str) -> Value<'static> {
        Value::String(text.to_owned())
    }

    /// Creates a document whose root holds two comments (`first`, then
    /// `second`) and passes `f` a nodeset value into which the
    /// comments were inserted in the opposite order.
    fn with_reversed_comment_nodeset<F>(first: &str, second: &str, f: F)
        where F: FnOnce(Value),
    {
        let package = Package::new();
        let doc = package.as_document();

        let c1 = doc.create_comment(first);
        let c2 = doc.create_comment(second);
        doc.root().append_child(c1);
        doc.root().append_child(c2);

        f(Value::Nodeset(nodeset![c2, c1]));
    }

    /// Parses `xml` and hands the resulting document to `f`.
    fn with_document<F>(xml: &str, f: F)
        where F: FnOnce(dom::Document),
    {
        let package = sxd_document::parser::parse(xml).expect("Unable to parse test XML");
        let doc = package.as_document();
        f(doc);
    }

    // ----- Conversion to numbers

    #[test]
    fn number_of_string_is_ieee_754_number() {
        assert_eq!(1.5, string_value("1.5").number());
    }

    #[test]
    fn number_of_string_with_negative_is_negative_number() {
        assert_eq!(-1.5, string_value("-1.5").number());
    }

    #[test]
    fn number_of_string_with_surrounding_whitespace_is_number_without_whitespace() {
        assert_eq!(1.5, string_value("\r\n1.5 \t").number());
    }

    #[test]
    fn number_of_garbage_string_is_nan() {
        let number = string_value("I am not an IEEE 754 number").number();
        assert!(number.is_nan());
    }

    #[test]
    fn number_of_boolean_true_is_1() {
        assert_eq!(1.0, Value::Boolean(true).number());
    }

    #[test]
    fn number_of_boolean_false_is_0() {
        assert_eq!(0.0, Value::Boolean(false).number());
    }

    #[test]
    fn number_of_nodeset_is_number_value_of_first_node_in_document_order() {
        with_reversed_comment_nodeset("42.42", "1234", |value| {
            assert_eq!(42.42, value.number());
        });
    }

    // ----- Conversion to strings

    #[test]
    fn string_of_true_is_true() {
        assert_eq!("true", Value::Boolean(true).string());
    }

    #[test]
    fn string_of_false_is_false() {
        assert_eq!("false", Value::Boolean(false).string());
    }

    #[test]
    fn string_of_nan_is_nan() {
        assert_eq!("NaN", Value::Number(::std::f64::NAN).string());
    }

    #[test]
    fn string_of_positive_zero_is_zero() {
        assert_eq!("0", Value::Number(0.0).string());
    }

    #[test]
    fn string_of_negative_zero_is_zero() {
        assert_eq!("0", Value::Number(-0.0).string());
    }

    #[test]
    fn string_of_positive_infinity_is_infinity() {
        assert_eq!("Infinity", Value::Number(::std::f64::INFINITY).string());
    }

    #[test]
    fn string_of_negative_infinity_is_minus_infinity() {
        assert_eq!("-Infinity", Value::Number(::std::f64::NEG_INFINITY).string());
    }

    #[test]
    fn string_of_integer_has_no_decimal() {
        assert_eq!("-42", Value::Number(-42.0).string());
    }

    #[test]
    fn string_of_decimal_has_fractional_part() {
        assert_eq!("1.2", Value::Number(1.2).string());
    }

    #[test]
    fn string_of_nodeset_is_string_value_of_first_node_in_document_order() {
        with_reversed_comment_nodeset("comment 1", "comment 2", |value| {
            assert_eq!("comment 1", value.string());
        });
    }

    // ----- evaluate_xpath

    #[test]
    fn xpath_evaluation_success() {
        with_document(SAMPLE_XML, |doc| {
            let text = evaluate_xpath(&doc, "/root/child").map(|v| v.string());

            assert_eq!(Ok("content".to_owned()), text);
        });
    }

    #[test]
    fn xpath_evaluation_parsing_error() {
        use Error::*;
        use parser::Error::*;

        with_document(SAMPLE_XML, |doc| {
            let outcome = evaluate_xpath(&doc, "/root/child/");

            assert_eq!(Err(Parsing(ParserError(TrailingSlash))), outcome);
        });
    }

    #[test]
    fn xpath_evaluation_execution_error() {
        use Error::*;
        use expression::Error::*;

        with_document(SAMPLE_XML, |doc| {
            let outcome = evaluate_xpath(&doc, "$foo");

            assert_eq!(Err(Executing(ExecutionError(UnknownVariable("foo".into())))), outcome);
        });
    }

    #[test]
    fn xpath_evaluation_no_xpath_error() {
        with_document(SAMPLE_XML, |doc| {
            let outcome = evaluate_xpath(&doc, "");

            assert_eq!(Err(Error::NoXPath), outcome);
        });
    }
}